Is there a way to connect to a Power BI dataset from an Azure Synapse notebook or Azure Data Factory and execute DMVs, for example:
select * from $System.TMSCHEMA_MEASURES
You can use .NET for Spark with NuGet packages, and there's a .NET Core NuGet package for ADOMD.NET.
ADOMD.NET is an ADO.NET provider, and ADO.NET doesn't natively interop with Spark, so the hardest part here is converting the query results into a Spark DataFrame. Here's some conversion code you can run in the first cell of your notebook:
#r "nuget: Microsoft.AnalysisServices.AdomdClient.NetCore.retail.amd64, 19.51.0"
using System.Data;
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Types;
using T=Microsoft.Spark.Sql.Types;
using Microsoft.AnalysisServices.AdomdClient;
// Opens an XMLA connection, runs the DMV/DAX query, and returns the
// result set as a Spark DataFrame.
DataFrame RunXmlaQuery(string constr, string query, int topRows = 0)
{
    using (var con = new AdomdConnection(constr))
    {
        con.Open();
        var cmd = con.CreateCommand();
        cmd.CommandText = query;
        using (var rdr = cmd.ExecuteReader())
        {
            return GetDataFrame(rdr, topRows);
        }
    }
}
// Builds a Spark schema from the reader's column metadata and
// materializes the rows into a DataFrame.
DataFrame GetDataFrame(IDataReader rdr, int topRows = 0)
{
    var fields = GetFields(rdr).ToList();
    var type = new StructType(fields);
    //Console.WriteLine(type.SerializeToJson());
    return spark.CreateDataFrame(GetRows(rdr, topRows), type);
}
// Streams rows from the reader, converting values the Spark serializer
// can't handle (UInt64, DateTime, Date) before yielding each row.
IEnumerable<GenericRow> GetRows(IDataReader rdr, int topRows = 0)
{
    int rows = 0;
    while (rdr.Read())
    {
        rows++;
        var values = new object[rdr.FieldCount];
        rdr.GetValues(values);
        for (int i = 0; i < rdr.FieldCount; i++)
        {
            var obj = values[i];
            if (obj is UInt64 u)
            {
                values[i] = Convert.ToInt64(u);
            }
            else if (obj is DateTime dt)
            {
                values[i] = dt.ToString("o");
            }
            else if (obj is Date d)
            {
                values[i] = d.ToString("o");
            }
        }
        yield return new GenericRow(values);
        if (topRows > 0 && rows >= topRows)
            break;
    }
}
// Maps the .NET column types reported by the reader to Spark SQL types.
// Dates and timestamps are passed through as ISO-8601 strings because
// TimestampType/DateType conversion isn't supported here.
DataType GetSparkType(Type t)
{
    if (t == typeof(string))
        return new T.StringType();
    if (t == typeof(int))
        return new T.IntegerType();
    if (t == typeof(long) || t == typeof(UInt64))
        return new T.LongType();
    if (t == typeof(float))
        return new T.FloatType();
    if (t == typeof(double))
        return new T.DoubleType();
    if (t == typeof(decimal))
        return new T.DecimalType();
    if (t == typeof(DateTime) || t == typeof(DateTimeOffset))
        //return new T.TimestampType(); not yet supported
        return new T.StringType();
    if (t == typeof(Date))
        //return new T.DateType();
        return new T.StringType();
    if (t == typeof(Guid))
        return new T.StringType();
    if (t == typeof(short))
        return new T.ShortType();
    if (t == typeof(bool))
        return new T.BooleanType();
    if (t == typeof(byte))
        return new T.ByteType();
    throw new InvalidOperationException($"Unsupported Type for DataFrame conversion: {t.Name}");
}
// Produces one nullable StructField per column in the reader.
IEnumerable<StructField> GetFields(IDataReader rdr)
{
    for (int i = 0; i < rdr.FieldCount; i++)
    {
        var type = GetSparkType(rdr.GetFieldType(i));
        var name = rdr.GetName(i);
        //Console.WriteLine($"{name} {type}");
        yield return new StructField(name, type, isNullable: true);
    }
}
Once you have that, you just need to configure your connection string and run the query:
// Service principal authentication against the workspace's XMLA endpoint;
// the client secret is retrieved from Key Vault.
var clientId = "<ClientId>";
var tenantId = "<TenantId>";
var userId = $"app:{clientId}@{tenantId}";
var secret = MSSparkUtils.Credentials.GetSecret("<KeyVaultName>", "<SecretName>");
var xmlaEndpoint = "powerbi://api.powerbi.com/v1.0/myorg/<WorkspaceName>";
var dataset = "<DatasetName>";
var query = "select * from $System.TMSCHEMA_TABLES";
var constr = $"Data Source={xmlaEndpoint};database={dataset};user id={userId};password={secret}";

var df = RunXmlaQuery(constr, query);
Display(df);
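From there you can run the measures DMV from your question the same way. As a minimal sketch (assuming the connection above succeeds; the table name "pbi_measures" is just an illustration, not something defined above), you could persist the result for downstream use:
// Sketch only: run the TMSCHEMA_MEASURES DMV from the question and save the
// result as a Spark table. "pbi_measures" is an example table name.
var measuresDf = RunXmlaQuery(constr, "select * from $System.TMSCHEMA_MEASURES");
measuresDf.Write().Mode("overwrite").SaveAsTable("pbi_measures");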