jsonazureazure-data-lakeazure-data-factoryu-sql

U-SQL script for JSON's transformation


I have JSON-file with many levels and need to transform it in Azure Data Lake Analytics to get all data started from "vin" level (incl.)

The cutted json is:

"vehicleStatusResponse": {
    "vehicleStatuses": [
        {
            "vin": "ABC1234567890",
            "triggerType": {
                "triggerType": "TIMER",
                "context": "RFMS",
                "driverId": {
                    "tachoDriverIdentification": {
                        "driverIdentification": "123456789",
                        "cardIssuingMemberState": "BRA",
                        "driverAuthenticationEquipment": "CARD",
                        "cardReplacementIndex": "0",
                        "cardRenewalIndex": "1"
                    }
                }
            },
            "receivedDateTime": "2020-02-12T04:11:19.221Z",
            "hrTotalVehicleDistance": 103306960,
            "totalEngineHours": 3966.6216666666664,
            "driver1Id": {
                "tachoDriverIdentification": {
                    "driverIdentification": "BRA1234567"
                }
            },
            "engineTotalFuelUsed": 48477520,
            "accumulatedData": {
                "durationWheelbaseSpeedOverZero": 8309713,
                "distanceCruiseControlActive": 8612200,
                "durationCruiseControlActive": 366083,
                "fuelConsumptionDuringCruiseActive": 3064170,
                "durationWheelbaseSpeedZero": 5425783,
                "fuelWheelbaseSpeedZero": 3332540,
                "fuelWheelbaseSpeedOverZero": 44709670,
                "ptoActiveClass": [
                    {
                        "label": "wheelbased speed >0",
                        "seconds": 16610,
                        "meters": 29050,
                        "milliLitres": 26310
                    },
                    {
                        "label": "wheelbased speed =0",
                        "seconds": 457344,
                        "milliLitres": 363350

There is a script for this case:

CREATE ASSEMBLY IF NOT EXISTS [Newtonsoft.Json] FROM "adl://#####.azuredatalakestore.net/Newtonsoft.Json.dll";
CREATE ASSEMBLY IF NOT EXISTS [Microsoft.Analytics.Samples.Formats] FROM "adl://#####.azuredatalakestore.net/Microsoft.Analytics.Samples.Formats.dll";


REFERENCE ASSEMBLY [Newtonsoft.Json];
REFERENCE ASSEMBLY [Microsoft.Analytics.Samples.Formats];

USING Microsoft.Analytics.Samples.Formats.Json;

DECLARE @InputFile string = "response.json";
DECLARE @OutputFile string = "response.csv";


@json =
EXTRACT
    vin string,
    triggerType string,
    driverIdentification string,
    receivedDateTime DateTime,
    hrTotalVehicleDistance int,
    totalEngineHours float,
    engineTotalFuelUsed int,
    durationWheelbaseSpeedOverZero int,
    distanceCruiseControlActive int,
    durationCruiseControlActive int,
    fuelConsumptionDuringCruiseActive int,
    durationWheelbaseSpeedZero int,
    fuelWheelbaseSpeedZero int
    ptoActiveClass string
    label string
    seconds int
    meters int
    millilitres int


FROM
    @InputFile
USING new MultiLevelJsonExtractor("vehicleStatusResponse.vehicleStatuses.vin[*]",
    true,
    "vin",
    "triggerType",
    "driverIdentification",
    "receivedDateTime",
    "hrTotalVehicleDistance",
    "totalEngineHours",
    "engineTotalFuelUsed",
    "durationWheelbaseSpeedOverZero",
    "distanceCruiseControlActive",
    "durationCruiseControlActive",
    "fuelConsumptionDuringCruiseActive",
    "durationWheelbaseSpeedZero",
    "fuelWheelbaseSpeedZero", 
    "ptoActiveClass",
    "label",
    "seconds",
    "meters",
    "millilitres"
    );
@new =
SELECT
    vin,
    triggerType,
    driverIdentification,
    receivedDateTime,
    hrTotalVehicleDistance,
    totalEngineHours,
    engineTotalFuelUsed,
    durationWheelbaseSpeedOverZero,
    distanceCruiseControlActive,
    durationCruiseControlActive,
    fuelConsumptionDuringCruiseActive,
    durationWheelbaseSpeedZero,
    fuelWheelbaseSpeedZero,
    ptoActiveClass,
    label,
    seconds,
    meters,
    millilitres
FROM @json;
OUTPUT @new
TO @OutputFile
USING Outputters.Csv();

Anyway, I get only blank response.csv with no data. What's wrong in my script? Will be interesting if you may have other ways to transform hierarchical json data.


Solution

  • You are not correctly extracting the JSON. You need to use it like:

    FROM
        @InputFile
    USING new MultiLevelJsonExtractor("vehicleStatusResponse.vehicleStatuses.vin[*]",
        true,
        "tachoDriverIdentification.driverIdentification"
        );
    

    You can read more about JSON advanced JSON manipulation using U-SQL here.