azure-data-factory

adf collection reference with 1 row


Using ADF Collection reference we transfer this into json format . whilst this works for muliple records . It fails with 1 xml record.

The error is ErrorCode=UserErrorInvalidPathForCollectionReferenceNode,'Type=Microsoft.DataTransfer.Common.Shared.HybridDeliveryException,Message=Invalid json path '$['file-data']['files']['row']' for collection reference node

The sample file for 2 rows that works is below

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<file-data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <files>
        <row record="01">
            <FilePath>/MYDIRECTORY/MYTest/</FilePath>
            <ValidatedBy>Joe Blogg</ValidatedBy>
        </row>
        <row record="02">
            <FilePath>/MYDIRECTORY/MYTest01/</FilePath>
            <ValidatedBy>Jose</ValidatedBy>
        </row>
    </files>
</file-data>

This does not work .

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<file-data xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
   <files>
        <row record="01">
            <FilePath>/MYDIRECTORY/MYTest/</FilePath>
            <ValidatedBy>Joe Blogg</ValidatedBy>
        </row>
    </files>
</file-data>

sample output required

{"FilePath":"/MYDIRECTORY/MYTest/","ValidatedBy":"Joe Blogg"}

ADF Collection reference  This has been reported here too Azure Data Factory - Collection reference failing when only 1 record

any suggestions


Solution

  • enter image description here

    When defining collection reference as $['file-data']['files']['row'] if there is only 1 row it treats as object not as array that may be the reason to get above error. To get your required output follow below procedure:

    Go to mapping make Collection reference as empty, enable Advanced editor and Define columns as below:

    enter image description here

    The you will get the Json as shown below:

    {"FilePath":"/MYDIRECTORY/MYTest/","ValidatedBy":"Joe Blogg"}
    

    enter image description here

    update:

    Use lookup activity to read the file. Count the rows in file using @length(activity('Lookup1').output.value[0]['file-data'].files.row)

    After reading the file add if activity to the lookup activity with condition: @greater(length(activity('Lookup1').output.value[0]['file-data'].files.row),1)

    To check the row count is greater than 1 or not. If true add copy activity with required source with below mapping:

    enter image description here

    If false add copy activity with required source with below mapping:

    enter image description here

    Then it will work according to the row count. Here is the pipeline Json for your requirement:

    {
        "name": "pipeline4",
        "properties": {
            "activities": [
                {
                    "name": "Lookup1",
                    "type": "Lookup",
                    "dependsOn": [],
                    "policy": {
                        "timeout": "0.12:00:00",
                        "retry": 0,
                        "retryIntervalInSeconds": 30,
                        "secureOutput": false,
                        "secureInput": false
                    },
                    "userProperties": [],
                    "typeProperties": {
                        "source": {
                            "type": "XmlSource",
                            "storeSettings": {
                                "type": "AzureBlobFSReadSettings",
                                "recursive": true,
                                "enablePartitionDiscovery": false
                            },
                            "formatSettings": {
                                "type": "XmlReadSettings",
                                "validationMode": "none",
                                "namespaces": true
                            }
                        },
                        "dataset": {
                            "referenceName": "Xmlmulr",
                            "type": "DatasetReference"
                        },
                        "firstRowOnly": false
                    }
                },
                {
                    "name": "If Condition1",
                    "type": "IfCondition",
                    "dependsOn": [
                        {
                            "activity": "Lookup1",
                            "dependencyConditions": [
                                "Succeeded"
                            ]
                        }
                    ],
                    "userProperties": [],
                    "typeProperties": {
                        "expression": {
                            "value": "@greater(length(activity('Lookup1').output.value[0]['file-data'].files.row),1)",
                            "type": "Expression"
                        },
                        "ifFalseActivities": [
                            {
                                "name": "Copy data1_copy1",
                                "type": "Copy",
                                "dependsOn": [],
                                "policy": {
                                    "timeout": "0.12:00:00",
                                    "retry": 0,
                                    "retryIntervalInSeconds": 30,
                                    "secureOutput": false,
                                    "secureInput": false
                                },
                                "userProperties": [],
                                "typeProperties": {
                                    "source": {
                                        "type": "XmlSource",
                                        "storeSettings": {
                                            "type": "AzureBlobFSReadSettings",
                                            "recursive": true,
                                            "enablePartitionDiscovery": false
                                        },
                                        "formatSettings": {
                                            "type": "XmlReadSettings",
                                            "validationMode": "none",
                                            "namespaces": true
                                        }
                                    },
                                    "sink": {
                                        "type": "JsonSink",
                                        "storeSettings": {
                                            "type": "AzureBlobFSWriteSettings"
                                        },
                                        "formatSettings": {
                                            "type": "JsonWriteSettings"
                                        }
                                    },
                                    "enableStaging": false,
                                    "translator": {
                                        "type": "TabularTranslator",
                                        "mappings": [
                                            {
                                                "source": {
                                                    "path": "$['file-data']['files']['row']['FilePath']"
                                                },
                                                "sink": {
                                                    "path": "FilePath"
                                                }
                                            },
                                            {
                                                "source": {
                                                    "path": "$['file-data']['files']['row']['ValidatedBy']"
                                                },
                                                "sink": {
                                                    "path": "ValidatedBy"
                                                }
                                            }
                                        ],
                                        "collectionReference": ""
                                    }
                                },
                                "inputs": [
                                    {
                                        "referenceName": "Xml1",
                                        "type": "DatasetReference"
                                    }
                                ],
                                "outputs": [
                                    {
                                        "referenceName": "Json1",
                                        "type": "DatasetReference"
                                    }
                                ]
                            }
                        ],
                        "ifTrueActivities": [
                            {
                                "name": "Copy data1",
                                "type": "Copy",
                                "dependsOn": [],
                                "policy": {
                                    "timeout": "0.12:00:00",
                                    "retry": 0,
                                    "retryIntervalInSeconds": 30,
                                    "secureOutput": false,
                                    "secureInput": false
                                },
                                "userProperties": [],
                                "typeProperties": {
                                    "source": {
                                        "type": "XmlSource",
                                        "storeSettings": {
                                            "type": "AzureBlobFSReadSettings",
                                            "recursive": true,
                                            "enablePartitionDiscovery": false
                                        },
                                        "formatSettings": {
                                            "type": "XmlReadSettings",
                                            "validationMode": "none",
                                            "namespaces": true
                                        }
                                    },
                                    "sink": {
                                        "type": "JsonSink",
                                        "storeSettings": {
                                            "type": "AzureBlobFSWriteSettings"
                                        },
                                        "formatSettings": {
                                            "type": "JsonWriteSettings"
                                        }
                                    },
                                    "enableStaging": false,
                                    "translator": {
                                        "type": "TabularTranslator",
                                        "mappings": [
                                            {
                                                "source": {
                                                    "path": "['FilePath']"
                                                },
                                                "sink": {
                                                    "path": "FilePath"
                                                }
                                            },
                                            {
                                                "source": {
                                                    "path": "['ValidatedBy']"
                                                },
                                                "sink": {
                                                    "path": "ValidatedBy"
                                                }
                                            }
                                        ],
                                        "collectionReference": "$['file-data']['files']['row']"
                                    }
                                },
                                "inputs": [
                                    {
                                        "referenceName": "Xmlmulr",
                                        "type": "DatasetReference"
                                    }
                                ],
                                "outputs": [
                                    {
                                        "referenceName": "Json1",
                                        "type": "DatasetReference"
                                    }
                                ]
                            }
                        ]
                    }
                }
            ],
            "variables": {
                "rowcount": {
                    "type": "Integer"
                },
                "row": {
                    "type": "String"
                },
                "r1": {
                    "type": "Array"
                }
            },
            "annotations": []
        }
    }