transform apache-nifi jolt

Jolt Combine two arrays and merge common elements w/ occasional field


I have the following JSON record with two arrays that I would like to combine into one array:

{
  "id": "1234X",
  "rating": "B",
  "files": [
    {
      "sequenceId": 7,
      "show": "No",
      "hash": "ABC123",
      "collectTime": 1716308631000
    },
    {
      "sequenceId": 13,
      "collectTime": 1716308631000
    },
    {
      "sequenceId": 10,
      "hash": "DEF234",
      "collectTime": 1716308631000
    },
    {
      "sequenceId": 8,
      "show": "No",
      "collectTime": 1716308631000
    }
  ],
  "tags": [
    {
      "hash": "DEF234",
      "tag": "Corrupt"
    }
  ]
}

And the result would be:

{
  "id" : "1234X",
  "rating" : "B",
  "tempFiles" : [ {
    "sequenceId" : 7,
    "show" : "No",
    "hash" : "ABC123",
    "collectTime" : 1716308631000
  }, {
    "sequenceId" : 13,
    "collectTime" : 1716308631000
  }, {
    "sequenceId" : 10,
    "hash" : "DEF234",
    "tag" : "Corrupt",    
    "collectTime" : 1716308631000
  }, {
    "sequenceId" : 8,
    "show" : "No",
    "collectTime" : 1716308631000
  } ]
}

I combined the two arrays into one and set the cardinality to ONE, but I'm confused on how to set the cardinality on array elements that don't necessarily have the same LHV for every element

I've tried the following shift and cardinality operations:

[
  {
    "operation": "shift",
    "spec": {
      "files|tags": {
        "*": "tempFiles[]"
      },
      "*": "&"
    }
  }, {
    "operation": "cardinality",
    "spec": {
      "tempFiles[]": {
        "*": {
          "hash": "ONE"
        }
      }
    }
  }
]

But I get:

{
  "id" : "1234X",
  "rating" : "B",
  "tempFiles" : [ {
    "sequenceId" : 7,
    "show" : "No",
    "hash" : "ABC123",
    "collectTime" : 1716308631000
  }, {
    "sequenceId" : 13,
    "collectTime" : 1716308631000
  }, {
    "sequenceId" : 10,
    "hash" : "DEF234",
    "collectTime" : 1716308631000
  }, {
    "sequenceId" : 8,
    "show" : "No",
    "collectTime" : 1716308631000
  }, {
    "hash" : "DEF234",
    "tag" : "Corrupt"
  } ]
}

How do I get one array record that contains the hash and has the collectTime and the tag with the sequenceId? I'm guessing there's a better way of doing this, but I am new to these transforms.


Solution

  • First, I don't think cardinality works that way. Cardinality only changes whether a value is a single item or a list: "ONE" takes the first element when the value is a list, and "MANY" wraps a non-list value in a list. It does not merge array elements that share a field value. For more info, refer to the Jolt cardinality documentation. For your input I was able to do it using the following spec. I'm not sure it can be done with fewer than four operations, especially since hash is not a required field:

    [
      // Four-operation Jolt chain (default -> shift -> shift -> shift).
      // Goal: merge each "tags" entry into the "files" entry that has the
      // same hash, and emit the combined records as "tempFiles".
      {
        // Step 1: give every "files" element that has no "hash" an
        // empty-string hash, so step 2 can branch on the hash value for
        // every element.
        "operation": "default",
        "spec": {
          "files[]": {
            "*": {
              "hash": ""
            }
          }
        }
      }
      ,
      {
        // Step 2: group files and tags that belong to the same hash under
        // temp.<hash>. Files whose hash is the empty string (i.e. had no
        // hash originally) are appended to the NoHash array instead.
        // "*": "&" passes the remaining top-level fields through under
        // their own names.
        "operation": "shift",
        "spec": {
          "*": "&",
          "files": {
            "*": {
              "hash": {
                // Non-empty hash value: copy the whole file element
                // (two levels up) to temp.<hash value>.
                // NOTE(review): two files with the same hash would both be
                // written to the same temp.<hash> path - confirm that
                // cannot happen in the input.
                "*": {
                  "@(2)": "temp.&1"
                },
                // Empty hash value: copy the whole file element to NoHash[].
                "": {
                  "@(2)": "NoHash[]"
                }
              }
            }
          },
          // Copy each tag field to temp.<this tag's hash>.<field name>, so
          // it lands in the same object as the matching file. The tag's own
          // "hash" is mapped to null, i.e. not written again.
          "tags": {
            "*": {
              "*": "temp.@(1,hash).&",
              "hash": null
            }
          }
        }
      }
      ,
      // Step 3: move the merged objects under temp and the NoHash elements
      // into a single tempFiles[] array; other top-level fields pass through.
      // NOTE(review): the resulting element order may differ from the
      // original "files" order - confirm that is acceptable.
      {
        "operation": "shift",
        "spec": {
          "*": "&",
          "temp": {
            "*": "tempFiles[]"
          },
          "NoHash": {
            "*": "tempFiles[]"
          }
        }
      }
      ,
      // Step 4: remove the placeholder empty-string hash that step 1 added.
      // Each tempFiles element is rebuilt at its own index: an empty hash is
      // dropped (null), a non-empty hash value is written back via "$", and
      // every other field is copied unchanged.
      {
        "operation": "shift",
        "spec": {
          "*": "&",
          "tempFiles": {
            "*": {
              "hash": {
                "": null,
                "*": {
                  "$": "tempFiles[&3].&2"
                }
              },
              "*": "tempFiles[&1].&"
            }
          }
        }
      }
      /**/
    ]