marklogic marklogic-dhf

MarkLogic Data Hub Framework 5.2.2 Mapping


I'm trying to learn the Data Hub Framework 5.2.2 and, as part of that, I'm implementing a small project. Could someone help me understand the points below?

ingestionmapping.flow.json

{
  "name": "ingestionmapping",
  "description": "This is the default flow containing all of the default steps",
  "batchSize": 100,
  "threadCount": 4,
  "options": {
    "sourceQuery": null
  },
  "steps": {
    "1": {
      "name": "csv-ingest-step-json",
      "description": "ingests json docs in JSON format to data-hub-STAGING",
      "stepDefinitionName": "productIngestion",
      "stepDefinitionType": "INGESTION",
      "customHook" : {
        "module" : "",
        "parameters" : { },
        "user" : "",
        "runBefore" : false
      },
      "batchSize" : 100,
      "threadCount" : 4,
      "fileLocations": {
        "inputFilePath": "input",
        "outputURIReplacement": ".*input*.,'/mapping-flow/json'",
        "inputFileType": "csv"
      },
      "options": {
        "targetDatabase": "data-hub-STAGING",
        "sourceQuery": "cts.collectionQuery([])",
        "permissions": "data-hub-operator,read,data-hub-operator,update",
        "outputFormat": "json",
        "collections": [
          "mapping-flow-ingestion-json"
        ],
        "headers": {
          "sources": [{"name":  "ingestion_only-flow"}],
          "createdOn" : "currentDateTime",
          "createdBy" : "currentUser"
        }
      }
    },
    "2": {
      "name": "mapping-step",
      "description": "This is the default mapping step",
      "stepDefinitionName": "productMapping",
      "stepDefinitionType": "MAPPING",
      "customHook" : {
        "module" : "",
        "parameters" : { },
        "user" : "",
        "runBefore" : false
      },
      "batchSize" : 100,
      "threadCount" : 4,
      "options": {
        "sourceDatabase": "data-hub-STAGING",
        "targetDatabase": "data-hub-FINAL",
        "sourceQuery": "cts.collectionQuery('mapping-flow-ingestion-json')",
        "permissions": "data-hub-operator,read,data-hub-operator,update",
        "outputFormat": "json",
        "collections": [
          "mapping-flow-mapping-json",
          "mdm-content"
        ],
        "targetEntity": "modifiedproduct",
        "mapping": {
          "name": "ingestionmapping-productMapping",
          "version": 1
        },
        "validateEntity": false
      }
    }
  }
}

Mapping file: ingestionmapping-productMapping-1.mapping.json

{
  "lang" : "zxx",
  "name" : "ingestionmapping-productMapping",
  "description" : "",
  "version" : 1,
  "targetEntityType" : "http://marklogic.com/modifiedproduct-0.0.1/modifiedproduct",
  "sourceContext" : "/",
  "sourceURI" : "/mapping-flow/json/....json",
  "properties" : {
    "mgame_id" : {
      "sourcedFrom" : "game_id"
    },
    "mSKU" : {
      "sourcedFrom" : "SKU"
    },
    "mtitle" : {
      "sourcedFrom" : "title"
    },
    "mprice" : {
      "sourcedFrom" : "price"
    },
    "mdescription" : {
      "sourcedFrom" : "description"
    },
    "myears_active" : {
      "sourcedFrom" : "years_active"
    },
    "mpublication_date" : {
      "sourcedFrom" : "publication_date"
    },
    "mplayers" : {
      "sourcedFrom" : "players"
    },
    "mage_range" : {
      "sourcedFrom" : "age_range"
    },
    "msetup_time" : {
      "sourcedFrom" : "setup_time"
    },
    "mplaying_time" : {
      "sourcedFrom" : "playing_time"
    },
    "mchance" : {
      "sourcedFrom" : "chance"
    },
    "mcategory" : {
      "sourcedFrom" : "category"
    },
    "mhas_extensions" : {
      "sourcedFrom" : "has_extensions"
    },
    "mhas_accessories" : {
      "sourcedFrom" : "has_accessories"
    },
    "mhas_apparel" : {
      "sourcedFrom" : "has_apparel"
    },
    "mpopularity_tier" : {
      "sourcedFrom" : "popularity_tier"
    },
    "mprobability_apparel" : {
      "sourcedFrom" : "probability_apparel"
    },
    "mprobability_accessories" : {
      "sourcedFrom" : "probability_accessories"
    },
    "mprobability_extensions" : {
      "sourcedFrom" : "probability_extensions"
    }
  }
}

Entity name : modifiedproduct version : 0.0.1

I have tried many times to debug the issue but have not been able to find where it goes wrong. As a result, the mapping step stores the same JSON in the final database without applying the mapping attributes.

folder structure: Folder structure screenshot

json file

{
"envelope": {
"headers": {
"sources": [
{
"name": "ingestion_only-flow"
}
], 
"createdOn": "2020-07-02T09:49:57.5876177+02:00", 
"createdBy": "admin", 
"createdUsingFile": "C:\\Users\\Jhansi\\IdeaProjects\\MarklogicDataHubFramework5.2\\input\\board_games.csv"
}, 
"triples": [
], 
"instance": {
"game_id": "1000130", 
"SKU": "177897644317", 
"title": "careful crack", 
"price": "24.95", 
"description": "", 
"years_active": "0", 
"publication_date": "0", 
"players": "2-4", 
"age_range": "", 
"setup_time": "< 5 minutes", 
"playing_time": "1 hour", 
"chance": "High", 
"category": "Board Game", 
"has_extensions": "False", 
"has_accessories": "True", 
"has_apparel": "False", 
"popularity_tier": "3", 
"probability_apparel": "0.3", 
"probability_accessories": "0.3", 
"probability_extensions": "0.3"
}, 
"attachments": null
}
}

Solution

  • Data Hub renders the desired mapping only when MarkLogic Entity Services is properly deployed. (Notice the entity declaration in the mapped document; that is the key takeaway.)

    enter image description here

    https://docs.marklogic.com/datahub//flows/flow-definition.html#flow-definition__custom-step-settings

    stepDefinitionName: .....Tip: If you are customizing a default step type (ingestion, mapping, or mastering), leave the value as default-ingestion, default-mapping, or default-mastering....

    Once you have reviewed the above, please follow Data Hub best practice and correct the erroneous, manually written step definitions. Given your level of familiarity with MarkLogic Data Hub, the problems below shouldn’t happen if you use QuickStart to create the flow and steps.

    "steps": {
        "1": {
    ……………
          "stepDefinitionName": "productIngestion",
          "stepDefinitionType": "INGESTION",
    ……………
    
        
    
    "2": {
          "name": "mapping-step",
    
          "stepDefinitionName": "productMapping",
          "stepDefinitionType": "MAPPING",
    …………
    
            "mapping": {
              "name": "ingestionmapping-productMapping",
    
    1. Please clean up your project structure and remove the contents of the step-definitions folder. Project structure example (the pink part):

    enter image description here

    2. A working example of the step definitions is below. When in doubt, please validate the step in QuickStart.
    {
      "name" : "ingestionmapping",
      "description" : "",
      "batchSize" : 100,
      "threadCount" : 4,
      "stopOnError" : false,
      "options" : { },
      "version" : 0,
      "steps" : {
        "1" : {
          "name" : "csv-ingest-step-json",
          "description" : "",
          "options" : {
            "additionalCollections" : [ ],
            "headers" : {
              "sources" : [ {
                "name" : "ingestionmapping"
              } ],
              "createdOn" : "currentDateTime",
              "createdBy" : "currentUser"
            },
            "sourceQuery" : "cts.collectionQuery([])",
            "collections" : [ "mapping-flow-ingestion-json" ],
            "permissions" : "data-hub-operator,read,data-hub-operator,update",
            "outputFormat" : "json",
            "targetDatabase" : "store-hub-STAGING"
          },
          "customHook" : {
            "module" : "",
            "parameters" : { },
            "user" : "",
            "runBefore" : false
          },
          "retryLimit" : 0,
          "batchSize" : 100,
          "threadCount" : 4,
          "stepDefinitionName" : "default-ingestion",
          "stepDefinitionType" : "INGESTION",
          "fileLocations" : {
            "inputFilePath" : "/mldhf/STORE/data/products/games",
            "inputFileType" : "csv",
            "outputURIReplacement" : ".*games*.,'/mapping-flow/json'",
            "separator" : ","
          }
        },
        "2" : {
          "name" : "mapping-step",
          "description" : "",
          "options" : {
            "additionalCollections" : [ ],
            "sourceQuery" : "cts.collectionQuery([\"mapping-flow-ingestion-json\"])",
            "mapping" : {
              "name" : "ingestionmapping-mapping-step",
              "version" : 1
            },
            "targetEntity" : "modifiedproduct",
            "sourceDatabase" : "store-hub-STAGING",
            "collections" : [ "mapping-flow-mapping-json", "mdm-content" ],
            "permissions" : "data-hub-operator,read,data-hub-operator,update",
            "validateEntity" : false,
            "sourceCollection" : "csv-ingest-step-json",
            "outputFormat" : "json",
            "targetDatabase" : "store-hub-FINAL"
          },
          "customHook" : {
            "module" : "",
            "parameters" : { },
            "user" : "",
            "runBefore" : false
          },
          "retryLimit" : null,
          "batchSize" : 100,
          "threadCount" : 4,
          "stepDefinitionName" : "entity-services-mapping",
          "stepDefinitionType" : "MAPPING"
        }
      }
    }