I'm trying to learn the MarkLogic Data Hub Framework 5.2.2 and, as part of that, I am implementing a small project. Could someone help me understand the points below?
ingestionmapping.flow.json
{
"name": "ingestionmapping",
"description": "This is the default flow containing all of the default steps",
"batchSize": 100,
"threadCount": 4,
"options": {
"sourceQuery": null
},
"steps": {
"1": {
"name": "csv-ingest-step-json",
"description": "ingests json docs in JSON format to data-hub-STAGING",
"stepDefinitionName": "productIngestion",
"stepDefinitionType": "INGESTION",
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"batchSize" : 100,
"threadCount" : 4,
"fileLocations": {
"inputFilePath": "input",
"outputURIReplacement": ".*input*.,'/mapping-flow/json'",
"inputFileType": "csv"
},
"options": {
"targetDatabase": "data-hub-STAGING",
"sourceQuery": "cts.collectionQuery([])",
"permissions": "data-hub-operator,read,data-hub-operator,update",
"outputFormat": "json",
"collections": [
"mapping-flow-ingestion-json"
],
"headers": {
"sources": [{"name": "ingestion_only-flow"}],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
}
}
},
"2": {
"name": "mapping-step",
"description": "This is the default mapping step",
"stepDefinitionName": "productMapping",
"stepDefinitionType": "MAPPING",
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"batchSize" : 100,
"threadCount" : 4,
"options": {
"sourceDatabase": "data-hub-STAGING",
"targetDatabase": "data-hub-FINAL",
"sourceQuery": "cts.collectionQuery('mapping-flow-ingestion-json')",
"permissions": "data-hub-operator,read,data-hub-operator,update",
"outputFormat": "json",
"collections": [
"mapping-flow-mapping-json",
"mdm-content"
],
"targetEntity": "modifiedproduct",
"mapping": {
"name": "ingestionmapping-productMapping",
"version": 1
},
"validateEntity": false
}
}
}
}
Mapping file: ingestionmapping-productMapping-1.mapping.json
{
"lang" : "zxx",
"name" : "ingestionmapping-productMapping",
"description" : "",
"version" : 1,
"targetEntityType" : "http://marklogic.com/modifiedproduct-0.0.1/modifiedproduct",
"sourceContext" : "/",
"sourceURI" : "/mapping-flow/json/....json",
"properties" : {
"mgame_id" : {
"sourcedFrom" : "game_id"
},
"mSKU" : {
"sourcedFrom" : "SKU"
},
"mtitle" : {
"sourcedFrom" : "title"
},
"mprice" : {
"sourcedFrom" : "price"
},
"mdescription" : {
"sourcedFrom" : "description"
},
"myears_active" : {
"sourcedFrom" : "years_active"
},
"mpublication_date" : {
"sourcedFrom" : "publication_date"
},
"mplayers" : {
"sourcedFrom" : "players"
},
"mage_range" : {
"sourcedFrom" : "age_range"
},
"msetup_time" : {
"sourcedFrom" : "setup_time"
},
"mplaying_time" : {
"sourcedFrom" : "playing_time"
},
"mchance" : {
"sourcedFrom" : "chance"
},
"mcategory" : {
"sourcedFrom" : "category"
},
"mhas_extensions" : {
"sourcedFrom" : "has_extensions"
},
"mhas_accessories" : {
"sourcedFrom" : "has_accessories"
},
"mhas_apparel" : {
"sourcedFrom" : "has_apparel"
},
"mpopularity_tier" : {
"sourcedFrom" : "popularity_tier"
},
"mprobability_apparel" : {
"sourcedFrom" : "probability_apparel"
},
"mprobability_accessories" : {
"sourcedFrom" : "probability_accessories"
},
"mprobability_extensions" : {
"sourcedFrom" : "probability_extensions"
}
}
}
Entity name : modifiedproduct version : 0.0.1
I have tried many times to debug the issue but have not been able to find where it goes wrong. As a result, the mapping step stores the same JSON in the final database without applying the mapping attributes.
folder structure: Folder structure screenshot
json file
{
"envelope": {
"headers": {
"sources": [
{
"name": "ingestion_only-flow"
}
],
"createdOn": "2020-07-02T09:49:57.5876177+02:00",
"createdBy": "admin",
"createdUsingFile": "C:\\Users\\Jhansi\\IdeaProjects\\MarklogicDataHubFramework5.2\\input\\board_games.csv"
},
"triples": [
],
"instance": {
"game_id": "1000130",
"SKU": "177897644317",
"title": "careful crack",
"price": "24.95",
"description": "",
"years_active": "0",
"publication_date": "0",
"players": "2-4",
"age_range": "",
"setup_time": "< 5 minutes",
"playing_time": "1 hour",
"chance": "High",
"category": "Board Game",
"has_extensions": "False",
"has_accessories": "True",
"has_apparel": "False",
"popularity_tier": "3",
"probability_apparel": "0.3",
"probability_accessories": "0.3",
"probability_extensions": "0.3"
},
"attachments": null
}
}
Data Hub renders the desired mapping when MarkLogic Entity Services is properly deployed. (Notice the entity declaration in the mapped document; that is the key takeaway.)
https://docs.marklogic.com/datahub//flows/flow-definition.html#flow-definition__custom-step-settings
`stepDefinitionName`: ... Tip: If you are customizing a default step type (ingestion, mapping, or mastering), leave the value as `default-ingestion`, `default-mapping`, or `default-mastering`. ...
Once you have reviewed the above, please follow Data Hub best practice and correct the erroneous, manually written `Steps` definitions. The problems below shouldn't happen if you use QuickStart to create the `Flow` and `Steps`, given your familiarity with MarkLogic Data Hub.
"steps": {
"1": {
……………
"stepDefinitionName": "productIngestion",
"stepDefinitionType": "INGESTION",
……………
"2": {
"name": "mapping-step",
"stepDefinitionName": "productMapping",
"stepDefinitionType": "MAPPING",
…………
"mapping": {
"name": "ingestionmapping-productMapping",
- Please clean up your project structure and remove the contents of the `step-definitions` folder. Project structure example (the pink part):
- A working example of the `Steps` definitions is below. When in doubt, please validate the step in QuickStart.
{
"name" : "ingestionmapping",
"description" : "",
"batchSize" : 100,
"threadCount" : 4,
"stopOnError" : false,
"options" : { },
"version" : 0,
"steps" : {
"1" : {
"name" : "csv-ingest-step-json",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"headers" : {
"sources" : [ {
"name" : "ingestionmapping"
} ],
"createdOn" : "currentDateTime",
"createdBy" : "currentUser"
},
"sourceQuery" : "cts.collectionQuery([])",
"collections" : [ "mapping-flow-ingestion-json" ],
"permissions" : "data-hub-operator,read,data-hub-operator,update",
"outputFormat" : "json",
"targetDatabase" : "store-hub-STAGING"
},
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"retryLimit" : 0,
"batchSize" : 100,
"threadCount" : 4,
"stepDefinitionName" : "default-ingestion",
"stepDefinitionType" : "INGESTION",
"fileLocations" : {
"inputFilePath" : "/mldhf/STORE/data/products/games",
"inputFileType" : "csv",
"outputURIReplacement" : ".*games*.,'/mapping-flow/json'",
"separator" : ","
}
},
"2" : {
"name" : "mapping-step",
"description" : "",
"options" : {
"additionalCollections" : [ ],
"sourceQuery" : "cts.collectionQuery([\"mapping-flow-ingestion-json\"])",
"mapping" : {
"name" : "ingestionmapping-mapping-step",
"version" : 1
},
"targetEntity" : "modifiedproduct",
"sourceDatabase" : "store-hub-STAGING",
"collections" : [ "mapping-flow-mapping-json", "mdm-content" ],
"permissions" : "data-hub-operator,read,data-hub-operator,update",
"validateEntity" : false,
"sourceCollection" : "csv-ingest-step-json",
"outputFormat" : "json",
"targetDatabase" : "store-hub-FINAL"
},
"customHook" : {
"module" : "",
"parameters" : { },
"user" : "",
"runBefore" : false
},
"retryLimit" : null,
"batchSize" : 100,
"threadCount" : 4,
"stepDefinitionName" : "entity-services-mapping",
"stepDefinitionType" : "MAPPING"
}
}
}