I am trying to create a nested aggregation on a single collection in MongoDB using $graphLookup with keys dependentOn and usedBy which are of Object datatype that store array of categories and the _id field inside.
Input collection sample (relations):
// Doc 1:
{
"_id": "p_commerce_metrics_dtst_v1",
"category": "dataset",
"dependentOn": {
"metrics": [
"net_booking_count_v1"
]
}
// Doc 2:
{
"_id": "net_booking_count_v1",
"category": "metric",
"dependentOn": {
"metrics": [
"cancelled_booking_count_v1",
"gross_booking_count_v1"
]
}
}
// Doc 3:
{
"_id": "cancelled_booking_count_v1",
"category": "metric",
"dependentOn": {
"measures": [
"hb_cancel_measure_v1"
]
}
}
// Doc 4:
{
"_id": "gross_booking_count_v1",
"category": "metric",
"dependentOn": {
"measures": [
"hb_booking_measure_v1"
]
}
}
// Doc 5 (Not dependentOn any other document _id. Dead End):
{
"_id": "hb_cancel_measure_v1",
"category": "measure",
"usedBy": {
"metrics": [
"cancelled_booking_count_v1",
"more_metrics"
]
}
}
// Doc 6 (Not dependentOn any other document _id. Dead End):
{
"_id": "hb_booking_measure_v1",
"category": "measure",
"usedBy": {
"metrics": [
"gross_booking_count_v1",
"more_metrics"
]
}
}
Expected Output:
// Fetch entire dependentOn hierarchy for p_commerce_metrics_dtst_v1
[
{
"name_version": "p_commerce_metrics_dtst_v1",
"category": "dataset",
"dependent_on": {
"metrics": [
"net_booking_count_v1",
"cancelled_booking_count_v1",
"gross_booking_count_v1"
],
"measures": [
"hb_cancel_measure_v1",
"hb_booking_measure_v1"
]
}
}
]
I tried using the following aggregation query, but not able to get the desired output.
[
{ $match: {
_id: "p_commerce_metrics_dtst_v1"
}},
{ $graphLookup: {
from: "relations",
startWith: "$dependentOn",
connectFromField: "dependentOn",
connectToField: "_id",
depthField: "depth",
as: "dependentOn"
}}
]
How can I achieve the desired output using aggregation query?
Your $graphLookup
overrode the original value of dependentOn
. Just use another name for the lookup result and unset it later. The remaining is just some data wrangling for flattening the lookup result.
db.relations.aggregate([
{
$match: {
_id: "p_commerce_metrics_dtst_v1"
}
},
{
$graphLookup: {
from: "relations",
startWith: "$dependentOn.metrics",
connectFromField: "dependentOn.metrics",
connectToField: "_id",
depthField: "depth",
as: "dependentOnMetrics"
}
},
{
"$set": {
"dependentOn": {
"metrics": {
"$setUnion": [
{
"$ifNull": [
"$dependentOn.metrics",
[]
]
},
{
"$reduce": {
"input": "$dependentOnMetrics.dependentOn.metrics",
"initialValue": [],
"in": {
"$setUnion": [
"$$value",
"$$this"
]
}
}
}
]
},
"measures": {
"$setUnion": [
{
$ifNull: [
"$dependentOn.measures",
[]
]
},
{
"$reduce": {
"input": "$dependentOnMetrics.dependentOn.measures",
"initialValue": [],
"in": {
"$setUnion": [
"$$value",
"$$this"
]
}
}
}
]
}
}
}
},
{
"$unset": "dependentOnMetrics"
}
])