mongodbaggregation-frameworkpipelinesubdocument

How do I limit/filter entries in a sub document?


Answer is at bottom of my question

I am close to getting what I want... but just a little bit off.

what I want to return is all active kennels and if any kennel has a booking(s), for a specific Year (2024), an array of dayOfYear (eg [100,101,102,103]) and an array of rooms (eg [1,2] )

Here is a Mongo playground

This is what I am getting now

[
        {
            "_id": "65ef79a2331ab6aef4fae5d4",
            "identifier": "1.1.6x10",
            "active": true,
            "partition": "1",
            "length": 10,
            "width": 6,
            "bookings": [
                {
                    "_id": "65ef88f444e7d6607498ac2e",
                    "year": 2024,
                    "dayOfYear": 100,
                    "duration": 6
                },
                {
                    "_id": "65f0ca69f2667460e600a46a",
                    "year": 2024,
                    "dayOfYear": 107,
                    "duration": 1
                }
            ]
        }
]

this is what I would like to get - all kennels, AND the kennel with identifier "1.1.6x10" should show a single booking for 2024 - 100, (for clarity - bookings for any Kennel with a booking on 2024 - 100 should also be returned, but for this example data, only the 1.1.6x10 kennel has a booking)

[
        {
            "_id": "65ef79a2331ab6aef4fae5d4",
            "identifier": "1.1.6x10",
            "active": true,
            "partition": "1",
            "length": 10,
            "width": 6,
            "bookings": [
                {
                    "_id": "65ef88f444e7d6607498ac2e",
                    "year": 2024,
                    "dayOfYear": 100,
                    "duration": 6
                }
            ]
        },
        {
            "_id": "65ef79a2331ab6aef4fae5d5",
            "identifier": "1.2.6x10",
            "active": true,
            "partition": "2",
            "length": 10,
            "width": 6,
            "bookings": []
        },
        {
            "_id": "65ef79a3331ab6aef4fae5e5",
            "identifier": "2.1.4x10",
            "active": true,
            "partition": "1",
            "length": 10,
            "width": 4,
            "bookings": []
        },
        {
            "_id": "65ef79a3331ab6aef4fae5e6",
            "identifier": "2.2.4x10",
            "active": true,
            "partition": "2",
            "length": 10,
            "width": 4,
            "bookings": []
        },
        {
            "_id": "65ef79a3331ab6aef4fae5e7",
            "identifier": "2.3.4x10",
            "active": true,
            "partition": "3",
            "length": 10,
            "width": 4,
            "bookings": []
        }
    ]

Here is the mongodb pipeline I am using, aggregating on 'kennels'

  let pipeline = [
    {
      $lookup: {
        from: 'rooms',
        localField: 'REF_RoomID',
        foreignField: '_id',
        as: 'room',
      },
    },
    {
      $unwind: {
        path: '$room',
        preserveNullAndEmptyArrays: false,
      },
    },
    {
      $lookup: {
        from: 'bookings',
        localField: '_id',
        foreignField: 'REF_KennelID',
        as: 'bookings',
      },
    },
    {
      $match: {
        $and: [
          { active: true },
          { 'room.number': { $in: [1,2] } },
          //
          // this is my problem area
          //{ 'bookings.year': { $eq: 2024 } },
          //{ 'bookings.dayOfYear': { $eq: 100 } },
        ],
      },
    },
    {
      $project: {
        REF_RoomID: 0,
        room: 0,
        'bookings.REF_KennelID': 0,
        'bookings.__v': 0,
      },
    },
  ];

Here is some data:

Rooms

{
  "_id":  "65ef799f331ab6aef4fae5ba",
  "number": "1",
  "length": "10",
  "width": "12"
},
{
  "_id": "65ef79a0331ab6aef4fae5be",
  "number": "2",
  "length": "10",
  "width": "12"
}

Kennels

{
  "_id": "65ef79a2331ab6aef4fae5d4",
  "REF_RoomID":  "65ef799f331ab6aef4fae5ba",
  "identifier": "1.1.6x10",
  "active": true,
  "partition": "1",
  "length": 10,
  "width": 6
},
{
  "_id": "65ef79a2331ab6aef4fae5d5",
  "REF_RoomID": "65ef799f331ab6aef4fae5ba",
  "identifier": "1.2.6x10",
  "active": true,
  "partition": "2",
  "length": 10,
  "width": 6
},
{
  "_id": "65ef79a3331ab6aef4fae5e5",
  "REF_RoomID": "65ef79a0331ab6aef4fae5be",
  "identifier": "2.1.4x10",
  "active": true,
  "partition": "1",
  "length": 10,
  "width": 4
},
{
  "_id": "65ef79a3331ab6aef4fae5e6",
  "REF_RoomID": "65ef79a0331ab6aef4fae5be",
  "identifier": "2.2.4x10",
  "active": true,
  "partition": "2",
  "length": 10,
  "width": 4
},
{
  "_id": "65ef79a3331ab6aef4fae5e7",
  "REF_RoomID": "65ef79a0331ab6aef4fae5be",
  "identifier": "2.3.4x10",
  "active": true,
  "partition": "3",
  "length": 10,
  "width": 4
}

Bookings

{
  "_id": "65ef88f444e7d6607498ac2e",
  "REF_KennelID": "65ef79a2331ab6aef4fae5d4",
  "year": 2024,
  "dayOfYear": 100,
  "duration": 6
},
{
  "_id": "65f0ca69f2667460e600a46a",
  "REF_KennelID": "65ef79a2331ab6aef4fae5d4",
  "year": 2024,
  "dayOfYear": 107,
  "duration": 1
}

thank you !

ANSWER

here is the pipeline that works - thanks to https://stackoverflow.com/users/1431750/aneroid for pushing me down the right path !

let pipeline = [
    {
      $match: {
        active: true,
      },
    },
    {
      $lookup: {
        from: "rooms",
        localField: "REF_RoomID",
        foreignField: "_id",
        as: "room",
      },
    },
    {
      $match: {
        "room.number": {
          $in: ["1","2"],
        },
      },
    },
    {
      $unwind: {
        path: "$room",
        preserveNullAndEmptyArrays: true,
      },
    },
    {
      $lookup: {
        from: "bookings",
        localField: "_id",
        foreignField: "REF_KennelID",
        pipeline: [
          {
            $match: {
              year: {
                $in: [2024],
              },
              dayOfYear: {
                $in: [100],
              },
            },
          },
        ],
        as: "bookings",
      },
    },
    {
      $unwind: {
        path: "$bookings",
        preserveNullAndEmptyArrays: true,
      },
    },
    {
      $group: {
        _id: "$_id",
        identifier: {
          $first: "$identifier",
        },
        active: {
          $first: "$active",
        },
        partition: {
          $first: "$partition",
        },
        length: {
          $first: "$length",
        },
        width: {
          $first: "$width",
        },
        bookings: {
          $push: "$bookings",
        },
      },
    },
    {
      $project: {
        "bookings.REF_KennelID": 0,
        "bookings.__v": 0,
      },
    },
    {
      $sort: {
        identifier: 1,
      },
    },
  ];

Solution

    1. The $match stage for kennels being active should be first in your pipeline. So the lookups won't be performed for unmatched kennels which improves performance.

    2. In the first $lookup stage, use a lookup-pipeline instead of only field-equality followed by a match for room numbers. This is needed because "I want to return is all active kennels".

      • In the current form, any active kennel which doesn't have room "1" or "2" would not show up in the results at all.
    3. Same goes for the second $lookup stage - use a pipeline to get all kennels regardless of whether they have matching bookings or not, or even any bookings.

    db.kennels.aggregate([
      { $match: { active: true } },
      {
        $lookup: {
          from: "rooms",
          let: { searchRoomID: "$REF_RoomID" },
          pipeline: [
            {
              $match: {
                $expr: {
                  $and: [
                    { $eq: ["$_id", "$$searchRoomID"] },
                    { $in: ["$number", ["1", "2"]] }
                  ]
                }
              }
            }
          ],
          as: "room"
        }
      },
      {
        $unwind: {
          path: "$room",
          preserveNullAndEmptyArrays: true
        }
      },
      {
        $lookup: {
          from: "bookings",
          let: { searchKennelID: "$_id" },
          pipeline: [
            {
              $match: {
                $expr: {
                  $and: [
                    { $eq: ["$REF_KennelID", "$$searchKennelID"] },
                    { $eq: ["$year", 2024] },
                    { $in: ["$dayOfYear", [100, 101, 102, 103]] }
                  ]
                }
              }
            }
          ],
          as: "bookings"
        }
      },
      {
        $unwind: {
          path: "$bookings",
          preserveNullAndEmptyArrays: true
        }
      },
      {
        $group: {
          _id: "$_id",
          identifier: { $first: "$identifier" },
          active: { $first: "$active" },
          partition: { $first: "$partition" },
          length: { $first: "$length" },
          width: { $first: "$width" },
          bookings: { $push: "$bookings" }
        }
      },
      {
        $project: {
          "bookings.REF_KennelID": 0,
          "bookings.__v": 0
        }
      }
    ])
    

    Mongo Playground

    Notes: