mongodbaggregation-frameworkrobo3t

MongoDB Aggregate on huge documents


I'm trying to self learn mongoDb with big mongoDB (Each document is approximately 10Mb total of 1000 documents)

I wanted to try some basics. For example listing every Activity that is done over all the users sorting it by UsedCallories.

db.getCollection('users').aggregate([
  {$group: {_id:"$Activities"}}, 
  {$sort: { UsedCallories: -1}}
],{allowDiskUse:true});

Unfortunately when I execute this script it gives me:

'Script executed successfully, but there are no results to show.'

Could you give me a pointer where I'm wrong?

Shortened example file:

{
  "Id": 1,
  "FirstName": "Casie",
  "LastName": "Crapo",
  "Email": "Casie.Crapo@databanken.db",
  "Weight": 92,
  "Length": 198,
  "Activities": [
    {
      "ActivityType": {
        "Name": "Sexual Activity",
        "CallPerSecond": 0.033333333
      },
      "StartCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "EndCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "StartDateTime": { $date: "2016-11-01T23:39:15Z" },
      "EndDateTime": { $date: "2016-11-02T02:38:45Z" },
      "UsedCallories": 772.63042705630426,
      "Measurements": [
        {
          "Heartrate": 142,
          "UnderPressure": 123,
          "Overressure": 156,
          "Speed": 0,
          "Coordinates": {
            "Lattidude": -10.81907,
            "Longitude": -16.16832
          }
        }
      ]
    }
  ]
}

Update 'ExpectedOutput':

So the expected output is just a list of ALL the activities in all the array fields from the users. Sorted on UsedCallories.

"Activities": [
    {
      "ActivityType": {
        "Name": "Sexual Activity",
        "CallPerSecond": 0.033333333
      },
      "StartCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "EndCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "StartDateTime": { $date: "2016-11-01T23:39:15Z" },
      "EndDateTime": { $date: "2016-11-02T02:38:45Z" },
      "UsedCallories": 772.63042705630426,
      "Measurements": [
        ...
      ]
    },{
      "ActivityType": {
        "Name": "Sexual Activity",
        "CallPerSecond": 0.033333333
      },
      "StartCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "EndCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "StartDateTime": { $date: "2016-11-01T23:39:15Z" },
      "EndDateTime": { $date: "2016-11-02T02:38:45Z" },
      "UsedCallories": 52.63042705630426,
      "Measurements": [
        ...
      ]
    },{
      "ActivityType": {
        "Name": "Sexual Activity",
        "CallPerSecond": 0.033333333
      },
      "StartCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "EndCoordinates": {
        "Lattidude": -10.81907,
        "Longitude": -16.16832
      },
      "StartDateTime": { $date: "2016-11-01T23:39:15Z" },
      "EndDateTime": { $date: "2016-11-02T02:38:45Z" },
      "UsedCallories": 20.22442,
      "Measurements": [
        ...
      ]
    }
  ]

Update after Duplicate Question

Okay thanks for the reference to the duplicate post. It is not the same question though.

I managed to use some of it to actually get some results. The query changed to:

db.getCollection('users').aggregate([
    {$unwind: '$Activities'}, 
    {$sort: {'Activities.UsedCallories': -1}}, 
    {$group: {_id: '$_id', 'Activities': {$push: '$Activities'}}}
    ], {
  allowDiskUse:true
 })

Which now returns all the activities GROUPED BY USER and I would prefer just a LIST of all these activities not grouped by User


Solution

  • Thanks @chridam. Adding my comment as an answer.

    db.getCollection('users').aggregate([{
        $unwind: "$Activities"
    }, {
        $sort: {
            "Activities.UsedCallories": -1
        }
    }, {
        $group: {
            _id: null,
            Activities: {
                $push: "$Activities"
            }
        }
    }, {
        $project: {
            _id: 0,
            Activities: 1
        }
    }], {
        allowDiskUse: true
    });