mongodbmongodb-compass

Group by time intervals, count documents and get time of interval in MogoDB


I have a collection in mongoDB whose documents have the following structure:

    _id:6591f2fe8424cddac5b1d4aa
    captureTime:1704063737
    deviceId:200220
    sensorId:1431

I have this code in the aggregation group stage to count the documents in 1 hour time intervals group:

{
"_id": {
  "sensorId":"$sensorId",
  "year": { "$year": { "$toDate": { "$multiply": ["$captureTime", 1000] } } },
  "month": { "$month": { "$toDate": { "$multiply": ["$captureTime", 1000] } } },
  "day": { "$dayOfMonth": { "$toDate": { "$multiply": ["$captureTime", 1000] } } },
  "hour": { "$hour": { "$toDate": { "$multiply": ["$captureTime", 1000] } } },
  "interval": {
      "$subtract": [
        {"$minute": { "$toDate": { "$multiply": ["$captureTime", 1000] } }},
        {"$mod": [{"$minute": { "$toDate": { "$multiply": ["$captureTime", 1000] } }}, 60]}
      ]
    }
},
"count": { "$sum": 1 },
"time_f": { "$first": { "$dateToString": {
  "format": "%Y:%m:%d %H:%M:%S", 
  "date": { "$toDate": { "$multiply": ["$captureTime", 1000] } } }}},
"time_l": { "$last": { "$dateToString": {
  "format": "%Y:%m:%d %H:%M:%S", 
  "date": { "$toDate": { "$multiply": ["$captureTime", 1000] } } }}},
"device_Id_f": {"$first": "$deviceId"},
"sensor_Id_f": {"$first": "$sensorId"},



}

The output documents are as follows

_id: Object
count: 336
time_f: "2024:01:01 00:00:20"
time_l: "2024:01:01 00:58:35"
device_Id_f: 200220
sensor_Id_f: 1431

_id: Object
count: 18
time_f: "2024:01:01 01:00:17"
time_l: "2024:01:01 01:56:18"
device_Id_f: 200220
sensor_Id_f: 1431
time_interval: "2024:01:01 02:00:00"

However, in the output documents I would like to have the interval to which it belongs. For example, something like this:

_id: Object
count: 336
time_f: "2024:01:01 00:00:20"
time_l: "2024:01:01 00:58:35"
device_Id_f: 200220
sensor_Id_f: 1431
time_interval: "2024:01:01 01:00:00"

_id: Object
count: 18
time_f: "2024:01:01 01:00:17"
time_l: "2024:01:01 01:56:18"
device_Id_f: 200220
sensor_Id_f: 1431
time_interval: "2024:01:01 02:00:00"

I know this is something I could do from python in my application, but I think if I could do it from MongoDB it would be better.


Solution

  • One option is to simplify it by using $dateTrunc and another $set step to avoid doing the same calculation on each document:

    db.collection.aggregate([
      {$group: {
          _id: {
            sensorId: "$sensorId",
            interval: {$dateTrunc: {
                date: {$toDate: {$multiply: ["$captureTime", 1000]}},
                unit: "hour"
            }}
          },
          count: {$sum: 1},
          time_f: {$first: { $dateToString: {
              format: "%Y:%m:%d %H:%M:%S", 
              date: {$toDate: {$multiply: ["$captureTime", 1000] } } }}},
          time_l: {$last: {$dateToString: {
              format: "%Y:%m:%d %H:%M:%S", 
              date: {$toDate: {$multiply: ["$captureTime", 1000] } } }}},
          device_Id_f: {$first: "$deviceId"},
          sensor_Id_f: {$first: "$sensorId"},
      }},
      {$set: {
          _id: {
            sensorId: "$_id.sensorId",
            interval: {$dateToString: {format: "%Y:%m:%d %H:%M:%S", date: "$_id.interval"}},
            year: {$year: "$_id.interval"},
            month: {$month: "$_id.interval"},
            day: {$dayOfMonth: "$_id.interval"},
            hour: {$hour: "$_id.interval"}
          }
      }},
      {$set: {time_interval: "$_id.interval"}} // if you want it also outside of the `_id`
    ])
    

    See How it works on the mongoDB playground