[SOLVED] Elasticsearch aggregation with date_histogram gives wrong result for buckets

Elasticsearch aggregation with date_histogram gives wrong result for buckets

I have documents with a timestamp field (creation_time), and I want to run a date_histogram aggregation on it.

When I run the query, it returns a total of 13, which is correct. However, the aggregation shows one record in the 2014-10-10 bucket, and I can't find that record in the data I have.

# Match documents whose type is "test.type", return only creation_time, and bucket all matches per day.
curl http://localhost:9200/test/test/_search -X POST -d '{"fields":
 ["creation_time"],
  "query" :
      {"filtered":
          {"query":
              {"match":
                  {"type": "test.type"}
              }
          }
      },
  "aggs":
      {"group_by_created_at":
          {"date_histogram":
              {"field":"creation_time", "interval": "1d"}
          }
      }
 }' | python -m json.tool
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2083  100  1733  100   350   234k  48590 --:--:-- --:--:-- --:--:--  241k
{
    "_shards": {
        "failed": 0,
        "successful": 5,
        "total": 5
    },
    "aggregations": {
        "group_by_created_at": {
            "buckets": [
                {
                    "doc_count": 12,
                    "key": 1412812800000,
                    "key_as_string": "2014-10-09T00:00:00.000Z"
                },
                {
                    "doc_count": 1,
                    "key": 1412899200000,
                    "key_as_string": "2014-10-10T00:00:00.000Z"
                }
            ]
        }
    },
    "hits": {
        "hits": [
            {
                "_id": "qk5EGDqUSoW-ckZU9bnSsA",
                "_index": "test",
                "_score": 3.730029,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T16:35:39.535389"
                    ]
                }
            },
            {
                "_id": "GnglI_3xRYii_oE5q91FUg",
                "_index": "test",
                "_score": 3.6149597,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T17:16:55.677919"
                    ]
                }
            },
            {
                "_id": "ELP1f_-IS8SJiT4i4Vh6_g",
                "_index": "test",
                "_score": 2.974081,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T01:21:21.691270"
                    ]
                }
            },
            {
                "_id": "ySlIV4vWRvm_q0-9p87dEQ",
                "_index": "test",
                "_score": 2.974081,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T01:33:51.291644"
                    ]
                }
            },
            {
                "_id": "swXVnMmJSsmNW30zeJvCoQ",
                "_index": "test",
                "_score": 2.974081,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T17:08:45.738821"
                    ]
                }
            },
            {
                "_id": "h0j6L-VGTnyChSIevtt2og",
                "_index": "test",
                "_score": 2.974081,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T22:35:16.908080"
                    ]
                }
            },
            {
                "_id": "ANoTEXIgRgml6gLD4YKtIg",
                "_index": "test",
                "_score": 2.9459102,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T01:25:18.869175"
                    ]
                }
            },
            {
                "_id": "FSCPBsogT5OXghBUmKXidQ",
                "_index": "test",
                "_score": 2.9459102,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T01:42:49.000599"
                    ]
                }
            },
            {
                "_id": "VEw6XbIySvW7h7GF7h4ynA",
                "_index": "test",
                "_score": 2.9459102,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T16:45:51.563595"
                    ]
                }
            },
            {
                "_id": "J9NfffAvRPmFxtOBZ6IsCA",
                "_index": "test",
                "_score": 2.9169223,
                "_type": "test",
                "fields": {
                    "creation_time": [
                        "2014-10-09T01:23:30.546353"
                    ]
                }
            }
        ],
        "max_score": 3.730029,
        "total": 13
    },
    "timed_out": false,
    "took": 4
}

As you can see in the hits above, there is no record dated 2014-10-10, yet the aggregation shows one record in that bucket.

Solution

Aggregations are done on all matching documents.

You do not set the size, which means you get only the default 10 documents under hits, even though 13 documents match. Change the size to 13 (or more) and your 2014-10-10 document should show up.

When you have more results, so that manually checking all of them becomes impractical, you can also use top_hits as a sub-aggregation to get a peek at what is in each bucket (it has a size option as well).