json, charts, visualization, vega-lite, vega

aggregation of data points inside a mark


Using Vega-Lite I can use aggregate inside an encoding like so:

{
  "data": {
    "name": "thedata",
    "values": [
      {"id": "house1", "no of rooms": 2},
      {"id": "house1", "no of rooms": 3},
      {"id": "house2", "no of rooms": 4},
      {"id": "house3", "no of rooms": 8}
    ]
  },
  "transform": [
    {"calculate": "datum['no of rooms'] <= 5 ? 'small' : 'large'", "as": "myType"}
  ],
  "mark": {
    "type": "circle"
  },
  "encoding": {
    "y": {
      "field": "id", "type": "nominal"
    },
    "x": {
      "field": "no of rooms", "type": "quantitative",
      "aggregate": "mean"
    },
    "size": {"value": 200}
  }
}

Now I want to know whether the same thing is possible in Vega, or whether I have to rethink my approach and perform the aggregation inside a transform or inside the mark section.

My naive approach was to use

"x": "signal": "scale(myscale, sum(datum['no of rooms']))"

Can you please provide me some guidance?


Solution

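  • The answer is no: in Vega you cannot aggregate inside a mark's encoding, so the aggregation has to be done in a data transform. Vega-Lite (VL) provides a more succinct grammar which is compiled to Vega. In situations like this, it is useful to look at the Vega that is generated: in the Vega Editor, click "Edit Vega Spec" to see it. For the chart above, Vega-Lite generated an aggregate transform (see the "data_0" dataset below).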

    enter image description here

    {
      "$schema": "https://vega.github.io/schema/vega/v5.json",
      "background": "white",
      "padding": 5,
      "width": 200,
      "style": "cell",
      "data": [
        {
          "name": "thedata",
          "values": [
            {"id": "house1", "no of rooms": 2},
            {"id": "house1", "no of rooms": 3},
            {"id": "house2", "no of rooms": 4},
            {"id": "house3", "no of rooms": 8}
          ]
        },
        {
          "name": "data_0",
          "source": "thedata",
          "transform": [
            {
              "type": "formula",
              "expr": "datum['no of rooms'] <= 5 ? 'small' : 'large'",
              "as": "myType"
            },
            {
              "type": "aggregate",
              "groupby": ["id"],
              "ops": ["mean"],
              "fields": ["no of rooms"],
              "as": ["mean_no of rooms"]
            },
            {
              "type": "filter",
              "expr": "isValid(datum[\"mean_no of rooms\"]) && isFinite(+datum[\"mean_no of rooms\"])"
            }
          ]
        }
      ],
      "signals": [
        {"name": "y_step", "value": 20},
        {
          "name": "height",
          "update": "bandspace(domain('y').length, 1, 0.5) * y_step"
        }
      ],
      "marks": [
        {
          "name": "marks",
          "type": "symbol",
          "style": ["circle"],
          "from": {"data": "data_0"},
          "encode": {
            "update": {
              "fill": {"value": "#4c78a8"},
              "ariaRoleDescription": {"value": "circle"},
              "description": {
                "signal": "\"Mean of no of rooms: \" + (format(datum[\"mean_no of rooms\"], \"\")) + \"; id: \" + (isValid(datum[\"id\"]) ? datum[\"id\"] : \"\"+datum[\"id\"])"
              },
              "x": {"scale": "x", "field": "mean_no of rooms"},
              "y": {"scale": "y", "field": "id"},
              "size": {"value": 200},
              "shape": {"value": "circle"}
            }
          }
        }
      ],
      "scales": [
        {
          "name": "x",
          "type": "linear",
          "domain": {"data": "data_0", "field": "mean_no of rooms"},
          "range": [0, {"signal": "width"}],
          "nice": true,
          "zero": true
        },
        {
          "name": "y",
          "type": "point",
          "domain": {"data": "data_0", "field": "id", "sort": true},
          "range": {"step": {"signal": "y_step"}},
          "padding": 0.5
        }
      ],
      "axes": [
        {
          "scale": "x",
          "orient": "bottom",
          "gridScale": "y",
          "grid": true,
          "tickCount": {"signal": "ceil(width/40)"},
          "domain": false,
          "labels": false,
          "aria": false,
          "maxExtent": 0,
          "minExtent": 0,
          "ticks": false,
          "zindex": 0
        },
        {
          "scale": "x",
          "orient": "bottom",
          "grid": false,
          "title": "Mean of no of rooms",
          "labelFlush": true,
          "labelOverlap": true,
          "tickCount": {"signal": "ceil(width/40)"},
          "zindex": 0
        },
        {"scale": "y", "orient": "left", "grid": false, "title": "id", "zindex": 0}
      ]
    }