vega-lite, vega-lite-api

Advanced Mixed Charting in Vega-Lite (Points + GroupedBar)


I'm trying to make a chart with vega-lite.

  1. On X-axis, there is going to be nominal data, days 1 ~ 4.
  2. Each day needs to have 3 bars, the values being medians of Ramos, PBMC, Ramos_PBMC.
  3. Each bar should have 3 points, which are actual sample data points (from which the median is derived).

I've come this far (Vega-Lite Online Editor):

{
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "data": {
    "values": [
      {"key":"Ctrl","RAMOS":{"SAMPLE_1":2382,"SAMPLE_2":1998,"SAMPLE_3":2233,"MEDIAN":2233},"PBMC":{"SAMPLE_1":2,"SAMPLE_2":0,"SAMPLE_3":0,"MEDIAN":0},"RAMOS_PBMC":{"SAMPLE_1":213,"SAMPLE_2":240,"SAMPLE_3":153,"MEDIAN":213},"day":1,"R":4},
      {"key":"Ctrl","RAMOS":{"SAMPLE_1":4151,"SAMPLE_2":4451,"SAMPLE_3":4662,"MEDIAN":4451},"PBMC":{"SAMPLE_1":6,"SAMPLE_2":1,"SAMPLE_3":0,"MEDIAN":1},"RAMOS_PBMC":{"SAMPLE_1":178,"SAMPLE_2":140,"SAMPLE_3":173,"MEDIAN":173},"day":2,"R":4},
      {"key":"Ctrl","RAMOS":{"SAMPLE_1":11195,"SAMPLE_2":12059,"SAMPLE_3":12089,"MEDIAN":12059},"PBMC":{"SAMPLE_1":16,"SAMPLE_2":1,"SAMPLE_3":1,"MEDIAN":1},"RAMOS_PBMC":{"SAMPLE_1":278,"SAMPLE_2":274,"SAMPLE_3":299,"MEDIAN":278},"day":3,"R":4},
      {"key":"Ctrl","RAMOS":{"SAMPLE_1":17931,"SAMPLE_2":21797,"SAMPLE_3":23520,"MEDIAN":21797},"PBMC":{"SAMPLE_1":23,"SAMPLE_2":20,"SAMPLE_3":8,"MEDIAN":20},"RAMOS_PBMC":{"SAMPLE_1":810,"SAMPLE_2":467,"SAMPLE_3":858,"MEDIAN":810},"day":4,"R":4}
    ]
  },
  "transform": [
    {
      "fold": [
        "RAMOS.MEDIAN",
        "PBMC.MEDIAN",
        "RAMOS_PBMC.MEDIAN"
      ],
      "as": ["Category", "Median"]
    },
    {
      "fold": [
        "RAMOS.SAMPLE_1",
        "RAMOS.SAMPLE_2",
        "RAMOS.SAMPLE_3"
      ],
      "as": ["RamosSample", "RamosSampleValue"]
    },
    {
      "fold": [
        "PBMC.SAMPLE_1",
        "PBMC.SAMPLE_2",
        "PBMC.SAMPLE_3"
      ],
      "as": ["PBMCSample", "PBMCSampleValue"]
    },
    {
      "fold": [
        "RAMOS_PBMC.SAMPLE_1",
        "RAMOS_PBMC.SAMPLE_2",
        "RAMOS_PBMC.SAMPLE_3"
      ],
      "as": ["RamosPBMCSample", "RamosPBMCSampleValue"]
    }
  ],
  "layer": [
    {
      "mark": "bar",
      "encoding": {
        "x": {
          "field": "day",
          "type": "ordinal",
          "axis": {"title": "Day"}
        },
        "y": {
          "field": "Median",
          "type": "quantitative",
          "scale": {"type": "sqrt"},
          "axis": {"title": "Cell Count"}
        },
        "color": {
          "field": "Category",
          "type": "nominal",
          "scale": {"range": ["#1f77b4", "#ff7f0e", "#2ca02c"]},
          "legend": {"title": "Category"}
        },
        "xOffset": {"field": "Category"}
      }
    },
    {
      "mark": {
        "type": "point",
        "filled": true
      },
      "encoding": {
        "x": {
          "field": "day",
          "type": "ordinal"
        },
        "y": {
          "field": "RamosSampleValue",
          "type": "quantitative"
        },
        "color": {
          "field": "RamosSample",
          "type": "nominal",
          "legend": null
        },
        "xOffset": {
          "field": "Category"
        }
      }
    },
    {
      "mark": {
        "type": "point",
        "filled": true
      },
      "encoding": {
        "x": {
          "field": "day",
          "type": "ordinal"
        },
        "y": {
          "field": "PBMCSampleValue",
          "type": "quantitative"
        },
        "color": {
          "field": "PBMCSample",
          "type": "nominal",
          "legend": null
        },
        "xOffset": {
          "field": "Category"
        }
      }
    },
    {
      "mark": {
        "type": "point",
        "filled": true
      },
      "encoding": {
        "x": {
          "field": "day",
          "type": "ordinal"
        },
        "y": {
          "field": "RamosPBMCSampleValue",
          "type": "quantitative"
        },
        "color": {
          "field": "RamosPBMCSample",
          "type": "nominal",
          "legend": null
        },
        "xOffset": {
          "field": "Category"
        }
      }
    }
  ]
}

The output: [screenshot of the rendered chart; image not included]

However, I can't get each bar to show only its own 3 data points; instead, all 9 data points appear on every bar. How can I achieve this?

It would also be helpful to know how to translate this into vega-lite-api syntax, so that I can use it in my TypeScript file without writing the JSON directly.


Solution

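  • Try this. Each point layer is filtered to its matching `Category`, so only that category's samples are drawn over its bar: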

    {
      "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
      "width": 600,
      "height": 600,
      "data": {
        "values": [
          {
            "key": "Ctrl",
            "RAMOS": {
              "SAMPLE_1": 2382,
              "SAMPLE_2": 1998,
              "SAMPLE_3": 2233,
              "MEDIAN": 2233
            },
            "PBMC": {"SAMPLE_1": 2, "SAMPLE_2": 0, "SAMPLE_3": 0, "MEDIAN": 0},
            "RAMOS_PBMC": {
              "SAMPLE_1": 213,
              "SAMPLE_2": 240,
              "SAMPLE_3": 153,
              "MEDIAN": 213
            },
            "day": 1,
            "R": 4
          },
          {
            "key": "Ctrl",
            "RAMOS": {
              "SAMPLE_1": 4151,
              "SAMPLE_2": 4451,
              "SAMPLE_3": 4662,
              "MEDIAN": 4451
            },
            "PBMC": {"SAMPLE_1": 6, "SAMPLE_2": 1, "SAMPLE_3": 0, "MEDIAN": 1},
            "RAMOS_PBMC": {
              "SAMPLE_1": 178,
              "SAMPLE_2": 140,
              "SAMPLE_3": 173,
              "MEDIAN": 173
            },
            "day": 2,
            "R": 4
          },
          {
            "key": "Ctrl",
            "RAMOS": {
              "SAMPLE_1": 11195,
              "SAMPLE_2": 12059,
              "SAMPLE_3": 12089,
              "MEDIAN": 12059
            },
            "PBMC": {"SAMPLE_1": 16, "SAMPLE_2": 1, "SAMPLE_3": 1, "MEDIAN": 1},
            "RAMOS_PBMC": {
              "SAMPLE_1": 278,
              "SAMPLE_2": 274,
              "SAMPLE_3": 299,
              "MEDIAN": 278
            },
            "day": 3,
            "R": 4
          },
          {
            "key": "Ctrl",
            "RAMOS": {
              "SAMPLE_1": 17931,
              "SAMPLE_2": 21797,
              "SAMPLE_3": 23520,
              "MEDIAN": 21797
            },
            "PBMC": {"SAMPLE_1": 23, "SAMPLE_2": 20, "SAMPLE_3": 8, "MEDIAN": 20},
            "RAMOS_PBMC": {
              "SAMPLE_1": 810,
              "SAMPLE_2": 467,
              "SAMPLE_3": 858,
              "MEDIAN": 810
            },
            "day": 4,
            "R": 4
          }
        ]
      },
      "transform": [
        {
          "fold": ["RAMOS.MEDIAN", "PBMC.MEDIAN", "RAMOS_PBMC.MEDIAN"],
          "as": ["Category", "Median"]
        }
      ],
      "layer": [
        {
          "mark": "bar",
          "encoding": {
            "x": {"field": "day", "type": "ordinal", "axis": {"title": "Day"}},
            "y": {
              "field": "Median",
              "type": "quantitative",
              "aggregate": "max",
              "scale": {"type": "sqrt"},
              "axis": {"title": "Cell Count"}
            },
            "color": {
              "field": "Category",
              "type": "nominal",
              "scale": {"range": ["#1f77b4", "#ff7f0e", "#2ca02c"]},
              "legend": {"title": "Category"}
            },
            "xOffset": {"field": "Category"}
          }
        },
        {
          "transform": [
            {"filter": "datum.Category =='RAMOS.MEDIAN'"},
            {
              "fold": ["RAMOS.SAMPLE_1", "RAMOS.SAMPLE_2", "RAMOS.SAMPLE_3"],
              "as": ["RamosSample", "RamosSampleValue"]
            }
          ],
          "mark": {"type": "point", "filled": true, "size": 30},
          "encoding": {
            "x": {"field": "day", "type": "ordinal"},
            "y": {"field": "RamosSampleValue", "type": "quantitative"},
            "color": {"value": "Black"},
            "xOffset": {"field": "Category"}
          }
        },
        {
          "transform": [
            {"filter": "datum.Category =='PBMC.MEDIAN'"},
            {
              "fold": ["PBMC.SAMPLE_1", "PBMC.SAMPLE_2", "PBMC.SAMPLE_3"],
              "as": ["PBMCSample", "PBMCSampleValue"]
            }
          ],
          "mark": {"type": "point", "filled": true, "size": 30},
          "encoding": {
            "x": {"field": "day", "type": "ordinal"},
            "y": {"field": "PBMCSampleValue", "type": "quantitative"},
            "color": {"value": "Black"},
            "xOffset": {"field": "Category"}
          }
        },
        {
          "transform": [
            {"filter": "datum.Category =='RAMOS_PBMC.MEDIAN'"},
            {
              "fold": [
                "RAMOS_PBMC.SAMPLE_1",
                "RAMOS_PBMC.SAMPLE_2",
                "RAMOS_PBMC.SAMPLE_3"
              ],
              "as": ["RamosPBMCSample", "RamosPBMCSampleValue"]
            }
          ],
          "mark": {"type": "point", "filled": true, "size": 30},
          "encoding": {
            "x": {"field": "day", "type": "ordinal"},
            "y": {"field": "RamosPBMCSampleValue", "type": "quantitative"},
            "color": {"value": "Black"},
            "xOffset": {"field": "Category"}
          }
        }
      ]
    }