Tags: visualization, looker-studio, vega, word-cloud

Is it possible to reduce the number of words in a Vega word cloud?


Situation

I'm using the Vega visualization grammar in Looker Studio, specifically the word cloud chart.

Documentation: https://vega.github.io/vega/docs/transforms/wordcloud/

What I've tried / Results

The word clouds I'm generating have too many words in them, and I'm trying to figure out how to reduce that.

By manipulating the example editor on that page, I can sort of get the result I want by setting 'wordPadding' to 5, but this seems to just push some of the words out of view, rather than reducing the number of words and then distributing the remaining words across the font-size range.

I've also tried changing the 'padding' in the Style Properties in Looker Studio, but it doesn't change anything about the chart.

Any input on this would be greatly appreciated.

Updating with current code

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "data": [
    {
      "name": "default",
      "transform": [
        {
          "type": "formula",
          "as": "rotate",
          "expr": "[0, 90][~~(datum.index % 2)]"
        },
        {
          "type": "formula",
          "as": "weight",
          "expr": "if(datum.index==0, 600, 400)"
        },
        {
          "type": "wordcloud",
          "size": [
            {"signal": "width"},
            {"signal": "height"}
          ],
          "text": {"field": "$dimension0"},
          "fontSize": {"field": "$metric0"},
          "fontWeight": {"field": "weight"},
          "fontSizeRange": [
            {"signal": "(width+height)/96"},
            {"signal": "(width+height)/24"}
          ],
          "padding": {"value": 2},
          "rotate": {"field": "rotate"}
        }
      ]
    }
  ],
  "scales": [
    {
      "name": "color",
      "type": "ordinal",
      "domain": {
        "data": "default",
        "field": "$dimension0"
      },
      "scheme": "datastudio20"
    }
  ],
  "marks": [
    {
      "type": "text",
      "from": {"data": "default"},
      "encode": {
        "enter": {
          "text": {"field": "$dimension0"},
          "align": {"value": "center"},
          "baseline": {"value": "alphabetic"},
          "fill": {
            "scale": "color",
            "field": "$dimension0"
          }
        },
        "update": {
          "x": {"field": "x"},
          "y": {"field": "y"},
          "angle": {"field": "angle"},
          "fontSize": {"field": "fontSize"},
          "fontWeight": {"field": "weight"},
          "fillOpacity": {"value": 0.7}
        },
        "hover": {
          "fillOpacity": {"value": 1}
        }
      }
    }
  ]
}

What I tried based on that

@Davide

From there, I found this line in your code:

{"type": "filter", "expr": "datum.row < 25"},

So I tried adding that under the "transform" section in my code.

Result

However, that only produced a blank chart.

Updated code with both the 'window' and 'filter' transforms

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "data": [
    {
      "name": "default",
      "transform": [
        {
          "type": "formula",
          "as": "rotate",
          "expr": "[0, 90][~~(datum.index % 2)]"
        },
        {
          "type": "formula",
          "as": "weight",
          "expr": "if(datum.index==0, 600, 400)"
        },
        {
          "type": "formula",
          "as": "weight",
          "expr": "if(datum.text=='VEGA', 600, 300)"
        },
        {
          "type": "formula",
          "as": "rotate",
          "expr": "[-rotate, 0, rotate][~~(random() * 3)]"
        },
        {
          "type": "window",
          "sort": {
            "field": "count",
            "order": "descending"
          },
          "ops": ["row_number"],
          "fields": [null],
          "as": ["row"]
        },
        {
          "type": "filter",
          "expr": "datum.row < 25"
        },
        {
          "type": "wordcloud",
          "size": [
            {"signal": "width"},
            {"signal": "height"}
          ],
          "text": {"field": "text"},
          "font": "Helvetica Neue, Arial",
          "fontSize": {"field": "count"},
          "fontWeight": {"field": "weight"},
          "fontSizeRange": [
            {"signal": "fontSizeRange0"},
            {"signal": "fontSizeRange1"}
          ],
          "padding": {"signal": "wordPadding"},
          "rotate": {"field": "rotate"}
        },
        {
          "type": "wordcloud",
          "size": [
            {"signal": "width"},
            {"signal": "height"}
          ],
          "text": {"field": "$dimension0"},
          "fontSize": {"field": "$metric0"},
          "fontWeight": {"field": "weight"},
          "fontSizeRange": [
            {"signal": "(width+height)/96"},
            {"signal": "(width+height)/24"}
          ],
          "padding": {"value": 2},
          "rotate": {"field": "rotate"}
        }
      ]
    }
  ],
  "scales": [
    {
      "name": "color",
      "type": "ordinal",
      "domain": {
        "data": "default",
        "field": "$dimension0"
      },
      "scheme": "datastudio20"
    }
  ],
  "marks": [
    {
      "type": "text",
      "from": {"data": "default"},
      "encode": {
        "enter": {
          "text": {"field": "$dimension0"},
          "align": {"value": "center"},
          "baseline": {"value": "alphabetic"},
          "fill": {
            "scale": "color",
            "field": "$dimension0"
          }
        },
        "update": {
          "x": {"field": "x"},
          "y": {"field": "y"},
          "angle": {"field": "angle"},
          "fontSize": {"field": "fontSize"},
          "fontWeight": {"field": "weight"},
          "fillOpacity": {"value": 0.7}
        },
        "hover": {
          "fillOpacity": {"value": 1}
        }
      }
    }
  ]
}

Code that worked

{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "data": [
    {
      "name": "default",
      "transform": [
        {
          "type": "countpattern",
          "field": "$dimension0",
          "pattern": "[\\w']{3,}",
          "stopwords": "(very|now|can't|are|800|every|also|ever|just|dont|don't|been|pnly|I've|I'm|you|why|try|but|was|it's|her|2021|where|com|not|for|that|from|and|out|this|the|has|have|2022)"
        },
        {
          "type": "formula",
          "as": "weight",
          "expr": "log(datum.count)*5"
        },
        {
          "type": "window",
          "sort": {"field": "count", "order": "descending"},
          "ops": ["row_number"],
          "fields": [null],
          "as": ["row"]
        },
        {
          "type": "filter",
          "expr": "datum.row < 40"
        },
        {
          "type": "formula",
          "as": "rotate",
          "expr": "[0, 90][~~(datum.count % 2)]"
        },
        {
          "type": "wordcloud",
          "size": [
            {"signal": "width"},
            {"signal": "height"}
          ],
          "text": {"field": "text"},
          "font": "Helvetica Neue",
          "fontSize": {"field": "count"},
          "fontWeight": {"field": "weight"},
          "fontSizeRange": [
            {"signal": "(width+height)/96"},
            {"signal": "(width+height)/24"}
          ],
          "rotate": {"field": "rotate"},
          "padding": 2
        }
      ]
    }
  ],
  "scales": [
    {
      "name": "color",
      "type": "ordinal",
      "domain": {"data": "default", "field": "text"},
      "scheme": "set3"
    }
  ],
  "marks": [
    {
      "type": "text",
      "from": {"data": "default"},
      "encode": {
        "enter": {
          "text": {"field": "text"},
          "align": {"value": "center"},
          "baseline": {"value": "alphabetic"},
          "fill": {"scale": "color", "field": "text"}
        },
        "update": {
          "x": {"field": "x"},
          "y": {"field": "y"},
          "angle": {"field": "angle"},
          "fontSize": {"field": "fontSize"},
          "fontWeight": {"field": "weight"},
          "fillOpacity": {"value": 0.6}
        },
        "hover": {
          "fillOpacity": {"value": 1}
        }
      }
    }
  ]
}

Solution

  • enter image description here

    This example filters the top 25 words by count.

    {
      "$schema": "https://vega.github.io/schema/vega/v5.json",
      "name": "wordcloud",
      "width": 400,
      "height": 200,
      "padding": 0,
      "autosize": "none",
      "signals": [
        {
          "name": "wordPadding",
          "value": 1,
          "bind": {"input": "range", "min": 0, "max": 5, "step": 1}
        },
        {
          "name": "fontSizeRange0",
          "value": 8,
          "bind": {"input": "range", "min": 8, "max": 42, "step": 1}
        },
        {
          "name": "fontSizeRange1",
          "value": 24,
          "bind": {"input": "range", "min": 8, "max": 42, "step": 1}
        },
        {
          "name": "rotate",
          "value": 45,
          "bind": {"input": "select", "options": [0, 30, 45, 60, 90]}
        }
      ],
      "data": [
        {
          "name": "table",
          "values": [
            "Declarative visualization grammars can accelerate development, facilitate retargeting across platforms, and allow language-level optimizations. However, existing declarative visualization languages are primarily concerned with visual encoding, and rely on imperative event handlers for interactive behaviors. In response, we introduce a model of declarative interaction design for data visualizations. Adopting methods from reactive programming, we model low-level events as composable data streams from which we form higher-level semantic signals. Signals feed predicates and scale inversions, which allow us to generalize interactive selections at the level of item geometry (pixels) into interactive queries over the data domain. Production rules then use these queries to manipulate the visualization’s appearance. To facilitate reuse and sharing, these constructs can be encapsulated as named interactors: standalone, purely declarative specifications of interaction techniques. We assess our model’s feasibility and expressivity by instantiating it with extensions to the Vega visualization grammar. Through a diverse range of examples, we demonstrate coverage over an established taxonomy of visualization interaction techniques.",
            "We present Reactive Vega, a system architecture that provides the first robust and comprehensive treatment of declarative visual and interaction design for data visualization. Starting from a single declarative specification, Reactive Vega constructs a dataflow graph in which input data, scene graph elements, and interaction events are all treated as first-class streaming data sources. To support expressive interactive visualizations that may involve time-varying scalar, relational, or hierarchical data, Reactive Vega’s dataflow graph can dynamically re-write itself at runtime by extending or pruning branches in a data-driven fashion. We discuss both compile- and run-time optimizations applied within Reactive Vega, and share the results of benchmark studies that indicate superior interactive performance to both D3 and the original, non-reactive Vega system.",
            "We present Vega-Lite, a high-level grammar that enables rapid specification of interactive data visualizations. Vega-Lite combines a traditional grammar of graphics, providing visual encoding rules and a composition algebra for layered and multi-view displays, with a novel grammar of interaction. Users specify interactive semantics by composing selections. In Vega-Lite, a selection is an abstraction that defines input event processing, points of interest, and a predicate function for inclusion testing. Selections parameterize visual encodings by serving as input data, defining scale extents, or by driving conditional logic. The Vega-Lite compiler automatically synthesizes requisite data flow and event handling logic, which users can override for further customization. In contrast to existing reactive specifications, Vega-Lite selections decompose an interaction design into concise, enumerable semantic units. We evaluate Vega-Lite through a range of examples, demonstrating succinct specification of both customized interaction methods and common techniques such as panning, zooming, and linked selection."
          ],
          "transform": [
            {
              "type": "countpattern",
              "field": "data",
              "case": "upper",
              "pattern": "[\\w']{3,}",
              "stopwords": "(i|me|my|myself|we|us|our|ours|ourselves|you|your|yours|yourself|yourselves|he|him|his|himself|she|her|hers|herself|it|its|itself|they|them|their|theirs|themselves|what|which|who|whom|whose|this|that|these|those|am|is|are|was|were|be|been|being|have|has|had|having|do|does|did|doing|will|would|should|can|could|ought|i'm|you're|he's|she's|it's|we're|they're|i've|you've|we've|they've|i'd|you'd|he'd|she'd|we'd|they'd|i'll|you'll|he'll|she'll|we'll|they'll|isn't|aren't|wasn't|weren't|hasn't|haven't|hadn't|doesn't|don't|didn't|won't|wouldn't|shan't|shouldn't|can't|cannot|couldn't|mustn't|let's|that's|who's|what's|here's|there's|when's|where's|why's|how's|a|an|the|and|but|if|or|because|as|until|while|of|at|by|for|with|about|against|between|into|through|during|before|after|above|below|to|from|up|upon|down|in|out|on|off|over|under|again|further|then|once|here|there|when|where|why|how|all|any|both|each|few|more|most|other|some|such|no|nor|not|only|own|same|so|than|too|very|say|says|said|shall)"
            },
            {
              "type": "formula",
              "as": "weight",
              "expr": "if(datum.text=='VEGA', 600, 300)"
            },
            {
              "type": "formula",
              "as": "rotate",
              "expr": "[-rotate, 0, rotate][~~(random() * 3)]"
            },
            {
              "type": "window",
              "sort": {"field": "count", "order": "descending"},
              "ops": ["row_number"],
              "fields": [null],
              "as": ["row"]
            },
            {"type": "filter", "expr": "datum.row <= 25"},
            {
              "type": "wordcloud",
              "size": [{"signal": "width"}, {"signal": "height"}],
              "text": {"field": "text"},
              "font": "Helvetica Neue, Arial",
              "fontSize": {"field": "count"},
              "fontWeight": {"field": "weight"},
              "fontSizeRange": [
                {"signal": "fontSizeRange0"},
                {"signal": "fontSizeRange1"}
              ],
              "padding": {"signal": "wordPadding"},
              "rotate": {"field": "rotate"}
            }
          ]
        }
      ],
      "scales": [
        {
          "name": "color",
          "type": "ordinal",
          "range": ["#d5a928", "#652c90", "#939597"]
        }
      ],
      "marks": [
        {
          "type": "text",
          "from": {"data": "table"},
          "encode": {
            "enter": {
              "text": {"field": "text"},
              "align": {"value": "center"},
              "baseline": {"value": "alphabetic"},
              "fill": {"scale": "color", "field": "text"},
              "font": {"value": "Helvetica Neue, Arial"},
              "fontWeight": {"field": "weight"}
            },
            "update": {
              "x": {"field": "x"},
              "y": {"field": "y"},
              "angle": {"field": "angle"},
              "fontSize": {"field": "fontSize"},
              "fillOpacity": {"value": 1}
            },
            "hover": {"fillOpacity": {"value": 0.5}}
          }
        }
      ]
    }