ruby-on-rails · elasticsearch · elasticsearch-rails

Handling a large number of shards in Elasticsearch


Every shop has its own inventory, which is stored in Elasticsearch as a separate index. I currently have 11 thousand shops, which means there are 11 thousand indices, each with its own shards, and fetching data from them is difficult on a server with 32 GB of RAM.

Can someone advise how I should store the inventory of every shop in Elasticsearch, since creating a separate index for each shop's inventory is not working for me?

Below are the mappings of one such index. The Elasticsearch version in use is 6.0.1.

{
  "staging_shop_inventory_558" : {
    "mappings" : {
      "shop_inventory" : {
        "properties" : {
          "alternate_name" : {
            "type" : "text",
            "analyzer" : "standard"
          },
          "brand" : {
            "properties" : {
              "created_at" : {
                "type" : "date"
              },
              "id" : {
                "type" : "integer"
              },
              "image" : {
                "type" : "text",
                "index" : false
              },
              "is_selected" : {
                "type" : "boolean"
              },
              "name" : {
                "type" : "text",
                "analyzer" : "standard"
              },
              "updated_at" : {
                "type" : "date"
              }
            }
          },
          "brand_autocomplete" : {
            "type" : "text",
            "analyzer" : "autocomplete",
            "search_analyzer" : "autocomplete_search"
          },
          "brand_suggest" : {
            "type" : "text",
            "analyzer" : "ngram_analyzer"
          },
          "category" : {
            "properties" : {
              "id" : {
                "type" : "integer"
              },
              "image" : {
                "type" : "text",
                "index" : false
              },
              "name" : {
                "type" : "text",
                "analyzer" : "standard"
              }
            }
          },
          "created_at" : {
            "type" : "date"
          },
          "deleted_at" : {
            "type" : "date"
          },
          "id" : {
            "type" : "integer"
          },
          "image" : {
            "type" : "text",
            "index" : false
          },
          "is_deleted" : {
            "type" : "boolean"
          },
          "name" : {
            "type" : "text",
            "fields" : {
              "raw" : {
                "type" : "keyword"
              }
            },
            "analyzer" : "gramAnalyzer",
            "search_analyzer" : "whitespace_analyzer"
          },
          "name_autocomplete" : {
            "type" : "text",
            "analyzer" : "autocomplete",
            "search_analyzer" : "autocomplete_search"
          },
          "name_suggest" : {
            "type" : "text",
            "analyzer" : "ngram_analyzer"
          },
          "product_id" : {
            "type" : "integer"
          },
          "product_sizes" : {
            "type" : "nested",
            "properties" : {
              "deleted_at" : {
                "type" : "date"
              },
              "description" : {
                "type" : "text",
                "fields" : {
                  "keyword" : {
                    "type" : "keyword",
                    "ignore_above" : 256
                  }
                }
              },
              "ean_code" : {
                "type" : "keyword"
              },
              "id" : {
                "type" : "integer"
              },
              "in_stock" : {
                "type" : "boolean"
              },
              "is_deleted" : {
                "type" : "boolean"
              },
              "price" : {
                "type" : "float"
              },
              "product_id" : {
                "type" : "long"
              },
              "product_update_on" : {
                "type" : "date"
              },
              "product_update_status" : {
                "type" : "integer"
              },
              "uom" : {
                "type" : "keyword"
              },
              "weight" : {
                "type" : "float"
              }
            }
          },
          "sub_category" : {
            "properties" : {
              "created_at" : {
                "type" : "date"
              },
              "id" : {
                "type" : "integer"
              },
              "image" : {
                "type" : "text",
                "index" : false
              },
              "is_selected" : {
                "type" : "boolean"
              },
              "name" : {
                "type" : "text",
                "analyzer" : "standard"
              },
              "updated_at" : {
                "type" : "date"
              }
            }
          },
          "sub_category_suggest" : {
            "type" : "text",
            "analyzer" : "gramAnalyzer",
            "search_analyzer" : "whitespace_analyzer"
          }
        }
      }
    }
  }
}

Solution

  • Wow, you have 11k shards (there might be more, depending on the replica factor) on your node, which has just 32 GB of RAM (again, note that this is not the JVM heap allocated to the Elasticsearch process). Elasticsearch performance depends greatly on the JVM heap size, and it deteriorates beyond 32 GB.

    Elasticsearch is a distributed system and can easily be scaled to thousands of nodes. You should add more nodes to your cluster and distribute your Elasticsearch indices across all the nodes in the cluster.