Tags: scala, apache-spark, elasticsearch-hadoop

Spark Scala - How to construct Scala Map from nested JSON?


I have nested JSON data with nested fields that I want to extract and construct a Scala Map from.

Here's the sample JSON:

"nested_field": [
  {
    "airport": "sfo",
    "score": 1.0
  },
  {
    "airport": "phx",
    "score": 1.0
  },
  {
    "airport": "sjc",
    "score": 1.0
  }
]

I want to use saveToES() and construct a Scala Map to index the field into an ES index with the mapping below:

 "nested_field": {
    "properties": {
      "score": {
        "type": "double"
      },
      "airport": {
        "type": "keyword",
        "ignore_above": 1024
      }
    }
  }

The JSON file is read into the DataFrame using spark.read.json("example.json"). What's the right way to construct the Scala Map in this case?

Thanks for any help!


Solution

  • You can do it using the sample code below:

    import org.json4s.DefaultFormats
    import org.json4s.jackson.JsonMethods.parse

    // One scored airport entry, e.g. {"airport": "sfo", "score": 1.0}.
    final case class AirPortScores(airport: String, score: Double)

    // Top-level wrapper: the field name "airports" must match the JSON key.
    // NOTE(review): the question's JSON uses the key "nested_field" — rename
    // this field (or the JSON key) so the two agree, otherwise extract fails.
    final case class JsonRulesHandler(airports: List[AirPortScores])

    val jsonString: String =
      """{"airports":[{"airport":"sfo","score":1},{"airport":"phx","score":1},{"airport":"sjc","score":1}]}"""

    /** Parses a JSON string into a [[JsonRulesHandler]].
      *
      * @param jsonString raw JSON with an "airports" array of {airport, score} objects
      * @return the extracted JsonRulesHandler
      * @throws org.json4s.MappingException if the JSON shape does not match the case classes
      */
    def loadJsonString(jsonString: String): JsonRulesHandler = {
      // DefaultFormats supplies the implicit serialization rules json4s needs for extract.
      implicit val formats: DefaultFormats.type = org.json4s.DefaultFormats
      parse(jsonString).extract[JsonRulesHandler]
    }

    val parsedJson: JsonRulesHandler = loadJsonString(jsonString)
    // Access individual fields via parsedJson.airports, e.g. parsedJson.airports.map(_.score).
    parsedJson.airports.foreach(println)
    // Expected output:
    //   AirPortScores(sfo,1.0)
    //   AirPortScores(phx,1.0)
    //   AirPortScores(sjc,1.0)