java arrays json kotlin

Remove duplicate objects from JSON array in Java or Kotlin


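I'm trying to figure out how to remove duplicate entries from the `schoolInfomration` array. Two entries count as duplicates when their `classInformation` lists match on every field except `teacherInfo.name`, as the input and expected output below illustrate: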

JSON:

{
  "schoolInfomration": [
    {
      "classInformation": [
        {
          "studentPercentage": 50,
          "studentInfo": {
            "id": 4,
            "style": 3,
            "graduationYear": 2028
          }
        },
        {
          "teacherPercentage": 50,
          "teacherInfo": {
            "id": "10019",
            "name" : "test2"
          }
        }
      ]
    },
    {
      "classInformation": [
        {
          "studentPercentage": 50,
          "studentInfo": {
            "id": 4,
            "style": 3,
            "graduationYear": 2028
          }
        },
        {
          "teacherPercentage": 50,
          "teacherInfo": {
            "id": "10019",
            "name" : "test1"
          }
        }
      ]
    },
    {
      "classInformation": [
        {
          "studentPercentage": 50,
          "studentInfo": {
            "id": 4,
            "style": 3,
            "graduationYear": 2023
          }
        },
        {
          "teacherPercentage": 50,
          "teacherInfo": {
            "id": "10018",
            "name": "test3"
          }
        }
      ]
    }
  ]
}

Output:

{
  "schoolInfomration": [
    {
      "classInformation": [
        {
          "studentPercentage": 50,
          "studentInfo": {
            "id": 4,
            "style": 3,
            "graduationYear": 2028
          }
        },
        {
          "teacherPercentage": 50,
          "teacherInfo": {
            "id": "10019",
            "name" : "test2"
          }
        }
      ]
    },
    {
      "classInformation": [
        {
          "studentPercentage": 50,
          "studentInfo": {
            "id": 4,
            "style": 3,
            "graduationYear": 2023
          }
        },
        {
          "teacherPercentage": 50,
          "teacherInfo": {
            "id": "10018",
            "name": "test3"
          }
        }
      ]
    }
  ]
}

Data Classes:


/**
 * Top-level document of the school JSON payload.
 *
 * @property schoolInfomration elements of the `schoolInfomration` JSON array; the name is
 *   spelled exactly like the key in the sample payload.
 */
data class Root(val schoolInfomration: List<SchoolInfomration>)

/**
 * One element of the `schoolInfomration` JSON array.
 *
 * @property classInformation elements of this school's `classInformation` JSON array.
 */
data class SchoolInfomration(val classInformation: List<ClassInformation>)

/**
 * One element of a `classInformation` JSON array.
 *
 * In the sample payload each element carries either the student pair
 * ([studentPercentage], [studentInfo]) or the teacher pair
 * ([teacherPercentage], [teacherInfo]), which is why every property is nullable.
 * Each property also defaults to `null`, so an entry can be built by naming only
 * the properties that are present.
 *
 * @property studentPercentage value of the `studentPercentage` key, or `null` when absent.
 * @property studentInfo value of the `studentInfo` object, or `null` when absent.
 * @property teacherPercentage value of the `teacherPercentage` key, or `null` when absent.
 * @property teacherInfo value of the `teacherInfo` object, or `null` when absent.
 */
data class ClassInformation(
    val studentPercentage: Long? = null,
    val studentInfo: StudentInfo? = null,
    val teacherPercentage: Long? = null,
    val teacherInfo: TeacherInfo? = null,
)

/**
 * Contents of a `studentInfo` JSON object.
 *
 * @property id numeric student identifier (`id` key).
 * @property style value of the `style` key.
 * @property graduationYear value of the `graduationYear` key.
 */
data class StudentInfo(val id: Long, val style: Long, val graduationYear: Long)

/**
 * Contents of a `teacherInfo` JSON object.
 *
 * @property id teacher identifier; a string in the JSON (e.g. `"10019"`).
 * @property name value of the `name` key.
 */
data class TeacherInfo(val id: String, val name: String)

Solution

  • You could deserialize the JSON with Jackson (`ObjectMapper` plus its Kotlin module) and drop the duplicates with `distinctBy`:

    @Test
    fun `remove duplicates by fields of objects in the inner collection`() {
        // Identity of a single classInformation entry: every field except teacherInfo.name,
        // held in a list so the values are compared position by position.
        fun identityOf(entry: ClassInformation): List<Any?> {
            val student = entry.studentInfo
            return listOf(
                entry.teacherPercentage,
                entry.teacherInfo?.id,
                entry.studentPercentage,
                student?.id,
                student?.graduationYear,
                student?.style,
            )
        }

        val mapper = ObjectMapper().registerKotlinModule()
        val parsedInput = mapper.readValue<Root>(inputString)

        // Two schools are duplicates when their entries have the same identities regardless of
        // entry order (hence the Set); distinctBy keeps the first school seen for each key.
        val uniqueSchools = parsedInput.schoolInfomration.distinctBy { school ->
            school.classInformation.map { entry -> identityOf(entry) }.toSet()
        }
        val withoutDuplicates = parsedInput.copy(schoolInfomration = uniqueSchools)

        val expected = mapper.readValue<Root>(outputString)
        assertEquals(expected, withoutDuplicates)
    }