Using VB.NET, I receive a list of objects. From this list I need to remove duplicate entries so that the list is unique based on all columns. Reading around on StackOverflow, the general idea is to use GroupBy followed by Select; however, whenever I attempt to do this, the whole collection is still returned.
For this given example, I'd expect the count in uniqueData to be 3, but it remains at 4. dataSet1 and dataSet4 should be seen as duplicates of each other.
''' <summary>
''' Reproduction of the problem: removing duplicates with GroupBy over an
''' anonymous-type key followed by Select.
''' </summary>
Module Program

    ''' <summary>
    ''' Builds four sample people (dataSet1 and dataSet4 carry identical values),
    ''' groups them by all three columns and keeps the first element of each group.
    ''' </summary>
    ''' <param name="args">Command-line arguments; not used.</param>
    Sub Main(args As String())
        Dim dataSet1 As New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 10}
        Dim dataSet2 As New PersonInfo With {.FirstName = "John", .LastName = "Hurt", .Rating = 20}
        Dim dataSet3 As New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 30}
        Dim dataSet4 As New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 10}

        Dim data As New PersonCollection With {
            .PersonCollection = New List(Of PersonInfo) From {dataSet1, dataSet2, dataSet3, dataSet4}
        }

        ' The grouping key is an anonymous type declared WITHOUT the Key modifier.
        ' Visual Basic only includes Key properties in an anonymous type's
        ' Equals/GetHashCode, so every key instance here is distinct and each
        ' person lands in its own group.
        Dim groups = data.PersonCollection.GroupBy(
            Function(person) New With {person.FirstName, person.LastName, person.Rating})

        ' One representative per group; with the key above this is still all four people.
        Dim uniqueData = groups.Select(Function(grp) grp.First()).ToList()

        ' Keep the console window open.
        Console.ReadLine()
    End Sub

    ''' <summary>Wrapper object exposing the list of people.</summary>
    Private Class PersonCollection
        Property PersonCollection As List(Of PersonInfo)
    End Class

    ''' <summary>One row of data; all three properties together define uniqueness.</summary>
    Private Class PersonInfo
        Property FirstName As String
        Property LastName As String
        Property Rating As Integer
    End Class

End Module
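It works with tuples, because a Tuple compares its items by value, whereas a VB anonymous type only compares the properties marked with the Key modifier (without any Key property, two instances are never equal):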
''' <summary>
''' De-duplicates the sample list by grouping on a Tuple built from all three columns.
''' </summary>
Module Program

    ''' <summary>
    ''' Builds four sample people, groups them by a (FirstName, LastName, Rating)
    ''' tuple and keeps the first element of each group.
    ''' </summary>
    ''' <param name="args">Command-line arguments; not used.</param>
    Sub Main(args As String())
        Dim people As New List(Of PersonInfo) From {
            New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 10},
            New PersonInfo With {.FirstName = "John", .LastName = "Hurt", .Rating = 20},
            New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 30},
            New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 10}
        }
        Dim data As New PersonCollection With {.PersonCollection = people}

        ' Tuple.Create infers Tuple(Of String, String, Integer) from its arguments.
        Dim groupedByAllColumns = data.PersonCollection.GroupBy(
            Function(person) Tuple.Create(person.FirstName, person.LastName, person.Rating))

        ' Keep the first person of every group.
        Dim uniqueData = groupedByAllColumns.Select(Function(grp) grp.First()).ToList()

        ' Keep the console window open.
        Console.ReadLine()
    End Sub

    ''' <summary>Wrapper object exposing the list of people.</summary>
    Private Class PersonCollection
        Property PersonCollection As List(Of PersonInfo)
    End Class

    ''' <summary>One row of data; all three properties together define uniqueness.</summary>
    Private Class PersonInfo
        Property FirstName As String
        Property LastName As String
        Property Rating As Integer
    End Class

End Module
But it is better to use a custom equality comparer together with Distinct:
''' <summary>
''' De-duplicates the sample list with Distinct and a custom equality comparer.
''' </summary>
Module Program

    ''' <summary>
    ''' Builds four sample people and removes duplicates using
    ''' <see cref="PersonInfoComparer"/>.
    ''' </summary>
    ''' <param name="args">Command-line arguments; not used.</param>
    Sub Main(args As String())
        Dim data As New PersonCollection With {.PersonCollection = New List(Of PersonInfo)}
        Dim dataSet1 As New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 10}
        Dim dataSet2 As New PersonInfo With {.FirstName = "John", .LastName = "Hurt", .Rating = 20}
        Dim dataSet3 As New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 30}
        Dim dataSet4 As New PersonInfo With {.FirstName = "Bob", .LastName = "Smith", .Rating = 10}
        data.PersonCollection.Add(dataSet1)
        data.PersonCollection.Add(dataSet2)
        data.PersonCollection.Add(dataSet3)
        data.PersonCollection.Add(dataSet4)

        Dim comparer As New PersonInfoComparer
        Dim uniqueData = data.PersonCollection.Distinct(comparer).ToList()

        ' Keep the console window open.
        Console.ReadLine()
    End Sub

    ''' <summary>Wrapper object exposing the list of people.</summary>
    Private Class PersonCollection
        Property PersonCollection As List(Of PersonInfo)
    End Class

    ''' <summary>One row of data; all three properties together define uniqueness.</summary>
    Private Class PersonInfo
        Property FirstName As String
        Property LastName As String
        Property Rating As Integer
    End Class

    ''' <summary>
    ''' Compares <see cref="PersonInfo"/> instances by FirstName, LastName and Rating.
    ''' Null instances and null name values are tolerated.
    ''' </summary>
    Private Class PersonInfoComparer
        Inherits EqualityComparer(Of PersonInfo)

        ''' <summary>
        ''' Returns True when both arguments are the same reference (including both
        ''' Nothing) or when all three properties match; names are compared ordinally.
        ''' </summary>
        Public Overrides Function Equals(x As PersonInfo, y As PersonInfo) As Boolean
            If x Is y Then Return True
            If x Is Nothing OrElse y Is Nothing Then Return False

            ' The shared String.Equals is null-safe, unlike x.FirstName.Equals(...).
            Return String.Equals(x.FirstName, y.FirstName, StringComparison.Ordinal) _
                AndAlso String.Equals(x.LastName, y.LastName, StringComparison.Ordinal) _
                AndAlso x.Rating = y.Rating
        End Function

        ''' <summary>
        ''' Returns a hash code consistent with <see cref="Equals"/>; 0 for Nothing.
        ''' </summary>
        Public Overrides Function GetHashCode(obj As PersonInfo) As Integer
            If obj Is Nothing Then Return 0

' System.HashCode ships with .NET Core 2.1+ and .NET Standard 2.1+ (not 2.0).
#If NETSTANDARD2_1_OR_GREATER Or NETCOREAPP2_1_OR_GREATER Then
            Return HashCode.Combine(obj.FirstName, obj.LastName, obj.Rating)
#Else
            ' Tuple hashing tolerates null items and is order-sensitive, so
            ' ("Bob", "Smith") and ("Smith", "Bob") do not collide as a plain Xor would.
            Return Tuple.Create(obj.FirstName, obj.LastName, obj.Rating).GetHashCode()
#End If
        End Function
    End Class

End Module