ios arrays swift nsarray

How to group by the elements of an array in Swift


Let's say that I have this code:

/// Container for a list of `StatEvents`; the list starts out empty.
class Stat {
    var statEvents: [StatEvents] = []
}

/// A single recorded event.
struct StatEvents {
   /// Name of the event, e.g. "lunch" or "dinner".
   var name: String
   /// Date stored as text, e.g. "01-01-2015".
   var date: String
   /// Hour count attached to the event.
   var hours: Int
}


// Sample data: five events on the same date, each named either "lunch" or "dinner".
var currentStat = Stat()

currentStat.statEvents = ["lunch", "dinner", "dinner", "lunch", "dinner"].map { name in
    StatEvents(name: name, date: "01-01-2015", hours: 1)
}

// Destination arrays, one per name known in advance.
var filteredArray1: [StatEvents] = []
var filteredArray2: [StatEvents] = []

I could manually call the following function once per name in order to get 2 arrays, each grouped by the same name.

// Manual approach: one filter call per name that is known in advance.
filteredArray1 = currentStat.statEvents.filter { event in event.name == "dinner" }
filteredArray2 = currentStat.statEvents.filter { event in event.name == "lunch" }

The problem is that I won't know the name values in advance (in this case "dinner" and "lunch"), so I would like to group this array of statEvents by name automatically, so that I get one array for each distinct name.

How could I do that?


Solution

  • Swift 4:

    Since Swift 4, this functionality has been added to the standard library. You can use it like so:

    // Groups the events into a dictionary keyed by each event's name.
    Dictionary(grouping: currentStat.statEvents, by: { $0.name })
    
    [
      "dinner": [
        StatEvents(name: "dinner", date: "01-01-2015", hours: 1),
        StatEvents(name: "dinner", date: "01-01-2015", hours: 1),
        StatEvents(name: "dinner", date: "01-01-2015", hours: 1)
      ],
      "lunch": [
        StatEvents(name: "lunch", date: "01-01-2015", hours: 1),
        StatEvents(name: "lunch", date: "01-01-2015", hours: 1)
      ]
    ]
    

    Swift 3:

    public extension Sequence {
        /// Groups the elements of the sequence into a dictionary.
        ///
        /// - Parameter key: Closure returning the grouping key for an element.
        /// - Returns: A dictionary mapping each key to the elements that produced it,
        ///   in the order they were visited.
        func group<U: Hashable>(by key: (Iterator.Element) -> U) -> [U:[Iterator.Element]] {
            var groups: [U: [Iterator.Element]] = [:]
            for item in self {
                let groupKey = key(item)
                if groups[groupKey] == nil {
                    // First element seen for this key: start a new group.
                    groups[groupKey] = [item]
                } else {
                    groups[groupKey]?.append(item)
                }
            }
            return groups
        }
    }
    

    Unfortunately, the append function above copies the underlying array, instead of mutating it in place, which would be preferable. This causes a pretty big slowdown. You can get around the problem by using a reference type wrapper:

    /// Reference-type wrapper around a single value, so the wrapped value can be
    /// mutated through any reference to the box.
    class Box<A> {
      var value: A

      /// Wraps `value` in a new box.
      init(_ value: A) {
        self.value = value
      }
    }
    
    public extension Sequence {
      /// Groups the elements of the sequence into a dictionary.
      ///
      /// Each group is accumulated inside a `Box` and unwrapped into a plain
      /// array once every element has been visited.
      ///
      /// - Parameter key: Closure returning the grouping key for an element.
      /// - Returns: A dictionary mapping each key to the elements that produced it,
      ///   in the order they were visited.
      func group<U: Hashable>(by key: (Iterator.Element) -> U) -> [U:[Iterator.Element]] {
        var boxes: [U: Box<[Iterator.Element]>] = [:]
        for item in self {
          let groupKey = key(item)
          if let box = boxes[groupKey] {
            // `Box` is a class, so this appends to the array held by the stored box.
            box.value.append(item)
          } else {
            boxes[groupKey] = Box([item])
          }
        }
        // Unwrap the boxes into the plain-array dictionary that is returned.
        var unboxed: [U: [Iterator.Element]] = Dictionary(minimumCapacity: boxes.count)
        for (groupKey, box) in boxes {
          unboxed[groupKey] = box.value
        }
        return unboxed
      }
    }
    

    Even though you traverse the final dictionary twice, this version is still faster than the original in most cases.

    Swift 2:

    public extension SequenceType {

      /// Sorts the elements of self into a dictionary of arrays.
      ///
      /// keyFunc is called once per element; elements that yield the same key
      /// end up in the same array, in the order they were visited.

      func categorise<U : Hashable>(@noescape keyFunc: Generator.Element -> U) -> [U:[Generator.Element]] {
        var groups: [U:[Generator.Element]] = [:]
        for item in self {
          let groupKey = keyFunc(item)
          if groups[groupKey] == nil {
            // First element seen for this key: start a new group.
            groups[groupKey] = [item]
          } else {
            groups[groupKey]?.append(item)
          }
        }
        return groups
      }
    }
    

    In your case, you could have the "keys" returned by keyFunc be the names:

    currentStat.statEvents.categorise { event in event.name }
    [  
      dinner: [
        StatEvents(name: "dinner", date: "01-01-2015", hours: 1),
        StatEvents(name: "dinner", date: "01-01-2015", hours: 1),
        StatEvents(name: "dinner", date: "01-01-2015", hours: 1)
      ], lunch: [
        StatEvents(name: "lunch", date: "01-01-2015", hours: 1),
        StatEvents(name: "lunch", date: "01-01-2015", hours: 1)
      ]
    ]
    

    So you'll get a dictionary, where every key is a name, and every value is an array of the StatEvents with that name.

    Swift 1:

    /// Sorts the elements of seq into a dictionary of arrays.
    ///
    /// keyFunc is called once per element; elements that yield the same key
    /// end up in the same array, in the order they were visited.
    func categorise<S : SequenceType, U : Hashable>(seq: S, @noescape keyFunc: S.Generator.Element -> U) -> [U:[S.Generator.Element]] {
      var groups: [U:[S.Generator.Element]] = [:]
      for item in seq {
        let groupKey = keyFunc(item)
        if let existing = groups[groupKey] {
          groups[groupKey] = existing + [item]
        } else {
          // First element seen for this key: start a new group.
          groups[groupKey] = [item]
        }
      }
      return groups
    }
    
    categorise(currentStat.statEvents) { event in event.name }
    

    With a Printable conformance added so that each event prints compactly, this gives the output:

    extension StatEvents : Printable {
      /// Textual form of the event: its name and its date, separated by ": ".
      var description: String {
        return name + ": " + date
      }
    }
    let grouped = categorise(currentStat.statEvents) { $0.name }
    print(grouped)
    [
      dinner: [
        dinner: 01-01-2015,
        dinner: 01-01-2015,
        dinner: 01-01-2015
      ], lunch: [
        lunch: 01-01-2015,
        lunch: 01-01-2015
      ]
    ]
    

    (The swiftstub is here)