vectorjuliafind-occurrences

Counting the number of appearance of Tuples in a Vector


Let vec = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "C"), ("B", "D")] be a vector.
I want to count the number of appearance of each element at each step in the for loop.

function count_occurrences(vec)
    vect = []
    nb_appear_A = 0
    nb_appear_B = 0
    nb_appear_C = 0
    nb_appear_D = 0

    for v in vec
        v1, v2 = v 
        if v1 == "A"
            nb_appear_A += 1
        end
        if v2 == "A"
            nb_appear_A += 1
        end
        if v2 == "B"
            nb_appear_B += 1
        end
        if v1 == "B"
            nb_appear_B += 1
        end
        if v1 == "C"
            nb_appear_C += 1
        end
        if v2 == "C"
            nb_appear_C += 1
        end
        if v1 == "D"
            nb_appear_D += 1
        end
        if v2 == "D"
            nb_appear_D += 1
        end

        # I check for each pairs and push them onto vect
        if v1 == "A" && v2 == "B"
            push!(vect, (nb_appear_A, nb_appear_B))
        elseif v1 == "A" && v2 == "C"
            push!(vect, (nb_appear_A, nb_appear_C))
        elseif v1 == "A" && v2 == "D"
            push!(vect, (nb_appear_A, nb_appear_D))
        elseif v1 == "B" && v2 == "C"
            push!(vect, (nb_appear_B, nb_appear_C))
        elseif v1 == "B" && v2 == "D"
            push!(vect, (nb_appear_B, nb_appear_D))
        elseif v1 == "C" && v2 == "D"
            push!(vect, (nb_appear_C, nb_appear_D))
        end
    end

    return vect
end
 count_occurrences(vec)
5-element Vector{Any}:
 (1, 1)
 (2, 1)
 (3, 1)
 (2, 2)
 (3, 2)

This code works, but I would like to apply it for a different vector, say vec = [("A", "B"), ("A", "C"), ("A", "D"), ("B", "C"), ("R", "E"),...]


Solution

  • CORRECTION: After looking at the question code again, I've noticed the results are a bit different. To get values similar to count_occurences in the question:

    function count_occurrences(v)
        d = countmap(Iterators.flatten(v))
        reverse(map(reverse(v)) do p
            r = getindex.(Ref(d), p)
            d[p[1]]-=1
            d[p[2]]-=1
            r
        end)
    end
    

    But maybe a nicer implementation would be:

    count_occurrences(v) = foldl(v; 
      init=(Tuple{Int,Int}[],Dict{String,Int}())) do (r,d),(x,y)
        d[x] = get(d,x,0)+1
        d[y] = get(d,y,0)+1
        push!(r, (d[x],d[y]))
        (r,d)
      end |> first
    

    ADDITION: An even shorter version of the foldl method is:

    count_occurrences(v) = accumulate(v; 
      init=((0,0),Dict{String,Int}())) do (r,d),(x,y)
        (((d[x] = get(d,x,0)+1;), (d[y] = get(d,y,0)+1;)),d)
      end .|> first