javahashapache-commonshash-collisionapache-commons-lang

Apache Commons Lang HashCodeBuilder collision


I am getting collision with using Apache Commons Lang HashCodeBuilder using release 3.4. I am hashing a Route object, which contains two Cell objects, start and end. At the end I am providing an example when collision occurs. Both classes override hashCode and equals method. First the Cell class:

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;

public class Cell {
    private int east;
    private int south;

    public Cell(int east, int south) {
        this.east = east;
        this.south = south;
    }

    public int getEast() {
        return east;
    }

    public void setEast(int east) {
        this.east = east;
    }

    public int getSouth() {
        return south;
    }

    public void setSouth(int south) {
        this.south = south;
    }

    @Override
    /**
     * Compute hash code by using Apache Commons Lang HashCodeBuilder.
     */
    public int hashCode() {
        return new HashCodeBuilder(17, 31)
                .append(this.south)
                .append(this.east)
                .toHashCode();
    }

    @Override
    /**
     * Compute equals by using Apache Commons Lang EqualsBuilder.
     */
    public boolean equals(Object obj) {
        if (!(obj instanceof Cell))
            return false;
        if (obj == this)
            return true;

        Cell cell = (Cell) obj;
        return new EqualsBuilder()
                .append(this.south, cell.south)
                .append(this.east, cell.east)
                .isEquals();
    }
}

And the Route class:

import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;

import java.util.*;

public class Route {
    private Cell startCell;
    private Cell endCell;

    public Route(Cell startCell, Cell endCell) {
        this.startCell = startCell;
        this.endCell = endCell;
    }

    public Cell getStartCell() {
        return startCell;
    }

    public void setStartCell(Cell startCell) {
        this.startCell = startCell;
    }

    public Cell getEndCell() {
        return endCell;
    }

    public void setEndCell(Cell endCell) {
        this.endCell = endCell;
    }


    @Override
    public int hashCode() {
        return new HashCodeBuilder(43, 59)
                .append(this.startCell)
                .append(this.endCell)
                .toHashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof Route))
            return false;
        if (obj == this)
            return true;

        Route route = (Route) obj;
        return new EqualsBuilder()
                .append(this.startCell, route.startCell)
                .append(this.endCell, route.endCell)
                .isEquals();
    }
}

Example of collision:

public class Collision {
    public static void main(String[] args) {
        Route route1 = new Route(new Cell(154, 156), new Cell(154, 156));
        Route route2 = new Route(new Cell(153, 156), new Cell(151, 158));

        System.out.println(route1.hashCode() + " " + route2.hashCode());
    }
}

Output is 1429303 1429303. Now if I change the initial odd number and multiplier odd number to be the same for both classes, then this example does not collide. But in docs for HashCodeBuilder it clearly specifies:

Two randomly chosen, odd numbers must be passed in. Ideally these should be different for each class, however this is not vital.

Ideally I would like to have perfect hash function (injective function) for my example if this is even possible.


Solution

  • You might be able to more optimally distribute your generated hash codes by adding more parameters when generating the hash code (this is independent of the Apache commons library). With this example, you could pre-compute one or more properties of the Route class and use this property when generating the hash code. For instance, calculate the slope of the line between the two Cell objects:

    double slope = (startCell.getEast() - endCell.getEast());
    if ( slope == 0 ){//prevent division by 0
        slope = startCell.getSouth() - endCell.getSouth();
    }else{
        slope = (startCell.getSouth() - endCell.getSouth()) / slope;
    }
    
    return new HashCodeBuilder(43, 59)
       .append(this.startCell)
       .append(this.endCell)
       .append(slope)
       .toHashCode();
    

    Generates 83091911 83088489 with your example. Alternatively (or together with) use the distance between the two Cell objects:

    double length = Math.sqrt(Math.pow(startCell.getSouth() - endCell.getSouth(), 2) + Math.pow(startCell.getEast() - endCell.getEast(), 2));
    return new HashCodeBuilder(43, 59)
       .append(this.startCell)
       .append(this.endCell)
       .append(length)
       .toHashCode();
    

    Which used alone with your example results in 83091911 and -486891382.

    And to test if this prevents collision:

    List<Cell> cells = new ArrayList<Cell>();
    for ( int i = 0; i < 50; i++ ){
        for ( int j = 0; j < 50; j++ ){
            Cell c = new Cell(i,j);
            cells.add(c);
    
        }
    }
    System.out.println(cells.size() + " cells generated");
    System.out.println("Testing " + (cells.size()*cells.size()) + " number of Routes");
    Set<Integer> set = new HashSet<Integer>();
    int collisions = 0;
    for ( int i = 0; i < cells.size(); i++ ){
        for ( int j = 0; j < cells.size(); j++ ){
            Route r = new Route(cells.get(i), cells.get(j));
            if ( set.contains(r.hashCode() ) ){
                collisions++;
            }
            set.add(r.hashCode());
        }
    }
    System.out.println(collisions);
    

    Amongst 6,250,000 Routes generated:

    1. Without length and slope: 6,155,919 collisions
    2. With length and slope: 873,047 collisions