algorithmjourney

Algorithm to remove duplicated location on list


I have a service to find journey and remove the duplicated visit city.

public static void main(String[] args){
        List<List<String>> allPaths = new ArrayList<>();
        allPaths.add(List.of("Newyork","Washington","Los Angeles","Chicago"));
        allPaths.add(List.of("Newyork","Washington","Houston"));
        allPaths.add(List.of("Newyork","Dallas"));
        allPaths.add(List.of("Newyork","Columbus", "Chicago"));
        Set<String> orphanageLocations = new HashSet<>();
        removeDuplicatedLocation(allPaths, orphanageLocations);
        //expected allPaths:
        //"Newyork","Washington","Los Angeles","Chicago"
        //"Newyork","Dallas"
        //"Newyork","Columbus"
        
        //expected orphanageLocations
        //"Houston"
        
    }
    
    private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations){
        //do something here
    }

in the allPaths i store all the path from a origin to other cities. but may be some path will contain same city, like Washington appear in both first and second path.

Now i need a service to remove that duplicated city. when two paths has same city then we take the path which going to more city.

And service also return city that can not visit. for example the 2nd path has "Washington" is duplicated with 1st path, then we remove that 2nd path (it has less city than first one), then there are no path to "Houston" available -> becoming orphanage

Other test cases:

public static void main(String[] args){
        List<List<String>> allPaths = new ArrayList<>();
        allPaths.add(List.of("Newyork","Washington","Los Angeles","Chicago", "Dallas"));
        allPaths.add(List.of("Newyork","Los Angeles","Houston", "Philadenphia"));
        allPaths.add(List.of("Newyork","Dallas"));
        allPaths.add(List.of("Newyork","Columbus", "San Francisco"));
        Set<String> orphanageLocations = new HashSet<>();
        removeDuplicatedLocation(allPaths, orphanageLocations);
        //expected allPaths:
        //"Newyork","Washington","Los Angeles","Chicago", "Dallas"
        //"Newyork","Columbus", "San Francisco"
        
        //expected orphanageLocations
        //"Houston","Philadenphia"
        
    }

Would somebody suggest me a algorithm to solve it?

---Edit 1: i update my dirty solution here, still waiting for better one

private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations){
        //sort to make sure the longest path is on top
        List<List<String>> sortedPaths = allPaths.stream().sorted((a, b) -> Integer.compare(b.size(), a.size()))
                .collect(Collectors.toList());
        for(int i = 0; i < sortedPaths.size()-1; i++){
            List<String> path = sortedPaths.get(i);
            orphanageLocations.removeIf(path::contains);
            for(int j = i+1; j < sortedPaths.size(); j++){
                for(int k = 1; k < path.size();k++) {
                    Iterator<String> iterator = sortedPaths.get(j).iterator();
                    boolean isRemove = false;
                    while (iterator.hasNext()) {
                        String city = iterator.next();
                        if(isRemove && !path.contains(city)){
                            orphanageLocations.add(city);
                        }
                        if(StringUtils.equals(city, path.get(k))){
                            isRemove = true;
                        }
                        if(isRemove){
                            iterator.remove();
                        }
                    }
                }
            }
        }
        //remove path if it's only origin
        sortedPaths.removeIf(item -> item.size() == 1);
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }

---Edit 2: Thanks for solution of @devReddit, i made a small test with huge amount of route. The more city in each path, the slower your solution is

public static void main(String[] args){
        List<List<String>> allPaths = new ArrayList<>();
        List<List<String>> allPaths2 = new ArrayList<>();
        List<String> locations = Stream.of("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
                                        "O", "P", "Q", "R", "S", "T", "U", "V", "X", "Y", "Z").collect(Collectors.toList());
        Random rand = new Random();
        int numberOfRoute = 10000;
        String origin = "NY";
        for(int i = 0; i < numberOfRoute; i++){
            List<String> route = new ArrayList<>();
            List<String> route2 = new ArrayList<>();
            route.add(origin);
            route2.add(origin);
            //int routeLength = rand.nextInt(locations.size());
            int routeLength = 10;
            while(route.size() < routeLength){
                int randomIndex = rand.nextInt(locations.size()-1);
                if(!route.contains(locations.get(randomIndex))){
                    route.add(locations.get(randomIndex));
                    route2.add(locations.get(randomIndex));
                }
            }
            allPaths.add(route);
            allPaths2.add(route2);
        }

        System.out.println("Process for " + allPaths2.size() + " routes");
        Set<String> orphanageLocations2 = new HashSet<>();
        long startTime2 = System.currentTimeMillis();
        removeDuplicatedLocation3(allPaths2, orphanageLocations2); //uncle bob solution
        long endTime2 = System.currentTimeMillis();
        System.out.println(allPaths2);
        System.out.println(orphanageLocations2);
        System.out.println("Total time uncleBob solution(ms):" + (endTime2-startTime2));

        System.out.println("Process for " + allPaths.size() + " routes");
        Set<String> orphanageLocations = new HashSet<>();
        long startTime = System.currentTimeMillis();
        removeDuplicatedLocation(allPaths, orphanageLocations); //devReddit solution
        long endTime = System.currentTimeMillis();
        System.out.println(allPaths);
        System.out.println(orphanageLocations);
        System.out.println("Total time devReddit solution(ms):" + (endTime-startTime));
}

//devReddit solution
private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations) {
        List<List<String>> sortedFixedPaths = allPaths                  // List.of produces immutable lists,
                .stream()                                               // so you can't directly remove string from the list
                .sorted((a, b) -> Integer.compare(b.size(), a.size()))  // this fixed list will be needed later
                .collect(Collectors.toList());
        List<List<String>> sortedPaths = sortedFixedPaths              // The list is regenerated through manual deep copy
                .stream()                                              // generated a single string from the streams of
                .map(path ->                                           // each List<String> and created new list, this is now mutable
                        new ArrayList<>(Arrays.asList(String.join(",", path).split(","))))
                .collect(Collectors.toList());
        Set<List<String>> valuesToBeRemoved = new HashSet<>();
        String source = sortedPaths.get(0).get(0);
        Map<String, List<Integer>> cityMapOfIndex = generateHashMap(sortedPaths, source);    // This hashmap keeps track of the existence of cities in different lists
        removeDuplicates(cityMapOfIndex, sortedPaths);                 // this method removes the duplicates from the smaller paths
        // adds the remaining cities to orphanList
        cityMapOfIndex.forEach((cityName, value) -> {           // this block checks whether any mid element in the path is gone
            int index = value.get(0);                        // removes the path from result list
            List<String> list = sortedPaths.get(index);
            int indexInPath = list.indexOf(cityName);
            if (indexInPath != sortedFixedPaths.get(index).indexOf(cityName)) {
                orphanageLocations.add(cityName);
                sortedPaths.get(index).remove(indexInPath);
            }
        });
        valuesToBeRemoved.add(new ArrayList<>(Collections.singleton(source)));   // To handle the case where only source remains in the path
        sortedPaths.removeAll(valuesToBeRemoved);                                 // after removing other duplicates
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }

    private static void removeDuplicates(Map<String, List<Integer>> cityMapOfIndex, List<List<String>> sortedPaths) {
        for (Map.Entry<String, List<Integer>> entry : cityMapOfIndex.entrySet()) {
            List<Integer> indexList = entry.getValue();
            while (indexList.size() > 1) {
                int index = indexList.get(indexList.size() - 1);         // get the last index i.e. the smallest list of city where this entry exists
                sortedPaths.get(index).remove(entry.getKey());          // remove the city from the list
                indexList.remove((Integer) index);                // update the index list of occurrence
            }
            cityMapOfIndex.put(entry.getKey(), indexList);
        }
    }

    private static Map<String, List<Integer>> generateHashMap(List<List<String>> sortedPaths,
                                                              String source) {
        Map<String, List<Integer>> cityMapOfIndex = new HashMap<>();
        for (int x = 0; x < sortedPaths.size(); x++) {
            int finalX = x;
            sortedPaths.get(x)
                    .forEach(city -> {
                        if (!city.equalsIgnoreCase(source)) {                   // add entries for all except the source
                            List<Integer> indexList = cityMapOfIndex.containsKey(city) ?         // checks whether there's already an entry
                                    cityMapOfIndex.get(city) : new ArrayList<>();                // to avoid data loss due to overwriting
                            indexList.add(finalX);                                    // adds the index of the List of string
                            cityMapOfIndex.put(city, indexList);               // add or update the map with current indexList
                        }
                    });

        }
        return cityMapOfIndex;
    }

//Bob solution
private static void removeDuplicatedLocation3(List<List<String>> allPaths, Set<String> orphanageLocations){
        //sort to make sure the longest path is on top
        List<List<String>> sortedPaths = allPaths.stream().sorted((a, b) -> Integer.compare(b.size(), a.size()))
                .collect(Collectors.toList());
        for(int i = 0; i < sortedPaths.size()-1; i++){
            List<String> path = sortedPaths.get(i);
            orphanageLocations.removeIf(path::contains);
            for(int j = i+1; j < sortedPaths.size(); j++){
                for(int k = 1; k < path.size();k++) {
                    Iterator<String> iterator = sortedPaths.get(j).iterator();
                    boolean isRemove = false;
                    while (iterator.hasNext()) {
                        String city = iterator.next();
                        if(isRemove && !path.contains(city)){
                            orphanageLocations.add(city);
                        }
                        if(StringUtils.equals(city, path.get(k))){
                            isRemove = true;
                        }
                        if(isRemove){
                            iterator.remove();
                        }
                    }
                }
            }
        }
        //remove path if it's only origin
        sortedPaths.removeIf(item -> item.size() == 1);
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }

Here is one of the result:

Test with route lenth is 6


Process for 10000 routes
[[NY, Q, Y, T, S, X], [NY, E], [NY, V, A, H, N], [NY, J, L, I], [NY, D], [NY, O], [NY, C], [NY, P, M], [NY, F], [NY, K], [NY, U], [NY, G], [NY, R], [NY, B]]
[]
Total time uncleBob solution(ms):326
Process for 10000 routes
[[NY, Q, Y, T, S, X], [NY, E], [NY, V], [NY, J, L], [NY, D], [NY, O]]
[A, B, C, F, G, H, I, K, M, N, P, R, U]
Total time devReddit solution(ms):206

With route length is 10

Process for 10000 routes
[[NY, J, V, G, A, I, B, R, U, S], [NY, L, X, Q, M, E], [NY, K], [NY, Y], [NY, F, P], [NY, N], [NY, H, D], [NY, T, O], [NY, C]]
[]
Total time uncleBob solution(ms):292
Process for 10000 routes
[[NY, J, V, G, A, I, B, R, U, S]]
[C, D, E, F, H, K, L, M, N, O, P, Q, T, X, Y]
Total time devReddit solution(ms):471

Also result is not the same,from the same inpit, mine return more valid route

Actually i this is not what i expect because solution from @devReddit look better & faster

Thanks


Solution

  • Your provided solution is O(m^2xn^2). I've figured out a solution which has O(n^2) time complexity. Necessary comments have been added as explanation:

    The core method removeDuplicatedLocation:

    private static void removeDuplicatedLocation(List<List<String>> allPaths, Set<String> orphanageLocations) {
        List<List<String>> sortedFixedPaths = allPaths                  // List.of produces immutable lists,
                .stream()                                               // so you can't directly remove string from the list
                .sorted((a, b) -> Integer.compare(b.size(), a.size()))  // this fixed list will be needed later
                .collect(Collectors.toList());
        List<List<String>> sortedPaths = sortedFixedPaths              // The list is regenerated through manual deep copy
                .stream()                                              // generated a single string from the streams of
                .map(path ->                                           // each List<String> and created new list, this is now mutable
                        new ArrayList<>(Arrays.asList(String.join(",", path).split(","))))
                .collect(Collectors.toList());
        Set<List<String>> valuesToBeRemoved = new HashSet<>();
        String source = sortedPaths.get(0).get(0);
        Map<String, List<Integer>> cityMapOfIndex = generateHashMap(sortedPaths, source);    // This hashmap keeps track of the existence of cities in different lists
        removeDuplicates(cityMapOfIndex, sortedPaths);                 // this method removes the duplicates from the smaller paths
        cityMapOfIndex.entrySet().stream().forEach(city -> {           // this block checks whether any mid element in the path is gone
            String cityName = city.getKey();                           // adds the remaining cities to orphanList
            int index = city.getValue().get(0);                        // removes the path from result list
            List<String> list = sortedPaths.get(index);
            int indexInPath = list.indexOf(cityName);
            if (indexInPath != sortedFixedPaths.get(index).indexOf(cityName)) {
                orphanageLocations.add(cityName);
                sortedPaths.get(index).remove(indexInPath);
            }
        });
        valuesToBeRemoved.add(new ArrayList<>(Collections.singleton(source)));   // To handle the case where only source remains in the path
        sortedPaths.removeAll(valuesToBeRemoved);                                 // after removing other duplicates
        allPaths.clear();
        allPaths.addAll(sortedPaths);
    }
    

    The removeDuplicates and generateHashMap methods used in the aforementioned stub is given below:

    private static void removeDuplicates(Map<String, List<Integer>> cityMapOfIndex, List<List<String>> sortedPaths) {
        for (Map.Entry<String, List<Integer>> entry : cityMapOfIndex.entrySet()) {
            List<Integer> indexList = entry.getValue();
            while (indexList.size() > 1) {
                int index = indexList.get(indexList.size() - 1);         // get the last index i.e. the smallest list of city where this entry exists
                sortedPaths.get(index).remove(entry.getKey());          // remove the city from the list
                indexList.remove((Integer) index);                // update the index list of occurrence
            }
            cityMapOfIndex.put(entry.getKey(), indexList);
        }
    }
    
    private static Map<String, List<Integer>> generateHashMap(List<List<String>> sortedPaths,
                                                              String source) {
        Map<String, List<Integer>> cityMapOfIndex = new HashMap<>();
        for (int x = 0; x < sortedPaths.size(); x++) {     
            int finalX = x;
            sortedPaths.get(x)
                    .stream()
                    .forEach(city -> {
                        if (!city.equalsIgnoreCase(source)) {                   // add entries for all except the source
                            List<Integer> indexList = cityMapOfIndex.containsKey(city) ?         // checks whether there's already an entry
                                    cityMapOfIndex.get(city) : new ArrayList<>();                // to avoid data loss due to overwriting
                            indexList.add(finalX);                                    // adds the index of the List of string
                            cityMapOfIndex.put(city, indexList);               // add or update the map with current indexList
                        }
                    });
        }
        return cityMapOfIndex;
    }
    

    Please let me know if you have any query.