How can I remove cycles from my directed graph? It's a big graph (100k+ nodes 200k+ edges) so the method needs to be efficient. I need to make the digraph acyclic in order to use functions like networkx.topological_generations.
I've tried methods where I repeatedly generate cycles and remove the last edge in each cycle path but after running for 10+ hours without finishing I considered this a failed attempt.
failed attempt (never finished; inefficient)
def remove_cycles_from_G(G: nx.DiGraph):
search_for_cycles = True
while search_for_cycles:
for cycle_path in nx.simple_cycles(G):
try:
G.remove_edge(cycle_path[-1], cycle_path[0])
except nx.NetworkXError:
# edge has already been disjointed by a previous edge removal.
# Restart cycle generator.
search_for_cycles = (
False # Temporary condition which will be reversed.
)
break
search_for_cycles = not (search_for_cycles)
I've also crafted a more sophisticated heuristic approach based on the demonstrations in this project but even this method doesn't work on a digraph of this size (after an hour of running my memory was maxed out).
I understand that identifying the fewest edges to remove in order to make the digraph acyclic is an NP-hard problem (feedback arc set problem) but I'm not necessarily trying to find the fewest edges to make the digraph acyclic, I just want a fast and efficient approach.
Here's an example of a networkx DiGraph with a ton of cycles. My situation involves even more but this demonstrates the point:
import networkx as nx
import random
def induce_cycles(g: nx.DiGraph, cycles) -> None:
cycles_added = 0
while cycles_added < cycles:
node = random.choice(list(g))
non_parent_ancestors = nx.ancestors(g, node).difference(g.predecessors(node))
if non_parent_ancestors:
g.add_edge(node, random.choice(list(non_parent_ancestors)))
cycles_added += 1
g = nx.balanced_tree(3, 6, create_using=nx.DiGraph())
induce_cycles(g, len(g.edges()) * 5)
# Efficiently remove cycles from g...
(The discussion that got us here was acrimonious. I'm leaving only the good bits for anyone who needs this answer in the future.)
Here is a different approach. I'm trying to imitate a topological sort, EXCEPT that I'm taking the approach of always finding a best next one.
from sortedcontainers import SortedSet
def topological_remove_cycles (g):
# Dictionary of sets of incoming edges. We want to pick nodes with few of them.
incoming = {}
for node in g.nodes:
incoming[node] = set()
for node in g.nodes():
for next_node in g.neighbors(node):
incoming[next_node].add(node)
# Sorted set of (count_incoming, -count_outgoing, node) triplets.
# The first item in the set will have as few incoming nodes as it can, and as many outgoing.
# In other words, it is a greedy choice for a good node to get rid of cycles.
todo = SortedSet()
for node, to_node in incoming.items():
todo.add((len(to_node), -len(g.adj[node]), node))
# And now let's start removing cycles.
while 0 < len(todo):
# Get the best node.
_, _, node = todo.pop(0)
to_node = incoming[node]
for prev_node in to_node:
# Each of these edges is in, or comes from, a cycle.
if prev_node != node:
# Do bookkeeping in todo.
len_in = len(incoming[prev_node])
len_out = len(g.adj[prev_node])
todo.remove((len_in, -len_out, prev_node))
todo.add((len_in, -len_out + 1, prev_node))
g.remove_edge(prev_node, node)
for next_node in g.neighbors(node):
# Do bookkeeping in todo for forgetting about node.
len_in = len(incoming[next_node])
len_out = len(g.adj[next_node])
todo.remove((len_in, -len_out, next_node))
todo.add((len_in - 1, -len_out, next_node))
incoming[next_node].remove(node)
# And now we've guaranteed that node is cycle free and gone from our bookkeeping.
And of course, let's do a small test where the previous code failed.
data = {0: [1, 2, 11], 1: [3, 4, 10], 2: [5, 6], 3: [7, 8, 6, 9], 4: [9, 10], 5: [11, 12, 3], 6: [13, 14, 10, 5, 7], 7: [1], 8: [7, 14], 9: [10], 10: [14], 11: [10], 12: [], 13: [], 14: [1, 2]}
g = nx.from_dict_of_lists(data, create_using=nx.DiGraph())
print(nx.to_dict_of_lists(g))
print(g)
print("Before removing, there are " + str(len(list(nx.simple_cycles(g)))) + " cycles")
topological_remove_cycles(g)
print(g)
print("After removing, there are " + str(len(list(nx.simple_cycles(g)))) + " cycles")
print(nx.to_dict_of_lists(g))
And a larger test. Note that it is so good at untangling things, it usually finds a way to remove fewer edges than were added in creating cycles!
g = nx.balanced_tree(3, 6, create_using=nx.DiGraph())
induce_cycles(g, 100)
print(g)
topological_remove_cycles(g)
print(g)
print("After removing, there are " + str(len(list(nx.simple_cycles(g)))) + " cycles")