sql-servert-sqltopological-sort

Handling cycles in topological sorting in T-SQL


I've been trying to write a T-SQL script - I am aware it's probably not the best tool for this scenario - that can do a topological sort given a table of nodes and their edges.

I've eventually come up with the following script inspired by Khan's algorithm:

DROP TABLE IF EXISTS #Edges

CREATE TABLE #Edges (EdgeId INT IDENTITY(1,1), FromNode INT, ToNode INT);

INSERT INTO #Edges (FromNode, ToNode) VALUES
(1, 2), (1, 3), (3, 4), (4, 2), (4, 5), (5, 6), (7, NULL), (8, NULL)--, (6, 4);

DROP TABLE IF EXISTS #AllNodesMaster CREATE TABLE #AllNodesMaster (node INT);
INSERT INTO #AllNodesMaster SELECT DISTINCT FromNode AS node FROM #Edges UNION SELECT DISTINCT ToNode AS Node FROM #Edges

DECLARE @CurrentLevel INT = 1;

DROP TABLE IF EXISTS #AllNodes      CREATE TABLE #AllNodes      (node INT);
DROP TABLE IF EXISTS #OrderedNodes  CREATE TABLE #OrderedNodes  (Id INT IDENTITY(1,1), node INT, level INT);
DROP TABLE IF EXISTS #InDegrees     CREATE TABLE #InDegrees     (node INT, InDegreeCount INT);
DROP TABLE IF EXISTS #Queue         CREATE TABLE #Queue         (node INT);
DROP TABLE IF EXISTS #DeletedEdges  CREATE TABLE #DeletedEdges  (node INT);

WHILE EXISTS (SELECT FromNode FROM #Edges)
BEGIN

    INSERT INTO #AllNodes SELECT DISTINCT FromNode AS node FROM #Edges UNION SELECT DISTINCT ToNode AS Node FROM #Edges

    INSERT INTO #InDegrees
    SELECT T.node, ISNULL (C.InDegreeCount,  0) AS inDegreeCount
    FROM #AllNodes T
    LEFT JOIN (SELECT ToNode AS node, COUNT(FromNode) AS InDegreeCount FROM #Edges GROUP BY ToNode) C ON T.node = C.node
    WHERE T.node IS NOT NULL

    INSERT INTO #Queue SELECT node FROM #InDegrees WHERE inDegreeCount = 0

    INSERT INTO #OrderedNodes (node, level) SELECT node, @CurrentLevel FROM #Queue

    DELETE FROM #Edges OUTPUT deleted.FromNode INTO #DeletedEdges WHERE EdgeId IN (SELECT EdgeId FROM #Edges WHERE FromNode IN (SELECT node FROM #Queue))

    TRUNCATE TABLE #Queue

    INSERT INTO #Queue SELECT node FROM #AllNodes WHERE node NOT IN (SELECT DISTINCT FromNode AS node FROM #Edges UNION SELECT DISTINCT ToNode AS Node FROM #Edges) AND node NOT IN (SELECT node FROM #DeletedEdges)

    TRUNCATE TABLE #AllNodes
    TRUNCATE TABLE #DeletedEdges
    TRUNCATE TABLE #InDegrees

    SET @CurrentLevel = @CurrentLevel + 1;

END

SELECT
node, level
FROM
(
SELECT
nm.node,
CASE
    WHEN orn.node IS NOT NULL THEN orn.level
    ELSE @CurrentLevel
END AS level
FROM
#AllNodesMaster nm
LEFT JOIN #OrderedNodes orn ON nm.node = orn.node
) T
WHERE
node IS NOT NULL
ORDER BY
level
ASC

I chose to go for an iterative approach using a WHILE loop instead of a recursive CTE, as I found the latter harder to debug and often hanged endlessly.

The question is, how could cycles be best handled in this scenario using this implementation? Currently, if a cycle is introduced (the commented line "--, (6, 4)" does this), then the WHILE gets caught in an endless loop.

Any suggestions or guidance would be greatly appreciated


Solution

  • The solution to this that worked for me is to do the cycle detection before doing the topological search.

    The implementation for cycle detection in a directed graph was taken from here. Which also came from this answer.