sqlmariadbmariadb-10.6

Nested GROUP BY on single table using SUBSTRING_INDEX


My current query return expected O/P when run on below sample, my question is about how to improve the query and its performance.

Schema SQL

CREATE TABLE function_groups (
  id int NOT NULL AUTO_INCREMENT PRIMARY KEY,
  name varchar(255) NOT NULL UNIQUE
);
INSERT INTO function_groups (name) VALUES ('f1.g1.a1');
INSERT INTO function_groups (name) VALUES ('f1.g1.a2');
INSERT INTO function_groups (name) VALUES ('f1.g1.a3');
INSERT INTO function_groups (name) VALUES ('f1.g1.a4');

INSERT INTO function_groups (name) VALUES ('f1.g2.a1');
INSERT INTO function_groups (name) VALUES ('f1.g2.a2');
INSERT INTO function_groups (name) VALUES ('f1.g2.a3');
INSERT INTO function_groups (name) VALUES ('f1.g2.a4');


INSERT INTO function_groups (name) VALUES ('f2.g1.a1');
INSERT INTO function_groups (name) VALUES ('f2.g1.a2');
INSERT INTO function_groups (name) VALUES ('f2.g1.a3');
INSERT INTO function_groups (name) VALUES ('f2.g1.a4');

INSERT INTO function_groups (name) VALUES ('f2.g2.a1');
INSERT INTO function_groups (name) VALUES ('f2.g2.a2');
INSERT INTO function_groups (name) VALUES ('f2.g2.a3');
INSERT INTO function_groups (name) VALUES ('f2.g2.a4');

Expected O/P

id  groups
f1  [{"id": "f1.g1", "actions": [{"id": 1, "name": "f1.g1.a1"}, {"id": 2, "name": "f1.g1.a2"}, {"id": 3, "name": "f1.g1.a3"}, {"id": 4, "name": "f1.g1.a4"}]}, {"id": "f1.g2", "actions": [{"id": 5, "name": "f1.g2.a1"}, {"id": 6, "name": "f1.g2.a2"}, {"id": 7, "name": "f1.g2.a3"}, {"id": 8, "name": "f1.g2.a4"}]}]
f2  [{"id": "f2.g1", "actions": [{"id": 9, "name": "f2.g1.a1"}, {"id": 10, "name": "f2.g1.a2"}, {"id": 11, "name": "f2.g1.a3"}, {"id": 12, "name": "f2.g1.a4"}]}, {"id": "f2.g2", "actions": [{"id": 13, "name": "f2.g2.a1"}, {"id": 14, "name": "f2.g2.a2"}, {"id": 15, "name": "f2.g2.a3"}, {"id": 16, "name": "f2.g2.a4"}]}]

Query SQL

SELECT 
    SUBSTRING_INDEX(t1.name, '.', 1) AS id,
    (SELECT 
            JSON_ARRAYAGG(JSON_OBJECT('id',
                                t2.id,
                                'actions',
                                (SELECT 
                                        JSON_ARRAYAGG(JSON_OBJECT('id', t3.id, 'name', t3.name))
                                    FROM
                                        function_groups t3
                                    WHERE
                                        t2.id = SUBSTRING_INDEX(t3.name, '.', 2)
                                    GROUP BY t2.id)))
        FROM
            (SELECT 
                SUBSTRING_INDEX(t2.name, '.', 2) AS id
            FROM
                function_groups t2
            GROUP BY SUBSTRING_INDEX(t2.name, '.', 2)) t2
        WHERE
            SUBSTRING_INDEX(t2.id, '.', 1) = SUBSTRING_INDEX(t1.name, '.', 1)
        GROUP BY SUBSTRING_INDEX(t2.id, '.', 1)) AS groups
FROM
    function_groups t1
GROUP BY SUBSTRING_INDEX(t1.name, '.', 1)

Solution

  • Changing data structure will be the best option, but if it's not possible, then use group by only if it's needed, use LIKE rather than substring on column to use index (if any will be created).

    Refactored query

    SELECT 
        distinct SUBSTRING_INDEX(t1.name, '.', 1) AS name_id,
        (SELECT JSON_ARRAYAGG(JSON_OBJECT('id',
                                    t2.name_id,
                                    'actions',
                                    (SELECT 
                                            JSON_ARRAYAGG(JSON_OBJECT('id', t3.id, 'name', t3.name))
                                        FROM
                                            function_groups t3
                                        WHERE
                                            t3.name LIKE concat(t2.name_id,'.%')
                                        )))
            FROM
                (SELECT 
                    distinct SUBSTRING_INDEX(t2.name, '.', 2) AS name_id
                FROM
                    function_groups t2) t2
            WHERE
                t2.name_id LIKE concat(SUBSTRING_INDEX(t1.name, '.', 1),'.%')
           ) AS groups
    FROM function_groups t1
    

    New query EXPLAIN plan

    id select_type and table Extra
    1 PRIMARY t1 Using index; Using temporary
    2 DEPENDENT SUBQUERY Using where
    4 DERIVED t2 Using index; Using temporary
    3 DEPENDENT SUBQUERY t3 Using where; Using index

    Old query EXPLAIN plan

    id select_type and table Extra
    1 PRIMARY t1 Using index; Using temporary; Using filesort
    2 DEPENDENT SUBQUERY Using where
    4 DERIVED t2 Using index; Using temporary; Using filesort
    3 DEPENDENT SUBQUERY t3 Using where; Using index; Using temporary; Using filesort

    DB Fiddle