How to create a join that has all product_features
with all feature_text
rows
The join must not have duplicated rows and must pivot two languages into two different columns. The join should be as efficient / as fast as possible.
The relation between product_features
and feature_text
is
B12XBE
for DSP
)DSP
, XAB
for UTB2EF
& X9Y8Z7
)The result of the query should be this
Year | product | feature | prod_group | text_de | text_en |
---|---|---|---|---|---|
2024 | B12XBE | DSP | B1 | Anzeige 2,5 | Display 1" |
2024 | B12XBE | B70 | B1 | NULL | B1 hard... |
2024 | UTB2EF | U04 | UT | NULL | UT diam... |
2024 | UTB2EF | DSP | NULL | Anzeige | Display |
2024 | X9Y8Z7 | DSP | NULL | Anzeige | Display |
2024 | X9Y8Z7 | XAB | X9 | 13mm for ... | 13mm für .. |
If a specified text is available (B1 DSP
) use this text Display 1"
otherwise use the non-specific (DSP
) text Display
My query below produces false matches (gist, sqlime.org demo):
2024 B12XBE A1C B1 B1 hardened steal UT gehärteter Stahl
is wrong. The texts should always start with UT
.Sample:
DROP TABLE IF EXISTS product_features;
DROP TABLE IF EXISTS feature_text;
CREATE TABLE product_features (
year INTEGER, -- (4)
product TEXT, -- (6)
feature TEXT -- (3)
);
CREATE TABLE feature_text (
year INTEGER, -- (4)
product_group TEXT,
-- (2) usually matches the first two digits of product
-- but is sometimes empty / NULL or blank ' '
feature TEXT, -- (3)
language INTEGER, -- (2)
description1 TEXT -- up to 40
);
INSERT INTO product_features (year, product, feature) VALUES
(2024, 'B12XBE', 'A1C'), (2024, 'B12XBE', '7B0'),
(2024, 'B12XBE', 'DUL'), (2024, 'UTB2EF', 'A1C'),
(2024, 'UTB2EF', '7B0'), (2024, 'UTB2EF', 'DUL'),
(2024, 'X9Y8Z7', 'DUH'), (2024, 'X9Y8Z7', '7B0');
INSERT INTO feature_text
(year, product_group, feature, language, description1)
VALUES
(2024, 'B1', 'A1C', 1, 'B1 hardened steel'),
(2024, 'B1', '7B0', 1, 'B1 diamond tip'),
(2024, 'B1', 'DSP', 1, 'display 1 inch'),
(2024, 'B1', 'DSP', 1, 'Anzeige 1,5 cm'),
(2024, 'UT', 'A1C', 1, 'UT hardened steel'),
(2024, 'UT', '7B0', 1, 'UT diamond tip'),
(2024, 'UT', 'DSP', 1, 'display 1,5 inch'),
(2024, 'UT', 'DSP', 1, 'Anzeige 2,25 cm'),
(2024, ' ', 'DUL', 1, '10mm for light duty'),
(2024, 'X9', '7B0', 1, 'X9 diamond tip'),
(2024, ' ', 'DUH', 1, '13mm for heavy duty'),
(2024, 'B1', 'A1C', 2, 'B1 gehärteter Stahl'),
(2024, 'B1', '7B0', 2, 'B1 Diamant Spitze'),
(2024, 'UT', 'A1C', 2, 'UT gehärteter Stahl'),
(2024, 'UT', '7B0', 2, 'UT Diamant Spitze'),
(2024, ' ', 'DUL', 2, '10mm für leichte Aufgaben'),
(2024, 'X9', '7B0', 2, 'X9 Diamant Spitze'),
(2024, ' ', 'DUH', 2, '13mm für schwere Aufgaben'),
(2024, NULL, 'DSP', 1, 'display'),
(2024, NULL, 'DSP', 1, 'Anzeige');
My approach:
WITH prft_en as (
SELECT
pf.year, pf.product, pf.feature,
COALESCE(ft1.product_group, ft2.product_group) AS product_group,
COALESCE(ft1.description1, ft2.description1) AS feature_text_en,
COALESCE(ft1.language, ft2.language) AS language_en
FROM product_features pf
LEFT JOIN feature_text ft1
ON pf.year = ft1.year
AND pf.feature = ft1.feature
AND substr(pf.product, 1,2) = ft1.product_group
LEFT JOIN feature_text ft2
ON pf.year = ft2.year
AND pf.feature = ft2.feature
AND ft2.product_group is ' ' OR ft2.product_group is NULL
WHERE ft1.language = 1 OR ft2.language = 1
), prft_de AS (
SELECT
pf.year, pf.product, pf.feature,
COALESCE(ft1.product_group, ft2.product_group) AS product_group,
COALESCE(ft1.description1, ft2.description1) AS feature_text_de,
COALESCE(ft1.language, ft2.language) AS language_de
FROM product_features pf
LEFT JOIN feature_text ft1
ON pf.year = ft1.year
AND pf.feature = ft1.feature
AND substr(pf.product, 1,2) = ft1.product_group
LEFT JOIN feature_text ft2
ON pf.year = ft2.year
AND pf.feature = ft2.feature
AND ft2.product_group is ' ' OR ft2.product_group is NULL
WHERE ft1.language = 2 OR ft2.language = 2
)
Query for the CTEs:
SELECT prft_en.year, prft_en.product,
prft_en.feature, prft_en.product_group,
prft_en.feature_text_en, prft_de.feature_text_de
FROM prft_en
LEFT JOIN prft_de
ON prft_en.year = prft_de.year
AND prft_en.feature = prft_de.feature
AND SUBSTR(prft_en.product, 1,2) = prft_en.product_group
I suppose all you need is the LEFT join by complex condition. Like this:
select
product_features.year,
product_features.product,
product_features.feature,
substr(product_features.product, 1, 2) AS product_group,
text_en.description1 AS en,
text_de.description1 AS de
FROM product_features
LEFT JOIN feature_text AS text_en
ON text_en.year = product_features.year
AND text_en.feature = product_features.feature
AND (substr(product_features.product, 1, 2) = text_en.product_group OR text_en.product_group IS NULL OR TRIM(text_en.product_group) = '')
AND text_en.language = 1
LEFT JOIN feature_text AS text_de
ON text_de.year = product_features.year
AND text_de.feature = product_features.feature
AND (substr(product_features.product, 1, 2) = text_de.product_group OR text_de.product_group IS NULL OR TRIM(text_de.product_group) = '')
AND text_de.language = 2
Note that you will get duplicates anyway if the different descriptions are provided both for individual product and for the product group. If you need to resolve such kind of duplicates then every LEFT JOIN needs to me made 2 times (for the product and for the group) and then the appropriate description should be taken with COALESCE.
Edit: Example how to avoid the latter kind of duplicates
select
product_features.year,
product_features.product,
product_features.feature,
substr(product_features.product, 1, 2) AS product_group,
COALESCE(text_en.description1, text_en_gen.description1) AS en,
COALESCE(text_de.description1, text_de_gen.description1) AS de
FROM product_features
LEFT JOIN feature_text AS text_en
ON text_en.year = product_features.year
AND text_en.feature = product_features.feature
AND (substr(product_features.product, 1, 2) = text_en.product_group)
AND text_en.language = 1
LEFT JOIN feature_text AS text_en_gen
ON text_en_gen.year = product_features.year
AND text_en_gen.feature = product_features.feature
AND (text_en_gen.product_group IS NULL OR TRIM(text_en_gen.product_group) = '')
AND text_en_gen.language = 1
LEFT JOIN feature_text AS text_de
ON text_de.year = product_features.year
AND text_de.feature = product_features.feature
AND (substr(product_features.product, 1, 2) = text_de.product_group)
AND text_de.language = 2
LEFT JOIN feature_text AS text_de_gen
ON text_de_gen.year = product_features.year
AND text_de_gen.feature = product_features.feature
AND (text_de_gen.product_group IS NULL OR TRIM(text_de_gen.product_group) = '')
AND text_de_gen.language = 2