I'm trying to figure out how to write GQL (Google SQL) query to filter deeply nested structure and after that nest it again and leave first of records for STRUCT properties on the same level with ARRAY.
I prepared a schema sample
WITH
Sale AS (
SELECT
"1" AS _id,
STRUCT("11" AS _id,
"SERVICE" AS feedbackType,
DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS createDate) AS serviceFeedback,
[STRUCT("host" AS key,
"localhost" AS value),
STRUCT("location" AS key,
"Paris" AS value)] AS tags,
TRUE AS reviewed,
[STRUCT("1" as saleId, STRUCT("101" AS _id,
"PRODUCT" AS feedbackType,
DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS createDate) AS productFeedback),
STRUCT("1" as saleId, STRUCT("102" AS _id,
"PRODUCT" AS feedbackType,
DATE(TIMESTAMP("2017-01-20 14:06:51.655")) AS createDate) AS productFeedback) ] AS saleItems,
DATE(TIMESTAMP("2017-01-20 14:05:51.655")) AS latestFeedbackDate )
And a source filter query that flattened all nested fields are required for filtering.
SELECT
saleId,
serviceFeedback,
saleTags,
reviewed,
saleItems,
latestFeedbackDate
FROM (
SELECT
sale._id AS saleId,
serviceFeedback,
sale.tags AS saleTags,
reviewed,
saleItems,
latestFeedbackDate
FROM
`Sale` AS sale,
sale.saleItems AS saleItems
WHERE
reviewed = TRUE
AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
AND serviceFeedback._id IS NOT NULL
AND saleItems.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655")))
ORDER BY
latestFeedbackDate DESC
LIMIT
20
The main issue is that after this filtering a want to group all saleItems
by sale._id
(return the initial structure) and retrieve serviceFeedback
field that has type STRUCT.
The expected result in JSON format is:
{
"saleId":"1",
"serviceFeedback":{"_id":"11","feedbackType":"SERVICE","createDate":"2017-01-20"},
"saleTags":[{"key":"host","value":"localhost"},{"key":"location","value":"Paris"}],
"reviewed":"true",
"saleItems":[
{"saleId":"1","productFeedback":{"_id":"101","feedbackType":"PRODUCT","createDate":"2017-01-20"},
{"saleId":"1","productFeedback":{"_id":"102","feedbackType":"PRODUCT","createDate":"2017-01-20"},
],
"latestFeedbackDate":"2017-01-20"
}
I wrote the simplest idea of query that comes to my mind. It produces the correct result. But probably it's possible to rewrite it more efficient way,
SELECT
saleId,
serviceFeedback,
latestFeedbackDate,
subQuery.saleItems as saleItems
FROM
sale
RIGHT JOIN (
SELECT
saleId,
ARRAY_AGG(saleItems) as saleItems
FROM (
SELECT
saleId,
saleItems
FROM (
SELECT
sale._id AS saleId,
latestFeedbackDate,
saleItems
FROM
`Sale` AS sale,
sale.saleItems AS saleItems
WHERE
reviewed = TRUE
AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
AND serviceFeedback._id IS NOT NULL
AND saleItems.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655")))
ORDER BY
latestFeedbackDate DESC)
GROUP BY
saleId
) AS subQuery
ON
sale._id = subQuery.saleId
Could you suggest me a better solution to achieve the same results?
Could you suggest me a better solution to achieve the same results?
Below produces exact same schema as original table and just applies needed filter to saleItems
#standardSQL
SELECT * REPLACE(
ARRAY(
SELECT saleItems FROM UNNEST(saleItems) saleItems
WHERE reviewed = TRUE
AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
AND serviceFeedback._id IS NOT NULL
AND saleItems.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
) AS saleItems)
FROM sale
If you need just subset of fields - use below as an example
#standardSQL
SELECT
_id saleId,
serviceFeedback,
ARRAY(
SELECT saleItems FROM UNNEST(saleItems) saleItems
WHERE reviewed = TRUE
AND serviceFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
AND serviceFeedback._id IS NOT NULL
AND saleItems.productFeedback.createDate >= DATE(TIMESTAMP("2017-01-18 14:05:51.655"))
) AS saleItems
FROM sale