I'm creating a query in Impala SQL that needs to perform an operation on data prior to agreggate it. This is my query:
With concatenated_addresses As (
Select site_name, concat(parent_address_line_1, coalesce(parent_address_line_2," "), coalesce(parent_address_line_3," "), coalesce(parent_address_line_4," ")) as concated_address
From locations_all_vw
)
Select l.site_name, min(l.parent_address_region) as region, group_concat(distinct c.concated_address, " | ") as address_line_1,
min(l.parent_city) as city, min(l.parent_cntry_code) as city_code, min(l.parent_county) as country, min(l.parent_state_province) as state_province, min(parent_state_province_code) as province_code, min(parent_location_status) as status,
min(l.parent_location_sub_type) as location_subtype, min(l. parent_location_type) as location_type, min(l.parent_longitude) as longitude, min(l.parent_latitude) as latitue,
min(l.parent_postal_code) as postal_code, min(l.parent_postal_code_ext) as postal_code_ext, group_concat(distinct l.source_system_code, ", ") as source_system, group_concat( distinct l.business_group_description, ", ") as business_group
from locations_all_vw l
INNER JOIN concatenated_addresses c
ON l.site_name = c.site_name
GROUP BY l.site_name
The query first concatenates address fields into 1 in a CTE, joins that CTE to the actual table, and groups everything. I'm doing this to get a single address from multiple columns, and then get distinct values for those aggregated addresses.
The query works, but it's pretty slow (the table has over 100.000 rows). I'm not a SQL expert, so I'm wondering if there is a more performant way to get what I need.
Thanks!
I created the query and it works, but I'd like to get a better query
You can do in two ways -
Select l.site_name, min(l.parent_address_region) as region, group_concat(distinct concat(parent_address_line_1, coalesce(parent_address_line_2," "), coalesce(parent_address_line_3," "), coalesce(parent_address_line_4," ")), " | ") as address_line_1,
min(l.parent_city) as city, min(l.parent_cntry_code) as city_code, min(l.parent_county) as country, min(l.parent_state_province) as state_province, min(parent_state_province_code) as province_code, min(parent_location_status) as status,
min(l.parent_location_sub_type) as location_subtype, min(l. parent_location_type) as location_type, min(l.parent_longitude) as longitude, min(l.parent_latitude) as latitue,
min(l.parent_postal_code) as postal_code, min(l.parent_postal_code_ext) as postal_code_ext, group_concat(distinct l.source_system_code, ", ") as source_system, group_concat( distinct l.business_group_description, ", ") as business_group
from locations_all_vw l
GROUP BY l.site_name
With loc_cte As (
Select l.*, concat(parent_address_line_1, coalesce(parent_address_line_2," "), coalesce(parent_address_line_3," "), coalesce(parent_address_line_4," ")) as concated_address
From locations_all_vw l
)
Select l.site_name, min(l.parent_address_region) as region, group_concat(distinct concated_address, " | ") as address_line_1,
min(l.parent_city) as city, min(l.parent_cntry_code) as city_code, min(l.parent_county) as country, min(l.parent_state_province) as state_province, min(parent_state_province_code) as province_code, min(parent_location_status) as status,
min(l.parent_location_sub_type) as location_subtype, min(l. parent_location_type) as location_type, min(l.parent_longitude) as longitude, min(l.parent_latitude) as latitue,
min(l.parent_postal_code) as postal_code, min(l.parent_postal_code_ext) as postal_code_ext, group_concat(distinct l.source_system_code, ", ") as source_system, group_concat( distinct l.business_group_description, ", ") as business_group
from loc_cte l
GROUP BY l.site_name