I have the following string:
brasília
And I need to convert to:
brasilia
Withou the ´ accent!
How can I do on BigQuery?
Thank you!
Try below as quick and simple option for you:
#standardSQL
WITH lookups AS (
SELECT
'ç,æ,œ,á,é,í,ó,ú,à,è,ì,ò,ù,ä,ë,ï,ö,ü,ÿ,â,ê,î,ô,û,å,ø,Ø,Å,Á,À,Â,Ä,È,É,Ê,Ë,Í,Î,Ï,Ì,Ò,Ó,Ô,Ö,Ú,Ù,Û,Ü,Ÿ,Ç,Æ,Œ,ñ' AS accents,
'c,ae,oe,a,e,i,o,u,a,e,i,o,u,a,e,i,o,u,y,a,e,i,o,u,a,o,O,A,A,A,A,A,E,E,E,E,I,I,I,I,O,O,O,O,U,U,U,U,Y,C,AE,OE,n' AS latins
),
pairs AS (
SELECT accent, latin FROM lookups,
UNNEST(SPLIT(accents)) AS accent WITH OFFSET AS p1,
UNNEST(SPLIT(latins)) AS latin WITH OFFSET AS p2
WHERE p1 = p2
),
yourTableWithWords AS (
SELECT word FROM UNNEST(
SPLIT('brasília,ångström,aperçu,barège, beau idéal, belle époque, béguin, bête noire, bêtise, Bichon Frisé, blasé, blessèd, bobèche, boîte, bombé, Bön, Boötes, boutonnière, bric-à-brac, Brontë Beyoncé,El Niño')
) AS word
)
SELECT
word AS word_with_accent,
(SELECT STRING_AGG(IFNULL(latin, char), '')
FROM UNNEST(SPLIT(word, '')) char
LEFT JOIN pairs
ON char = accent) AS word_without_accent
FROM yourTableWithWords
Output is
word_with_accent word_without_accent
blessèd blessed
El Niño El Nino
belle époque belle epoque
boîte boite
Boötes Bootes
blasé blase
ångström angstrom
bobèche bobeche
barège barege
bric-à-brac bric-a-brac
bête noire bete noire
Bichon Frisé Bichon Frise
Brontë Beyoncé Bronte Beyonce
bêtise betise
beau idéal beau ideal
bombé bombe
brasília brasilia
boutonnière boutonniere
aperçu apercu
béguin beguin
Bön Bon
UPDATE
Below is how to pack this logic into SQL UDF - so accent2latin(word)
can be called to make a "magic"
#standardSQL
CREATE TEMP FUNCTION accent2latin(word STRING) AS
((
WITH lookups AS (
SELECT
'ç,æ,œ,á,é,í,ó,ú,à,è,ì,ò,ù,ä,ë,ï,ö,ü,ÿ,â,ê,î,ô,û,å,ø,Ø,Å,Á,À,Â,Ä,È,É,Ê,Ë,Í,Î,Ï,Ì,Ò,Ó,Ô,Ö,Ú,Ù,Û,Ü,Ÿ,Ç,Æ,Œ,ñ' AS accents,
'c,ae,oe,a,e,i,o,u,a,e,i,o,u,a,e,i,o,u,y,a,e,i,o,u,a,o,O,A,A,A,A,A,E,E,E,E,I,I,I,I,O,O,O,O,U,U,U,U,Y,C,AE,OE,n' AS latins
),
pairs AS (
SELECT accent, latin FROM lookups,
UNNEST(SPLIT(accents)) AS accent WITH OFFSET AS p1,
UNNEST(SPLIT(latins)) AS latin WITH OFFSET AS p2
WHERE p1 = p2
)
SELECT STRING_AGG(IFNULL(latin, char), '')
FROM UNNEST(SPLIT(word, '')) char
LEFT JOIN pairs
ON char = accent
));
WITH yourTableWithWords AS (
SELECT word FROM UNNEST(
SPLIT('brasília,ångström,aperçu,barège, beau idéal, belle époque, béguin, bête noire, bêtise, Bichon Frisé, blasé, blessèd, bobèche, boîte, bombé, Bön, Boötes, boutonnière, bric-à-brac, Brontë Beyoncé,El Niño')
) AS word
)
SELECT
word AS word_with_accent,
accent2latin(word) AS word_without_accent
FROM yourTableWithWords