datecastinggoogle-bigqueryformatdatetime

How to convert multiple string formats into one date format in BigQuery?


I have a date column say "payment date" in my data which has multiple string formats such as ddmmyyyy,ddmyyyy and yyyymmdd. Does anyone know how I can convert all of these into a unified date format like dd-mm-yyyy in BigQuery?


Solution

  • Below example is for BigQuery Standard SQL:

    #standardSQL
    SELECT payment_date, 
      FORMAT_DATE('%d-%m-%Y', CASE LENGTH(payment_date) 
        WHEN 7 THEN 
          SAFE.DATE(
            SAFE_CAST(SUBSTR(payment_date, -4) AS INT64), 
            SAFE_CAST(SUBSTR(payment_date, 3, 1) AS INT64), 
            SAFE_CAST(SUBSTR(payment_date, 1, 2) AS INT64)
          )    
        WHEN 8 THEN 
          CASE 
            WHEN EXTRACT(YEAR FROM date_ddmmyyyy) > 2000 THEN date_ddmmyyyy
            ELSE date_yyyymmdd
          END    
        ELSE NULL
      END) formatted_payment_date
    FROM `project.dataset.table`, 
      UNNEST([STRUCT<date_ddmmyyyy DATE, date_yyyymmdd DATE>(
        SAFE.PARSE_DATE('%d%m%Y', payment_date), 
        SAFE.PARSE_DATE('%Y%m%d', payment_date)
      )])
    

    You can test and play with above using dummy data as below

    #standradSQL
    WITH `project.dataset.table` AS (
      SELECT 1 id, '11112011' payment_date UNION ALL
      SELECT 2,    '1112011' UNION ALL
      SELECT 3,    '20111111' UNION ALL
      SELECT 4,    '20112011' UNION ALL
      SELECT 5,    '20110228'
    )
    SELECT id, payment_date, 
      FORMAT_DATE('%d-%m-%Y', CASE LENGTH(payment_date) 
        WHEN 7 THEN 
          SAFE.DATE(
            SAFE_CAST(SUBSTR(payment_date, -4) AS INT64), 
            SAFE_CAST(SUBSTR(payment_date, 3, 1) AS INT64), 
            SAFE_CAST(SUBSTR(payment_date, 1, 2) AS INT64)
          )    
        WHEN 8 THEN 
          CASE 
            WHEN EXTRACT(YEAR FROM date_ddmmyyyy) > 2000 THEN date_ddmmyyyy
            ELSE date_yyyymmdd
          END    
        ELSE NULL
      END) formatted_payment_date
    FROM `project.dataset.table`, 
      UNNEST([STRUCT<date_ddmmyyyy DATE, date_yyyymmdd DATE>(
        SAFE.PARSE_DATE('%d%m%Y', payment_date), 
        SAFE.PARSE_DATE('%Y%m%d', payment_date)
      )])
    ORDER BY id
    

    with result as:

    Row id  payment_date    formatted_payment_date   
    1   1   11112011        11-11-2011   
    2   2   1112011         11-01-2011   
    3   3   20111111        11-11-2011   
    4   4   20112011        20-11-2011   
    5   5   20110228        28-02-2011