sqlteradatateradata-sql-assistantteradatasql

Teradata Maximum process end time capture for multiple processes


I am trying to fetch data for one asset for reporting purposes which is dependent on multiple processes and start time should be the earliest job that starts among them and end time should be latest completion time for one of those assets. I prepared one query which is running fine if asset has only one process but need help for multiple processes. Below are the details:

Table Name: ABC.ASSET_CONFIG
              
 ASSET_ID,APPLICATION,ASSET_NAME,MODULE_NAME,ASSET_DESCRIPTION,ASSET_TYPE
 9999,Marketing,Activation Data Mart,ADM,NA,Business Necessary

Table Name: ABC.ASSET_DEPENDENCY_CONFIG
         ASSET_ID,COMPLETING_PROCESS_ID,DEPENDENCY_LEVEL,DATA_DELAY,PRCS_CNTRL_STORE_TYPE                                          
         9999,206,1,-1,ABC.BATCH_CONTROL
         9999,208,1,-1,ABC.BATCH_CONTROL
         9999,508,1,-1,ABC.BATCH_CONTROL
         9999,1024,1,-1,ABC.BATCH_CONTROL

One asset may be related to multiple processes(COMPLETING_PROCESS_ID) which is completing at different times, all needs to be completed(Status as 'C' in BATCH_CONTROL table) to be able to get the full dataset of that asset. We have table(ABC.BATCH_CONTROL) where completion status of each processes are tracked. Status: 'C' denotes - Process Completed.

  Table Name: ABC.BATCH_CONTROL
       DATA_SOURCE_ID,BATCH_RUN_ID,LOAD_START_TS,LOAD_END_TS,BATCH_START_TS,BATCH_END_TS,BATCH_STATUS_CD
      206,784,'2023-05-28 00:00:00','2023-05-29 00:00:00','2023-05-29 13:22:00','2023-05-29 14:36:00','C'
      206,783,'2023-05-27 00:00:00','2023-05-28 00:00:00','2023-05-28 13:23:00','2023-05-28 14:38:00','C'
      206,782,'2023-05-26 00:00:00','2023-05-27 00:00:00','2023-05-27 13:22:00','2023-05-27 15:21:00','C'
      206,781,'2023-05-25 00:00:00','2023-05-26 00:00:00','2023-05-26 13:25:00','2023-05-26 15:04:00','C'

       1024,2224,'2023-05-28 00:00:00','2023-05-29 00:00:00','2023-05-29 17:50:00','2023-05-29 18:23:00','C'
       1024,2223,'2023-05-27 00:00:00','2023-05-28 00:00:00','2023-05-28 17:50:00','2023-05-28 18:21:00','C'
       1024,2222,'2023-05-26 00:00:00','2023-05-27 00:00:00','2023-05-27 17:46:00','2023-05-27 18:22:00','C'
       1024,2221,'2023-05-25 00:00:00','2023-05-26 00:00:00','2023-05-26 17:48:00','2023-05-26 18:18:00','C'

I tried below code which is working fine for a asset which has only one process, if an asset data readiness depends on multiple multiple processes it won't work:

Output i want is:

     9999,Marketing,Business Necessary,Activation Data Mart,ADM,1024,2023-05-28,2023-05-29 13:22:00,2023-05-29 17:50:00, 04:12:23,0 04:12:23.000000,0 00:04:12.0000,203.0000,Completed

Query:

    SELECT 
    QRY3.ASSET_ID,     
    QRY3.APPLICATION,
    QRY3.ASSET_TYP,
    QRY3.ASSET_NAME,
    QRY3.MODULE_NAME,
    QRY3.COMPLETING_PROCESS_ID,
    QRY3.DATA_DATE,
    QRY3.START_TIME,
    QRY3.END_TIME,
    SUBSTRING((TRIM((CAST(((CAST(QRY3.END_TIME AS TIMESTAMP(0)) - CAST(QRY3.START_TIME AS  TIMESTAMP(0))) DAY(4) TO SECOND)AS VARCHAR(50))))),2,9) AS DURATION,
    (TRIM((CAST(((CAST(QRY3.END_TIME AS TIMESTAMP(0)) - CAST(QRY3.START_TIME AS TIMESTAMP(0)))  DAY(4) TO SECOND)AS VARCHAR(50))))) AS DURATION_TIMESTAMP,
    --((CAST(QRY3.END_TIME AS TIMESTAMP(0)) - CAST(QRY3.START_TIME AS TIMESTAMP(0)))          Second(10,6)) AS DURATION_TIMESTAMP_SECOND,
    ((CAST(QRY3.END_TIME AS TIMESTAMP(0)) - QRY3.START_TIME) DAY(4) to SECOND(4)) AS t1,
        (EXTRACT(DAY from t1)*(24*60*60) +
            EXTRACT(HOUR from t1)*(60*60) + 
            EXTRACT(MINUTE from t1)*60 +
            EXTRACT(SECOND from t1)
            ) AS DURATION_SECONDS,
    CASE WHEN ETL1.BATCH_STATUS_CD = 'C' THEN 'Completed'
        WHEN ETL1.BATCH_STATUS_CD = 'R' THEN 'Running'
        WHEN ETL1.BATCH_STATUS_CD = 'N' THEN 'Not Started'
        WHEN ETL1.BATCH_STATUS_CD = 'F' THEN 'Failed'
    END AS STATUS
    FROM
    (
    SELECT 
    QRY2.ASSET_ID,
    QRY2.APPLICATION,
    QRY2.ASSET_TYP,
    QRY2.ASSET_NAME,
    QRY2.MODULE_NAME,
    QRY2.COMPLETING_PROCESS_ID,
    QRY2.DATA_DATE
    ,MIN(QRY2.START_TIME) OVER(PARTITION BY  QRY2.APPLICATION,QRY2.ASSET_TYP,QRY2.ASSET_NAME,QRY2.MODULE_NAME,QRY2.COMPLETING_PROCESS_ID,QRY2.DATA_DATE)AS START_TIME
    ,MAX(QRY2.END_TIME) OVER(PARTITION BY  QRY2.APPLICATION,QRY2.ASSET_TYP,QRY2.ASSET_NAME,QRY2.MODULE_NAME,QRY2.COMPLETING_PROCESS_ID,QRY2.DATA_DATE)AS END_TIME
    ,MAX(QRY2.BATCH_RUN_ID) OVER(PARTITION BY   QRY2.APPLICATION,QRY2.ASSET_TYP,QRY2.ASSET_NAME,QRY2.MODULE_NAME,QRY2.COMPLETING_PROCESS_ID,QRY2.DATA_DATE) AS BATCH_RUN_ID
    FROM
    (
    SELECT 
    QRY1.ASSET_ID,
    QRY1.APPLICATION,
    QRY1.ASSET_TYP,
    QRY1.MODULE_NAME,
    QRY1.ASSET_NAME, 
    QRY1.COMPLETING_PROCESS_ID,
    QRY1.DATA_DATE AS DATA_DATE,
    ETL.BATCH_STATUS_CD,
    CASE WHEN
        QRY1.COMPL_SESSION_NAME IS NOT NULL 
            THEN SESS.SESSION_INSTANCE_START_TS 
        ELSE 
            ETL.BATCH_START_TS
    END AS START_TIME,
    CASE WHEN
        QRY1.COMPL_SESSION_NAME IS NOT NULL 
            THEN SESS.SESSION_INSTANCE_END_TS 
        ELSE 
            ETL.BATCH_END_TS
    END AS END_TIME,
    ETL.BATCH_RUN_ID
    FROM
    (
    select MASTER.ASSET_ID,
    MASTER.APPLICATION,
    MASTER.ASSET_TYP,
    MASTER.MODULE_NAME,
    MASTER.ASSET_NAME, 
    DEPEND.COMPLETING_PROCESS_ID,
    CAL.CALENDAR_DAY_DT AS RUN_DATE,
    cast(CAL.CALENDAR_DAY_DT as format 'YYYY-MM-DD')+ cast(DEPEND.DATA_DELAY as interval DAY)     AS DATA_DATE,
    DEPEND.COMPL_SESSION_NAME
    from 
    ABC.ASSET_CONFIG MASTER
    INNER JOIN
    ABC.ASSET_DEPENDENCY_CONFIG DEPEND
    ON
    MASTER.ASSET_ID = DEPEND.ASSET_ID
    inner join
    ndw_base_views.fiscal_calendar cal
    on
    1=1
    and 
    CALENDAR_DAY_DT BETWEEN '2023-05-27' AND '2023-05-27' 
    where
    DEPEND.PRCS_CNTRL_STORE_TYP = 'NDW_PRCS_CNTRL_VIEWS.NDW_ETL_BATCH_CONTROL'
    GROUP BY 1,2,3,4,5,6,7,8,9
    )QRY1
    LEFT JOIN
    ABC.BATCH_CONTROL ETL
    ON
    ETL.DATA_SOURCE_ID= QRY1.COMPLETING_PROCESS_ID
    AND ETL.INCREMENTAL_LOAD_START_TS = QRY1.DATA_DATE
    --LEFT JOIN
    --ABC.NDW_ETL_WORKFLOW_CONTROL WF
    --ON WF.DATA_SOURCE_ID = ETL.DATA_SOURCE_ID
    --AND ETL.BATCH_RUN_ID = WF.BATCH_RUN_ID
    --LEFT JOIN
    --ABC.NDW_ETL_SESSION_CONTROL SESS
    --ON WF.WORKFLOW_RUN_ID = SESS.WORKFLOW_RUN_ID
    --AND SESS.SESSION_INSTANCE_NM = QRY1.COMPL_SESSION_NAME
    --WHERE QRY1.ASSET_ID = 10080
    ) QRY2
    ) QRY3
    INNER JOIN
    ABC.BATCH_CONTROL ETL1
    ON
    ETL1.DATA_SOURCE_ID = QRY3.COMPLETING_PROCESS_ID
    AND ETL1.BATCH_RUN_ID = QRY3.BATCH_RUN_ID
    GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14;

Solution

  • Read your Query code - You need to do inner join with the ABC.ASSET_DEPENDENCY_CONFIG table on the asset ID to consider the dependency relationship between assets and processes.You need to use the MIN and MAX aggregation functions to determine the earliest start time (MIN(B.BATCH_START_TS)) and the latest completion time (MAX(B.BATCH_END_TS)) among all the processes related to an asset. That takes into account multiple processes for an asset. It identifies the earliest start time and the latest completion time among all the processes for a given asset. So your final code will be like this PS the original answer-er gave a sudo query but another person took some extra time to rewrite the original query with my suggestions which is below

                SELECT 
        MASTER.APPLICATION,
        MASTER.ASSET_NAME,
        MASTER.MODULE_NAME,
        MASTER.ASSET_TYP,
        CAST(CAL.CALENDAR_DAY_DT as format 'YYYY-MM-DD') + CAST(DEPEND.DATA_DELAY as interval DAY) AS DATA_DATE,
        MIN(ETL.BATCH_START_TS) AS START_TIME,
        MAX(ETL.BATCH_END_TS) AS FINAL_REFRESH_TIME,
            CASE 
                    WHEN DEPEND.DATA_DELAY = -1 AND FINAL_REFRESH_TIME < cast(current_date as timestamp(0)) + INTERVAL '9' HOUR THEN 'Yes'
                    WHEN DEPEND.DATA_DELAY = -2 AND FINAL_REFRESH_TIME < cast(current_date + INTERVAL '1' DAY as timestamp(0)) + INTERVAL '9' HOUR THEN 'Yes'
                    ELSE 'No' END AS REFRESHED_BEFORE_9_AM, 
            CASE 
                    WHEN DEPEND.DATA_DELAY = -1 AND FINAL_REFRESH_TIME < cast(current_date as timestamp(0)) + INTERVAL '12' HOUR THEN 'Yes'
                    WHEN DEPEND.DATA_DELAY = -2 AND FINAL_REFRESH_TIME < cast(current_date + INTERVAL '1' DAY as timestamp(0)) + INTERVAL '12' HOUR THEN 'Yes'
                    ELSE 'No' END AS REFRESHED_BEFORE_12_PM,
            CASE     
                    WHEN FINAL_REFRESH_TIME < cast(current_date + INTERVAL '1' DAY as timestamp(0)) + INTERVAL '9' HOUR THEN 'No'
                    ELSE 'Yes' END AS SOURCE_DATA_ISSUE,
            CASE 
                    WHEN FINAL_REFRESH_TIME < cast(current_date + INTERVAL '1' DAY as timestamp(0)) + INTERVAL '9' HOUR THEN 'No' 
                    ELSE 'Yes' END AS ETL_OPS_PROCESS_ISSUE,
            CASE
                    WHEN FINAL_REFRESH_TIME < cast(current_date + INTERVAL '1' DAY as timestamp(0)) + INTERVAL '9' HOUR THEN 'No' 
                    ELSE 'Yes' END AS PLATFORM_ISSUE,
            CASE 
                    WHEN SOURCE_DATA_ISSUE = 'No' AND ETL_OPS_PROCESS_ISSUE = 'No' AND PLATFORM_ISSUE = 'No' THEN 'NA'
                    WHEN SOURCE_DATA_ISSUE = 'Yes' OR ETL_OPS_PROCESS_ISSUE = 'Yes' OR PLATFORM_ISSUE = 'Yes' 
                        AND DEPEND.DATA_DELAY = -1 AND FINAL_REFRESH_TIME < cast(current_date as timestamp(0)) + INTERVAL '9' HOUR THEN 'Yes'
                    WHEN SOURCE_DATA_ISSUE = 'Yes' OR ETL_OPS_PROCESS_ISSUE = 'Yes' OR PLATFORM_ISSUE = 'Yes'
                        AND DEPEND.DATA_DELAY = -2 AND FINAL_REFRESH_TIME < cast(current_date + INTERVAL '1' DAY as timestamp(0)) + INTERVAL '12' HOUR THEN 'Yes'
                    ELSE 'No' END AS ISSUE_RESOLVED_SAME_DAY, 
            SUBSTRING((TRIM((CAST(((CAST(MAX(ETL.BATCH_END_TS) AS TIMESTAMP(0)) - CAST(MIN(ETL.BATCH_START_TS) AS TIMESTAMP(0))) DAY(4) TO SECOND) AS VARCHAR(50))))), 2, 9) AS DURATION,
            /* (TRIM((CAST(((CAST(MAX(ETL.BATCH_END_TS) AS TIMESTAMP(0)) - CAST(MIN(ETL.BATCH_START_TS) AS TIMESTAMP(0))) DAY(4) TO SECOND) AS VARCHAR(50))))) AS DURATION_TIMESTAMP,
            ((CAST(MAX(ETL.BATCH_END_TS) AS TIMESTAMP(0)) - MIN(ETL.BATCH_START_TS)) DAY(4) TO SECOND(4)) AS t1,
            (EXTRACT(DAY FROM t1) * (24 * 60 * 60) +
            EXTRACT(HOUR FROM t1) * (60 * 60) +
            EXTRACT(MINUTE FROM t1) * 60 +
            EXTRACT(SECOND FROM t1)) AS DURATION_SECONDS, */
            CASE ETL.BATCH_STATUS_CD
                WHEN 'C' THEN 'Completed'
                WHEN 'R' THEN 'Running'
                WHEN 'N' THEN 'Not Started'
                WHEN 'F' THEN 'Failed'
            END AS STATUS
        FROM
            ABC.ASSET_CONFIG MASTER
            INNER JOIN ABC.ASSET_DEPENDENCY_CONFIG DEPEND ON MASTER.ASSET_ID = DEPEND.ASSET_ID
            INNER JOIN ABC.ETL_BATCH_CONTROL ETL ON DEPEND.COMPLETING_PROCESS_ID = ETL.DATA_SOURCE_ID 
            INNER JOIN ABC.FISCAL_CALENDAR CAL ON 1=1 
            LEFT JOIN ABC.ETL_WORKFLOW_CONTROL WF ON WF.DATA_SOURCE_ID = ETL.DATA_SOURCE_ID AND WF.BATCH_RUN_ID = ETL.BATCH_RUN_ID
            LEFT JOIN ABC.NDW_ETL_SESSION_CONTROL SESS ON WF.WORKFLOW_RUN_ID = SESS.WORKFLOW_RUN_ID AND SESS.SESSION_INSTANCE_NM = DEPEND.COMPL_SESSION_NAME
            WHERE CAL.CALENDAR_DAY_DT BETWEEN '?ldt1' AND '?ldt2' 
            AND DEPEND.PRCS_CNTRL_STORE_TYP = 'NDW_PRCS_CNTRL_VIEWS.NDW_ETL_BATCH_CONTROL'
            AND CAST(DATA_DATE AS DATE) = CAST(ETL.INCREMENTAL_LOAD_START_TS AS DATE)
        GROUP BY
            --1,2,3,4,5,8,9,10,11,12,13
        MASTER.APPLICATION,
        MASTER.ASSET_NAME,
        MASTER.MODULE_NAME,
        MASTER.ASSET_TYP,
        DATA_DATE,
        DATA_DELAY,
        STATUS