sqloracle-database

How to get only the records who's sum constitute > x % of the total


I have this data:

select * from (
    select 'A' as JOB, 15 as errors from dual union all
    select 'B' as JOB, 17 as errors from dual union all
    select 'C' as JOB, 29 as errors from dual union all
    select 'D' as JOB, 27 as errors from dual union all
    select 'E' as JOB, 35 as errors from dual union all
    select 'F' as JOB, 32 as errors from dual union all
    select 'G' as JOB, 75 as errors from dual union all
    select 'H' as JOB, 31 as errors from dual union all
    select 'I' as JOB, 12 as errors from dual union all
    select 'J' as JOB, 10 as errors from dual
)

And, in words, I need: The jobs constituting the (top) 60% of errors

So, in this case, that would be (113):

select sum(errors) * .4 as cut_off from ...

The final results would be these, because their sum < 113:

JOB ERRORS
G 75
E 35

I basically need a filter that keeps some sort of running sum, and then throws away everything once it hits that value.

I have this query, which doesn't quite work and I would prefer not use the with statement

with data as (
    select 'A' as JOB, 15 as errors from dual union all
        select 'B' as JOB, 17 as errors from dual union all
        select 'C' as JOB, 29 as errors from dual union all
        select 'D' as JOB, 27 as errors from dual union all
        select 'E' as JOB, 35 as errors from dual union all
        select 'F' as JOB, 32 as errors from dual union all
        select 'G' as JOB, 75 as errors from dual union all
        select 'H' as JOB, 31 as errors from dual union all
        select 'I' as JOB, 12 as errors from dual union all
        select 'J' as JOB, 10 as errors from dual
)
select k.*
from (
    select t.*,
           errors + LAG(errors, 1, 0) OVER (order by errors desc ) previous
    from data t
) k where previous >= (select sum(errors) *.4 from data) order by errors desc

And I have tried windowed sum:

select k.*
from (
    select t.*,
           SUM(errors) OVER (
               partition by JOB
               order by errors desc
               RANGE BETWEEN UNBOUNDED PRECEDING
                AND CURRENT ROW
          ) as limit
    from (
        select 'A' as JOB, 15 as errors from dual union all
        select 'B' as JOB, 17 as errors from dual union all
        select 'C' as JOB, 29 as errors from dual union all
        select 'D' as JOB, 27 as errors from dual union all
        select 'E' as JOB, 35 as errors from dual union all
        select 'F' as JOB, 32 as errors from dual union all
        select 'G' as JOB, 75 as errors from dual union all
        select 'H' as JOB, 31 as errors from dual union all
        select 'I' as JOB, 12 as errors from dual union all
        select 'J' as JOB, 10 as errors from dual
    ) t
) k order by errors desc

Solution

  • SELECT job, errors
    FROM (
        SELECT job, errors,
            SUM(errors) OVER (ORDER BY errors DESC
                ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cum_errors,
            SUM(errors) OVER () * 0.4 AS cut_off
        FROM (
            SELECT 'A' AS job, 15 AS errors FROM DUAL UNION ALL
            SELECT 'B' AS job, 17 AS errors FROM DUAL UNION ALL
            SELECT 'C' AS job, 29 AS errors FROM DUAL UNION ALL
            SELECT 'D' AS job, 27 AS errors FROM DUAL UNION ALL
            SELECT 'E' AS job, 35 AS errors FROM DUAL UNION ALL
            SELECT 'F' AS job, 32 AS errors FROM DUAL UNION ALL
            SELECT 'G' AS job, 75 AS errors FROM DUAL UNION ALL
            SELECT 'H' AS job, 31 AS errors FROM DUAL UNION ALL
            SELECT 'I' AS job, 12 AS errors FROM DUAL UNION ALL
            SELECT 'J' AS job, 10 AS errors FROM DUAL
        ) t
    )
    WHERE cum_errors <= cut_off
    ORDER BY errors DESC;
    

    Output:

    JOB ERRORS
    G 75
    E 35

    fiddle