I have this data:
select * from (
select 'A' as JOB, 15 as errors from dual union all
select 'B' as JOB, 17 as errors from dual union all
select 'C' as JOB, 29 as errors from dual union all
select 'D' as JOB, 27 as errors from dual union all
select 'E' as JOB, 35 as errors from dual union all
select 'F' as JOB, 32 as errors from dual union all
select 'G' as JOB, 75 as errors from dual union all
select 'H' as JOB, 31 as errors from dual union all
select 'I' as JOB, 12 as errors from dual union all
select 'J' as JOB, 10 as errors from dual
)
And, in words, I need: The jobs constituting the (top) 60% of errors
So, in this case, that would be (113):
select sum(errors) * .4 as cut_off from ...
The final results would be these, because their sum < 113:
JOB | ERRORS |
---|---|
G | 75 |
E | 35 |
I basically need a filter that keeps some sort of running sum, and then throws away everything once it hits that value.
I have this query, which doesn't quite work and I would prefer not use the with
statement
with data as (
select 'A' as JOB, 15 as errors from dual union all
select 'B' as JOB, 17 as errors from dual union all
select 'C' as JOB, 29 as errors from dual union all
select 'D' as JOB, 27 as errors from dual union all
select 'E' as JOB, 35 as errors from dual union all
select 'F' as JOB, 32 as errors from dual union all
select 'G' as JOB, 75 as errors from dual union all
select 'H' as JOB, 31 as errors from dual union all
select 'I' as JOB, 12 as errors from dual union all
select 'J' as JOB, 10 as errors from dual
)
select k.*
from (
select t.*,
errors + LAG(errors, 1, 0) OVER (order by errors desc ) previous
from data t
) k where previous >= (select sum(errors) *.4 from data) order by errors desc
And I have tried windowed sum:
select k.*
from (
select t.*,
SUM(errors) OVER (
partition by JOB
order by errors desc
RANGE BETWEEN UNBOUNDED PRECEDING
AND CURRENT ROW
) as limit
from (
select 'A' as JOB, 15 as errors from dual union all
select 'B' as JOB, 17 as errors from dual union all
select 'C' as JOB, 29 as errors from dual union all
select 'D' as JOB, 27 as errors from dual union all
select 'E' as JOB, 35 as errors from dual union all
select 'F' as JOB, 32 as errors from dual union all
select 'G' as JOB, 75 as errors from dual union all
select 'H' as JOB, 31 as errors from dual union all
select 'I' as JOB, 12 as errors from dual union all
select 'J' as JOB, 10 as errors from dual
) t
) k order by errors desc
SUM(errors) OVER (ORDER BY errors DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
calculates running total of errors in descending order.WHERE cum_errors <= cut_off
filters jobs with cumulative errors under 40% of total.SELECT job, errors
FROM (
SELECT job, errors,
SUM(errors) OVER (ORDER BY errors DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cum_errors,
SUM(errors) OVER () * 0.4 AS cut_off
FROM (
SELECT 'A' AS job, 15 AS errors FROM DUAL UNION ALL
SELECT 'B' AS job, 17 AS errors FROM DUAL UNION ALL
SELECT 'C' AS job, 29 AS errors FROM DUAL UNION ALL
SELECT 'D' AS job, 27 AS errors FROM DUAL UNION ALL
SELECT 'E' AS job, 35 AS errors FROM DUAL UNION ALL
SELECT 'F' AS job, 32 AS errors FROM DUAL UNION ALL
SELECT 'G' AS job, 75 AS errors FROM DUAL UNION ALL
SELECT 'H' AS job, 31 AS errors FROM DUAL UNION ALL
SELECT 'I' AS job, 12 AS errors FROM DUAL UNION ALL
SELECT 'J' AS job, 10 AS errors FROM DUAL
) t
)
WHERE cum_errors <= cut_off
ORDER BY errors DESC;
Output:
JOB | ERRORS |
---|---|
G | 75 |
E | 35 |