I have Benford test results, test_show
Expected Counts Found Dif AbsDif Z_score
Sec_Dig
0 0.119679 4318 0.080052 -0.039627 0.039627 28.347781
1 0.113890 2323 0.043066 -0.070824 0.070824 51.771489
2 0.108821 1348 0.024991 -0.083831 0.083831 62.513122
3 0.104330 1298 0.024064 -0.080266 0.080266 60.975864
4 0.100308 3060 0.056730 -0.043579 0.043579 33.683738
5 0.096677 6580 0.121987 0.025310 0.025310 19.884178
6 0.093375 10092 0.187097 0.093722 0.093722 74.804141
7 0.090352 9847 0.182555 0.092203 0.092203 74.687841
8 0.087570 8439 0.156452 0.068882 0.068882 56.587749
9 0.084997 6635 0.123007 0.038010 0.038010 31.646817
I'm trying to plot the Benford results using Plotly.
Here is the code that I have tried so far:
import plotly.graph_objects as go
# Benford chart: observed second-digit proportions as bars, with the
# expected proportions overlaid as a line with markers.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=test_show.index,
    y=test_show.Found,
    name='Found',
    marker_color='rgb(55, 83, 109)',
))
fig.add_trace(go.Scatter(
    x=test_show.index,
    y=test_show.Expected,
    mode='lines+markers',
    name='Expected',
))
fig.update_layout(
    title='Benfords Law',
    xaxis=dict(
        # The axis-level `titlefont` attribute is deprecated; the title font
        # is configured under `title.font` instead.
        title=dict(text='Digits', font=dict(size=16)),
        tickmode='linear',
        tickfont_size=14,
    ),
    yaxis=dict(
        title=dict(text='% Percentage', font=dict(size=16)),
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        # Alpha of 0 makes the legend background and border transparent.
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
)
fig.show()
How can I add a confidence interval for test_show["Expected"] to the plot?
As of Python 3.8 you can use NormalDist (from the standard-library statistics module) to calculate a confidence interval, as explained in detail here. With a slight adjustment to that approach you can include it in your setup with fig.add_traces() using two go.Scatter() traces, setting fill='tonexty' and fillcolor = 'rgba(255, 0, 0, 0.2)' on the last one, like this:
# Half-width of the 95% interval, computed from the Expected column.
CI = confidence_interval(df.Expected, 0.95)

# Upper edge of the band: drawn with a fully transparent line and kept out of
# the legend, so it only serves as the boundary the next trace fills up to.
upper_bound = go.Scatter(
    x=df.index,
    y=df['Expected'] + CI,
    mode='lines',
    line_color='rgba(0,0,0,0)',
    showlegend=False,
)

# Lower edge of the band: fill='tonexty' shades the area between this trace
# and the one added just before it (the upper edge).
lower_bound = go.Scatter(
    x=df.index,
    y=df['Expected'] - CI,
    mode='lines',
    line_color='rgba(0,0,0,0)',
    name='95% confidence interval',
    fill='tonexty',
    fillcolor='rgba(255, 0, 0, 0.2)',
)

# Order matters: the upper edge must be added first so the fill targets it.
fig.add_traces([upper_bound, lower_bound])
Please note that this approach calculates a confidence interval from the very limited df.Expected series, which might not be what you're looking to do here. So let me know how this initial suggestion works out for you and then we can take it from there.
import plotly.graph_objects as go
import pandas as pd
from statistics import NormalDist
def confidence_interval(data, confidence=0.95):
    """Return the half-width of a normal-approximation interval for the mean.

    The interval for the mean of *data* is ``mean - h`` to ``mean + h``,
    where ``h`` is the value returned here.

    Args:
        data: Iterable of numeric samples (a list, a pandas Series, a
            generator, ...). It is materialised once, so one-shot iterables
            are accepted.
        confidence: Two-sided confidence level, e.g. ``0.95`` for 95%.

    Returns:
        The half-width ``h = z * stdev / sqrt(n)`` as a float.

    Raises:
        statistics.StatisticsError: Propagated from ``NormalDist`` when the
            samples cannot be fitted (the statistics docs require at least
            two data points) or when ``(1 + confidence) / 2`` is not a
            valid probability for ``inv_cdf``.
    """
    samples = list(data)
    dist = NormalDist.from_samples(samples)
    # Two-sided critical value of the standard normal distribution.
    z = NormalDist().inv_cdf((1 + confidence) / 2.)
    # from_samples() already estimates sigma with the sample (n - 1) standard
    # deviation, so the standard error of the mean divides by sqrt(n);
    # dividing by sqrt(n - 1) would apply the correction twice.
    return dist.stdev * z / len(samples) ** 0.5
# Reproduction of the Benford second-digit test results from the question.
# Rows are the digits 0-9 (index named 'Sec_Dig'); each column lists its
# values in digit order. The last column is named 'Z_score' to match the
# question's table (it had been fused with the index name as
# 'Z_scoreSec_Dig').
df = pd.DataFrame(
    {
        'Expected': [
            0.119679, 0.11389, 0.108821, 0.10432999999999999,
            0.10030800000000001, 0.096677, 0.093375, 0.090352,
            0.08757000000000001, 0.084997,
        ],
        'Counts': [
            4318, 2323, 1348, 1298, 3060,
            6580, 10092, 9847, 8439, 6635,
        ],
        'Found': [
            0.080052, 0.043066, 0.024991, 0.024064,
            0.056729999999999996, 0.12198699999999998, 0.187097, 0.182555,
            0.156452, 0.12300699999999999,
        ],
        'Dif': [
            -0.039626999999999996, -0.070824, -0.08383099999999999,
            -0.08026599999999999, -0.043579, 0.02531, 0.093722, 0.092203,
            0.068882, 0.03801,
        ],
        'AbsDif': [
            0.039626999999999996, 0.070824, 0.08383099999999999,
            0.08026599999999999, 0.043579, 0.02531, 0.093722, 0.092203,
            0.068882, 0.03801,
        ],
        'Z_score': [
            28.347781, 51.771489, 62.513121999999996, 60.975864,
            33.683738, 19.884178, 74.804141, 74.687841,
            56.587749, 31.646817,
        ],
    },
    index=pd.RangeIndex(10, name='Sec_Dig'),
)
# Alias so the plotting code below can keep using the question's name.
test_show = df

# Benford chart: observed second-digit proportions as bars, with the
# expected proportions overlaid as a line with markers.
fig = go.Figure()
fig.add_trace(go.Bar(
    x=test_show.index,
    y=test_show.Found,
    name='Found',
    marker_color='rgb(55, 83, 109)',
))
fig.add_trace(go.Scatter(
    x=test_show.index,
    y=test_show.Expected,
    mode='lines+markers',
    name='Expected',
))
fig.update_layout(
    title='Benfords Law',
    xaxis=dict(
        # The axis-level `titlefont` attribute is deprecated; the title font
        # is configured under `title.font` instead.
        title=dict(text='Digits', font=dict(size=16)),
        tickmode='linear',
        tickfont_size=14,
    ),
    yaxis=dict(
        title=dict(text='% Percentage', font=dict(size=16)),
        tickfont_size=14,
    ),
    legend=dict(
        x=0,
        y=1.0,
        # Alpha of 0 makes the legend background and border transparent.
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)',
    ),
)

# Half-width of the 95% interval. NOTE: it is derived only from the ten
# values in the Expected column, not from the underlying observation counts.
CI = confidence_interval(df.Expected, 0.95)

# Upper edge of the band: transparent line, hidden from the legend; it only
# serves as the boundary the next trace fills up to.
upper_bound = go.Scatter(
    x=df.index,
    y=df['Expected'] + CI,
    mode='lines',
    line_color='rgba(0,0,0,0)',
    showlegend=False,
)
# Lower edge of the band: fill='tonexty' shades the area between this trace
# and the one added just before it, so the upper edge must come first.
lower_bound = go.Scatter(
    x=df.index,
    y=df['Expected'] - CI,
    mode='lines',
    line_color='rgba(0,0,0,0)',
    name='95% confidence interval',
    fill='tonexty',
    fillcolor='rgba(255, 0, 0, 0.2)',
)
fig.add_traces([upper_bound, lower_bound])
fig.show()