gnuplot

gnuplot: How to display time series over a boxplot?


I would like to visualize how certain data change over time. To do this, I would like to display the time courses of the two blocks (cross/long) as points connected by lines, drawn over the boxes of the reference data. Neighboring plots should not overlap. How can I define or construct the time scale for this?

reset session

# The only data set (index / block) in "reference.dat" is "norm".
# Not referenced again in this script.
ref_blocks = "norm"

# Input file names: measurement file first, reference file second.
# Not referenced again in this script; the data are inlined as datablocks.
files = "data.dat reference.dat"

$REF<<EOD
# norm
RH   LH   RA   LA
12019.1 444.6   16159.2 62355.9
8256.7  171.9   18879.5 67602.1
7560.4  192.7   15405.6 59350.9
13985.5 542.3   18920   56517.7
5401.5  364.9   13164.1 48907.4
3856.3  375.8   15093   73012.8
9087.7  532.8   15255.4 52057.8
8948.1  313.4   13690.2 53685.8
6511    275.3   11455.2 45066
10404.1 264.6   15223.6 60208.2
EOD


# Names of the data sets (indices / blocks) found in "data.dat": "cross" and "long".
blocks = "cross long"

$DAT<<EOD
# cross
date      RH        LH        RA        LA
20200203  9500.8    164.3     13004.2   54644.1
20200522  9940.9    203.6     20142.7   49401.3
20210521  11226.3   222.7     14085.7   54759
20220527  13605.5   380.1     19472.8   46403.6
20220701  13206.5   376.3     12784     53552.9
20220706  13148.9   272.6     13455.7   51796.4
20221006  13832.9   311.6     13006.1   52337.4
20230309  13389.8   310.5     12677     54125.9
20230907  13806.3   334.8     13381.2   54700.4
20240919  15376.2   492.4     13050.4   53682.1


# long
date      RH        LH        RA        LA
20200203  9070.1    236.9     5505.8    53849.7
20200522  9489.2    258.5     6505.8    54003.9
20220527  13466     441.1     14128.5   50221.7
20220701  12934.3   400.4     14421.3   52927
20220706  12921.9   386.5     14539.9   50843.7
20221006  13440.1   351.2     15709.2   50322.8
20230309  13258.9   362.8     14416.2   52378.4
20210521  10771.3   294.8     14199.1   53795.7
20230907  13369.6   376.7     14974.9   53158.1
20240919  15216     478.7     14213     51990.8


EOD

# Column header names shared by both files; every name gets its own box.
labels = "RH LH RA LA"
# Returns the n-th column name. Defined here but not used by the plot command below.
myXtic(n) = sprintf("%s", word(labels, n))

# Title and x axis: one numeric tic per column name, without tic marks.
set title "dat -> ref" font "Arial,14"
set xtics 1, 1, words(labels) scale 0
#set datafile separator '\t'

# Boxplot appearance.
set style data boxplot
set style boxplot outliers pointtype 7
set style fill solid 0.25 border -1
set boxwidth 0.7 absolute

# One boxplot per column of the "norm" data set, placed at x = 1, 2, ...
plot for [i=1:words(labels)] $REF index "norm" using (i):(column(word(labels, i))) notitle

# Disabled attempt to overlay the "long" data set.
# NOTE: this loop iterates over `label`, but the x expression uses `i`,
# which this loop does not define.
#replot for [label in labels] $DAT index "long" using (i):( column( label ) ) notitle

Edit: Yes, there should be one timeline per box. All boxes should have the same color and there should be one color for all cross-values and one color for all long-values.

enter image description here

Edit 2:

enter image description here


Solution

  • Here is a suggestion: once you know the time range via the stats command, you need to scale and shift your time data to the position and width of each boxplot.

    Check the following minimized example as starting point for further optimization.

    Script:

    ### overlay boxplots with timedata
    # Start from a clean gnuplot state so that settings and variables left over
    # from earlier commands in the same session cannot affect this plot.
    reset session
    
    $REF<<EOD
    # norm
    RH   LH   RA   LA
    12019.1 444.6   16159.2 62355.9
    8256.7  171.9   18879.5 67602.1
    7560.4  192.7   15405.6 59350.9
    13985.5 542.3   18920   56517.7
    5401.5  364.9   13164.1 48907.4
    3856.3  375.8   15093   73012.8
    9087.7  532.8   15255.4 52057.8
    8948.1  313.4   13690.2 53685.8
    6511    275.3   11455.2 45066
    10404.1 264.6   15223.6 60208.2
    EOD
    
    $DAT<<EOD
    # cross
    date      RH        LH        RA        LA
    20200203  9500.8    164.3     13004.2   54644.1
    20200522  9940.9    203.6     20142.7   49401.3
    20210521  11226.3   222.7     14085.7   54759
    20220527  13605.5   380.1     19472.8   46403.6
    20220701  13206.5   376.3     12784     53552.9
    20220706  13148.9   272.6     13455.7   51796.4
    20221006  13832.9   311.6     13006.1   52337.4
    20230309  13389.8   310.5     12677     54125.9
    20230907  13806.3   334.8     13381.2   54700.4
    20240919  15376.2   492.4     13050.4   53682.1
    
    
    # long
    date      RH        LH        RA        LA
    20200203  9070.1    236.9     5505.8    53849.7
    20200522  9489.2    258.5     6505.8    54003.9
    20220527  13466     441.1     14128.5   50221.7
    20220701  12934.3   400.4     14421.3   52927
    20220706  12921.9   386.5     14539.9   50843.7
    20221006  13440.1   351.2     15709.2   50322.8
    20230309  13258.9   362.8     14416.2   52378.4
    20210521  10771.3   294.8     14199.1   53795.7
    20230907  13369.6   376.7     14974.9   53158.1
    20240919  15216     478.7     14213     51990.8
    EOD
    
    # Columns to plot, looked up by header name; slot i on the x axis shows word i.
    myHeaders     = "LH RH LA RA"
    H             = words(myHeaders)
    # y value for slot i: the data column whose header is the i-th name.
    dataHeader(i) = column(word(myHeaders, i))
    # Named data sets (indices) inside $DAT; each one becomes a time series.
    myBlocks      = "cross long"
    B             = words(myBlocks)
    myBlock(i)    = word(myBlocks, i)
    # Width of a box and of the time axis squeezed into it.
    myBoxwidth    =  0.8
    # Date format of column 1 in $DAT, e.g. 20200203.
    myFmt         = "%Y%m%d"
    # Map the date in column `col` to an x position inside box i:
    #   t0 -> i - myBoxwidth/2,  t1 -> i + myBoxwidth/2, linear in between.
    # If all dates are identical (t1 == t0) the scaling would divide by zero and
    # every point would be dropped; in that case put the points at the box centre.
    t(col,i)      = ((t1 > t0 ? (timecolumn(col,myFmt)-t0)/(t1-t0) : 0.5) - 0.5)*myBoxwidth + i
    
    # Overall time range; no index is given, so all data sets in $DAT are scanned.
    stats $DAT u (timecolumn(1,myFmt)) nooutput
    t0 = STATS_min
    t1 = STATS_max
    
    # Line types: 1 and 2 colour the time series of block 1 and block 2,
    # 3 colours the reference boxes (see `lt b` / `lt 3` in the plot command).
    # The colours are given as 0xAARRGGBB, i.e. with an alpha byte in front.
    set linetype 1 linecolor rgb 0xaaff0000
    set linetype 2 linecolor rgb 0xaa00aa00
    set linetype 3 linecolor rgb 0x77ffff00
    
    # Boxplot appearance.
    set style data boxplot
    set style boxplot outliers pointtype 7
    set style fill solid 1.0 border -1
    set boxwidth myBoxwidth absolute
    
    # Axes: half a slot of margin on each side of the H boxes, logarithmic y
    # with grid lines at major and minor y tics.
    set xrange [0.5:H+0.5] noextend
    set logscale y
    set grid ytics mytics
    
    # Title and key; key entries are created explicitly in the plot command.
    set title "dat -> ref" font "Arial,14"
    set key left top reverse noautotitle
    
    # Plot layers, in drawing order:
    #   1. reference boxplots, one per header, centred at x = i
    #   2. one time series per block and header, scaled into the box at x = i.
    #      "smooth unique" sorts each series by x, so rows that are out of
    #      chronological order in $DAT (e.g. 20210521 in block "long") no longer
    #      make the connecting line run backwards in time
    #   3. key entries for the blocks and for the reference boxes
    #   4. one dummy point per header (y = NaN, so nothing is drawn) whose only
    #      purpose is to put the header name on the x axis as tic label
    plot for [i=1:H] $REF index "norm" u (i):(dataHeader(i)) lt 3, \
         for [b=1:B] for [i=1:H] $DAT index myBlock(b) u (t(1,i)):(dataHeader(i)) smooth unique w lp pt 7 lt b, \
         for [b=1:B] keyentry w lp pt 7 lt b ti myBlock(b), \
         keyentry w boxes lt 3 ti "Ref", \
         for [i=1:H] '+' every ::::0 u (i):(NaN):xtic(word(myHeaders, i)) w p
    ### end of script
    

    Result:

    enter image description here