Tags: python, numpy, matplotlib, io, seek

Accessing the end of a file being written while live plotting a high-speed datastream


My question refers to the great answer of the following question:

Real time data plotting from a high throughput source

As the data file written by the gen.py code of this answer was growing fast, I wrote my own version, gen_own.py (below), which essentially imposes a delay of 1 ms before writing each new data point to the file. I also adapted the code of plot.py and wrote my own plot_own.py, essentially adding debugging statements. Although I tried to read the documentation on the several components of the f.seek(0, io.SEEK_END) line, there are still several points that I don't understand. Here are all the questions that I have:

My question is: how can we adapt plot_own.py to work with gen_own.py (i.e. with a slower datastream)?

Here is the code gen_own.py:

#!/usr/bin/env python3

import time
import random

LIMIT_TIME = 100  # s
DATA_FILENAME = "data.txt"


def gen_data(filename, limit_time, period=0.001):
    """Write timestamped random samples to *filename* for *limit_time* seconds.

    At most one sample is written per *period* seconds (1 ms by default).
    Each sample is a single text line holding ``time.time()`` and a random
    number in ``[0, 1)``, both formatted as ``30.12f``; the file is flushed
    after every line so that a concurrent reader can see it immediately.

    Args:
        filename: Path of the output file; it is truncated when opened.
        limit_time: Number of seconds after which the generator stops.
        period: Minimum delay in seconds between two consecutive writes.
    """
    start_time = time.time()
    old_time = start_time
    with open(filename, "w") as f:
        # The elapsed time has to be re-evaluated on every pass, otherwise
        # the loop condition never changes and the generator never stops.
        while time.time() - start_time < limit_time:
            # Busy-wait on purpose: keeps the spacing between samples close
            # to `period` without relying on the granularity of sleep().
            if time.time() > old_time + period:
                # 30 + 1 + 30 characters plus the newline: 62 characters.
                f.write(f"{time.time():30.12f} {random.random():30.12f}\n")
                f.flush()
                old_time = time.time()
            

gen_data(DATA_FILENAME, LIMIT_TIME)

For completeness, here is the code of gen.py (copied from the original question):

#!/usr/bin/env python3

import time
import random

LIMIT_TIME = 100  # s
DATA_FILENAME = "data.txt"


def gen_data(filename, limit_time):
    """Write timestamped random samples to *filename* as fast as possible.

    Each sample is a single text line holding ``time.time()`` and a random
    number in ``[0, 1)``, both formatted as ``30.12f``; the file is flushed
    after every line so that a concurrent reader can see it immediately.

    Args:
        filename: Path of the output file; it is truncated when opened.
        limit_time: Number of seconds after which the generator stops.
    """
    start_time = time.time()
    with open(filename, "w") as f:
        # The elapsed time has to be re-evaluated on every pass, otherwise
        # the loop condition never changes and the generator never stops.
        while time.time() - start_time < limit_time:
            # 30 + 1 + 30 characters plus the newline: 62 characters.
            f.write(f"{time.time():30.12f} {random.random():30.12f}\n")
            f.flush()
            

gen_data(DATA_FILENAME, LIMIT_TIME)

Here is the code plot_own.py:

#!/usr/bin/env python3


import io
import time
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.animation


BUFFER_LEN = 64  # passed to get_data() as `buffer_len`
DATA_FILENAME = "data.txt"  # file polled by animate(); same name the generator scripts write to
PLOT_LIMIT = 20  # default value of animate()'s `limit` parameter
ANIM_FILENAME = "video.gif"  # only used by the commented-out anim.save() call below


# One figure with a single axes; animate() clears and redraws `ax` on each frame.
fig, ax = plt.subplots(1, 1, figsize=(10,8))
ax.set_title("Plot of random numbers from `gen.py`")
ax.set_xlabel("time / s")
ax.set_ylabel("random number / #")
ax.set_ylim([0, 1])  # animate() re-applies this range after every ax.clear()


def get_data(filename, buffer_len, delay=0.0):
    """Debug reader: jump to the end of *filename* and read what follows.

    The file is opened in text mode, positioned at its current end, and up
    to *buffer_len* characters are then read from there.  The position
    reported by the seek, the position after the read, the next line and
    the data itself are printed along the way.

    Args:
        filename: Path of the file to read.
        buffer_len: Maximum number of characters to read after the seek.
        delay: Optional pause in seconds before the file is closed.

    Returns:
        The characters read after the seek (possibly an empty string).
    """
    with open(filename, "r") as stream:
        end_position = stream.seek(0, io.SEEK_END)
        print(f"f.seek(0, io.SEEK_END): {end_position}")

        data = stream.read(buffer_len)

        position_after_read = stream.tell()
        print(f"f.tell(): {position_after_read}")

        following_line = stream.readline()
        print(f"f.readline(): {following_line}")
        print(f"data: {data}")

        if delay:
            time.sleep(delay)
    return data


def animate(i, xs, ys, limit=PLOT_LIMIT, verbose=False):
    """Fetch the newest sample and redraw the plot (``FuncAnimation`` callback).

    Args:
        i: Frame number supplied by ``FuncAnimation``; not used.
        xs: List of x values (timestamps); updated in place.
        ys: List of y values; updated in place.
        limit: Maximum number of points kept in ``xs``/``ys`` and plotted.
        verbose: If true, print the raw data returned by ``get_data``.
    """
    # grab the data
    try:
        data = get_data(DATA_FILENAME, BUFFER_LEN)
        if verbose:
            print(data)
        x, y = map(float, data.split())
        if x > xs[-1]:
            # Add x and y to lists
            xs.append(x)
            ys.append(y)
            # Keep only the last `limit` items.  Trim in place: rebinding
            # the local names would leave the lists handed over through
            # `fargs` growing by one element per frame, forever.
            del xs[:-limit]
            del ys[:-limit]
        else:
            print(f"W: {time.time()} :: STALE!")
    except ValueError:
        # Raised when `data` does not consist of exactly two floats,
        # which includes the case of an empty read.
        print(f"W: {time.time()} :: EXCEPTION!")
    else:
        # Draw x and y lists
        ax.clear()
        ax.set_ylim([0, 1])
        ax.plot(xs, ys)


# Save a video instead of showing the plot (only used to attach it to the post).
#anim = mpl.animation.FuncAnimation(fig, animate, fargs=([time.time()], [None]), interval=1, frames=3 * PLOT_LIMIT, repeat=False)
#anim.save(ANIM_FILENAME, writer='imagemagick', fps=10)
#print(f"I: Saved to `{ANIM_FILENAME}`")

# show interactively
# `fargs` supplies animate()'s `xs` and `ys`: xs starts with the current time
# so that the first sample can be compared against xs[-1], and ys starts with
# a matching None placeholder.
anim = mpl.animation.FuncAnimation(fig, animate, fargs=([time.time()], [None]), interval=1)
plt.show()
plt.close()

Here is the output of plot_own.py when run simultaneously with gen.py

f.seek(0, io.SEEK_END): 36998872
f.tell(): 36998936
f.readline():      1731141285.629011392593                 0.423847536979

data:        1731141285.629006385803                 0.946414017554

f.seek(0, io.SEEK_END): 37495182
f.tell(): 37495246
f.readline():      1731141285.670451402664                 0.405303398216

data:        1731141285.670446395874                 0.103460518242

f.seek(0, io.SEEK_END): 38084306
f.tell(): 38084370
f.readline():      1731141285.719735860825                 0.360983611461

data:        1731141285.719730854034                 0.318057761442

Here is the output of plot_own.py when run simultaneously with gen_own.py

W: 1731141977.7246473 :: EXCEPTION!
f.seek(0, io.SEEK_END): 156426
f.tell(): 156426
f.readline():
data:
W: 1731141977.7611823 :: EXCEPTION!
f.seek(0, io.SEEK_END): 158472
f.tell(): 158472
f.readline():
data:
W: 1731141977.79479 :: EXCEPTION!
f.seek(0, io.SEEK_END): 160518
f.tell(): 160518
f.readline():        1731141977.828338146210                 0.165056626254

data:
W: 1731141977.8283837 :: EXCEPTION!
f.seek(0, io.SEEK_END): 162626
f.tell(): 162626
f.readline():
data:
W: 1731141977.8621912 :: EXCEPTION!
f.seek(0, io.SEEK_END): 164734
f.tell(): 164734
f.readline():
data:

Solution

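  • Even without the delay, note that only about 1 in 2000 lines is actually read, printed and displayed; with the delay of 1 ms it is about 1 in 20 lines. In addition, there is an issue with seeking to the end and then reading: the read returns an empty string whenever the writer has not appended anything between the `seek` and the `read`, which is why `data` is so often empty with the slower generator.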

    1. You can use the `tail` function from this nice answer;

    therefore your plot_own.py becomes:

    #!/usr/bin/env python3
    
    
    import io
    import os
    import subprocess
    import time
    import matplotlib.pyplot as plt
    import matplotlib as mpl
    import matplotlib.animation
    
    def tail(f, lines=1, _buffer=4098):
        """Return up to the last *lines* lines of the open file *f*.

        The file is scanned backwards from its end, `_buffer` bytes further
        on each pass, until enough lines have been collected or the start of
        the file has been reached.  A block normally starts in the middle of
        a line, so one line more than requested is collected and the
        possibly truncated first one is dropped by the final slice.

        Args:
            f: Seekable file object opened for reading.  Open it in binary
                mode to benefit from the backwards scan: text-mode files
                reject non-zero end-relative seeks, in which case the whole
                file is read instead.
            lines: Number of lines wanted.
            _buffer: Step, in bytes, of the backwards scan.

        Returns:
            A list with the last lines of the file (line terminators
            included), oldest first.  It is shorter than *lines* when the
            file holds fewer lines, and empty when *lines* is not positive.

        Raises:
            ValueError: If `_buffer` is not positive.
        """
        if lines <= 0:
            return []
        if _buffer <= 0:
            raise ValueError("_buffer must be a positive number of bytes")

        # place holder for the lines found
        lines_found = []

        # block counter will be multiplied by buffer
        # to get the block size from the end
        block_counter = -1

        while True:
            try:
                position = f.seek(block_counter * _buffer, os.SEEK_END)
            except (OSError, ValueError):
                # Either the file is shorter than the requested block or the
                # stream does not support end-relative seeks: read it all.
                f.seek(0)
                lines_found = f.readlines()
                break

            lines_found = f.readlines()

            # Stop once the start of the file has been reached (some streams
            # clamp the seek instead of raising) or once there is one line
            # to spare for the possibly truncated first one.
            if position == 0 or len(lines_found) > lines:
                break

            # decrement the block counter to get the
            # next X bytes
            block_counter -= 1

        return lines_found[-lines:]
    
    BUFFER_LEN = 64  # passed to get_data() as `buffer_len`
    DATA_FILENAME = "data.txt"  # file polled by animate(); same name the generator scripts write to
    PLOT_LIMIT = 20  # default value of animate()'s `limit` parameter
    ANIM_FILENAME = "video.gif"  # only used by the commented-out anim.save() call below
    
    
    # One figure with a single axes; animate() clears and redraws `ax` on each frame.
    fig, ax = plt.subplots(1, 1, figsize=(10,8))
    ax.set_title("Plot of random numbers from `gen.py`")
    ax.set_xlabel("time / s")
    ax.set_ylabel("random number / #")
    ax.set_ylim([0, 1])  # animate() re-applies this range after every ax.clear()
    
    
    def get_data(filename, buffer_len, delay=0.0):
        """Return the most recent line of *filename*, or ``""`` if it has none.

        Args:
            filename: Path of the data file, which may still be growing.
            buffer_len: Size in bytes used to scan the file from its end; it
                should be at least as long as one record of the file.
            delay: Optional pause in seconds before returning.

        Returns:
            The last line of the file as text, or an empty string when the
            file does not contain any line yet.
        """
        # Binary mode allows tail() to seek relative to the end of the file,
        # so only its last bytes are read rather than the whole growing file.
        with open(filename, "rb") as f:
            # One byte more than a record, as in the previous hard-coded 65.
            last_lines = tail(f, 1, buffer_len + 1)

        # An empty file yields no line at all: report it as empty data
        # instead of failing on the missing first element.
        data = last_lines[0].decode("ascii", errors="replace") if last_lines else ""
        print(data)

        if delay:
            time.sleep(delay)
        return data
    
    
    def animate(i, xs, ys, limit=PLOT_LIMIT, verbose=False):
        """Fetch the newest sample and redraw the plot (``FuncAnimation`` callback).

        Empty data is skipped silently; the plot is still redrawn with the
        points collected so far.

        Args:
            i: Frame number supplied by ``FuncAnimation``; not used.
            xs: List of x values (timestamps); updated in place.
            ys: List of y values; updated in place.
            limit: Maximum number of points kept in ``xs``/``ys`` and plotted.
            verbose: If true, print the raw data returned by ``get_data``.
        """
        # grab the data
        try:
            data = get_data(DATA_FILENAME, BUFFER_LEN)
            if data:
                if verbose:
                    print(data)
                x, y = map(float, data.split())
                if x > xs[-1]:
                    # Add x and y to lists
                    xs.append(x)
                    ys.append(y)
                    # Keep only the last `limit` items.  Trim in place:
                    # rebinding the local names would leave the lists handed
                    # over through `fargs` growing by one element per frame.
                    del xs[:-limit]
                    del ys[:-limit]
                else:
                    print(f"W: {time.time()} :: STALE!")
        except ValueError:
            # Raised when `data` does not consist of exactly two floats.
            print(f"W: {time.time()} :: EXCEPTION!")
        else:
            # Draw x and y lists
            ax.clear()
            ax.set_ylim([0, 1])
            ax.plot(xs, ys)
    
    
    # Save a video instead of showing the plot (only used to attach it to the post).
    #anim = mpl.animation.FuncAnimation(fig, animate, fargs=([time.time()], [None]), interval=1, frames=3 * PLOT_LIMIT, repeat=False)
    #anim.save(ANIM_FILENAME, writer='imagemagick', fps=10)
    #print(f"I: Saved to `{ANIM_FILENAME}`")
    
    # show interactively
    # `fargs` supplies animate()'s `xs` and `ys`: xs starts with the current
    # time so that the first sample can be compared against xs[-1], and ys
    # starts with a matching None placeholder.
    anim = mpl.animation.FuncAnimation(fig, animate, fargs=([time.time()], [None]), interval=1)
    plt.show()
    plt.close()
    

    or

    2. As for your error, you can simply make sure that `data` is not empty before parsing it, so that no exception is raised in your plot_own.py:
    def animate(i, xs, ys, limit=PLOT_LIMIT, verbose=False):
        """Redraw the plot with the newest sample, ignoring empty reads.

        ``get_data`` may return an empty string when nothing new could be
        read; such data is skipped instead of being parsed, so no exception
        is raised for it.

        Args:
            i: Frame number supplied by ``FuncAnimation``; not used.
            xs: List of x values (timestamps); updated in place.
            ys: List of y values; updated in place.
            limit: Maximum number of points kept in ``xs``/``ys`` and plotted.
            verbose: If true, print the raw data returned by ``get_data``.
        """
        # grab the data
        try:
            data = get_data(DATA_FILENAME, BUFFER_LEN)
            if data:
                if verbose:
                    print(data)
                x, y = map(float, data.split())
                if x > xs[-1]:
                    # Add x and y to lists
                    xs.append(x)
                    ys.append(y)
                    # Drop everything but the last `limit` items, in place,
                    # so that the caller's lists stay bounded as well (a
                    # plain `xs = xs[-limit:]` only rebinds the local name).
                    del xs[:-limit]
                    del ys[:-limit]
                else:
                    print(f"W: {time.time()} :: STALE!")
        except ValueError:
            # Raised when `data` does not consist of exactly two floats.
            print(f"W: {time.time()} :: EXCEPTION!")
        else:
            # Draw x and y lists
            ax.clear()
            ax.set_ylim([0, 1])
            ax.plot(xs, ys)
    

    Yes, you are still losing data, but this second version is the simplest fix, i.e. just validate the data with `if data:` before plotting.

    Another approach would be to use a queue, possibly with some heuristics such as displaying only 1 in every 5 lines, or adapting the display rate to the speed of the datastream.