python subprocess multiprocessor hung

How to stop reading process output in Python without hanging?


I have a Python program for Linux that looks almost like this one:

import os
import time

process = os.popen("top").readlines()

time.sleep(1)

os.popen("killall top")

print process

The program hangs on this line:

process = os.popen("top").readlines()

This happens with tools that keep updating their output, such as "top".

My best attempts so far:

import os
import time
import subprocess

process = subprocess.Popen('top')

time.sleep(2)

os.popen("killall top")

print process

It worked better than the first one (the process gets killed), but it prints:

<subprocess.Popen object at 0x97a50cc>

The second attempt:

import os
import time
import subprocess

process = subprocess.Popen('top').readlines()

time.sleep(2)

os.popen("killall top")

print process

The result was the same as with the first one: it hung because of "readlines()".

The output should look like this:

top - 05:31:15 up 12:12,  5 users,  load average: 0.25, 0.14, 0.11
Tasks: 174 total,   2 running, 172 sleeping,   0 stopped,   0 zombie
Cpu(s):  9.3%us,  3.8%sy,  0.1%ni, 85.9%id,  0.9%wa,  0.0%hi,  0.0%si,  0.0%st
Mem:   1992828k total,  1849456k used,   143372k free,   233048k buffers
Swap:  4602876k total,        0k used,  4602876k free,  1122780k cached

  PID USER      PR  NI  VIRT  RES  SHR S %CPU %MEM    TIME+  COMMAND            
31735 Barakat   20   0  246m  52m  20m S 19.4  2.7  13:54.91 totem              
 1907 root      20   0 91264  45m  15m S  1.9  2.3  38:54.14 Xorg               
 2138 Barakat   20   0 17356 5368 4284 S  1.9  0.3   3:00.15 at-spi-registry    
 2164 Barakat    9 -11  164m 7372 6252 S  1.9  0.4   2:54.58 pulseaudio         
 2394 Barakat   20   0 27212 9792 8256 S  1.9  0.5   6:01.48 multiload-apple    
 6498 Barakat   20   0 56364  30m  18m S  1.9  1.6   0:03.38 pyshell            
    1 root      20   0  2880 1416 1208 S  0.0  0.1   0:02.02 init               
    2 root      20   0     0    0    0 S  0.0  0.0   0:00.02 kthreadd           
    3 root      RT   0     0    0    0 S  0.0  0.0   0:00.12 migration/0        
    4 root      20   0     0    0    0 S  0.0  0.0   0:02.07 ksoftirqd/0        
    5 root      RT   0     0    0    0 S  0.0  0.0   0:00.00 watchdog/0         
    9 root      20   0     0    0    0 S  0.0  0.0   0:01.43 events/0           
   11 root      20   0     0    0    0 S  0.0  0.0   0:00.00 cpuset             
   12 root      20   0     0    0    0 S  0.0  0.0   0:00.02 khelper            
   13 root      20   0     0    0    0 S  0.0  0.0   0:00.00 netns              
   14 root      20   0     0    0    0 S  0.0  0.0   0:00.00 async/mgr          
   15 root      20   0     0    0    0 S  0.0  0.0   0:00.00 pm

and it should be saved in the variable "process". Any ideas? I'm really stuck now.


Solution

  • #!/usr/bin/env python
    """Start process; wait 2 seconds; kill the process; print all process output."""
    import subprocess
    import tempfile
    import time
    
    def main():
        # open temporary file (it automatically deleted when it is closed)
        #  `Popen` requires `f.fileno()` so `SpooledTemporaryFile` adds nothing here
        f = tempfile.TemporaryFile() 
    
        # start process, redirect stdout
        p = subprocess.Popen(["top"], stdout=f)
    
        # wait 2 seconds
        time.sleep(2)
    
        # kill process
        #NOTE: if it doesn't kill the process then `p.wait()` blocks forever
        p.terminate() 
        p.wait() # wait for the process to terminate otherwise the output is garbled
    
        # print saved output
        f.seek(0) # rewind to the beginning of the file
        print f.read(), 
        f.close()
    
    if __name__=="__main__":
        main()
    

    Tail-like solutions that print only a portion of the output

    You could read the process output in another thread and save the required number of the last lines in a queue:

    import collections
    import subprocess
    import time
    import threading
    
    def read_output(process, append):
        """Pass each line read from `process.stdout` to `append` until EOF.

        `process` is any object exposing a `stdout` with a `readline()` method;
        `append` is called once per line, in order. Reading stops at the first
        empty string returned by `readline()`.
        """
        next_line = process.stdout.readline
        while True:
            chunk = next_line()
            if chunk == "":
                # an empty string from readline() marks end of stream
                break
            append(chunk)
    
    def main():
        # start process, redirect stdout
        process = subprocess.Popen(["top"], stdout=subprocess.PIPE, close_fds=True)
        try:
            # save last `number_of_lines` lines of the process output
            number_of_lines = 200
            q = collections.deque(maxlen=number_of_lines) # atomic .append()
            t = threading.Thread(target=read_output, args=(process, q.append))
            t.daemon = True
            t.start()
    
            #
            time.sleep(2)
        finally:
            process.terminate() #NOTE: it doesn't ensure the process termination
    
        # print saved lines
        print ''.join(q)
    
    if __name__=="__main__":
        main()
    

    This variant requires q.append() to be an atomic operation. Otherwise the output might be corrupted.

    signal.alarm() solution

    You could use signal.alarm() to call process.terminate() after a specified timeout instead of reading in another thread, though it might not interact very well with the subprocess module. Based on @Alex Martelli's answer:

    import collections
    import signal
    import subprocess
    
    class Alarm(Exception):
        """Raised by the SIGALRM handler to signal that the timeout expired."""
    
    def alarm_handler(signum, frame):
        """Signal handler: turn a delivered signal into an `Alarm` exception.

        `signum` and `frame` are the standard signal-handler arguments; neither
        is used.
        """
        raise Alarm()
    
    def main():
        # start process, redirect stdout
        process = subprocess.Popen(["top"], stdout=subprocess.PIPE, close_fds=True)
    
        # set signal handler
        signal.signal(signal.SIGALRM, alarm_handler)
        signal.alarm(2) # produce SIGALRM in 2 seconds
    
        try:
            # save last `number_of_lines` lines of the process output
            number_of_lines = 200
            q = collections.deque(maxlen=number_of_lines)
            for line in iter(process.stdout.readline, ""):
                q.append(line)
            signal.alarm(0) # cancel alarm
        except Alarm:
            process.terminate()
        finally:
            # print saved lines
            print ''.join(q)
    
    if __name__=="__main__":
        main()
    

    This approach works only on *nix systems. It might block if process.stdout.readline() doesn't return.

    threading.Timer solution

    import collections
    import subprocess
    import threading
    
    def main():
        # start process, redirect stdout
        process = subprocess.Popen(["top"], stdout=subprocess.PIPE, close_fds=True)
    
        # terminate process in timeout seconds
        timeout = 2 # seconds
        timer = threading.Timer(timeout, process.terminate)
        timer.start()
    
        # save last `number_of_lines` lines of the process output
        number_of_lines = 200
        q = collections.deque(process.stdout, maxlen=number_of_lines)
        timer.cancel()
    
        # print saved lines
        print ''.join(q),
    
    if __name__=="__main__":
        main()
    

    This approach should also work on Windows. Here I've used process.stdout as an iterable; it might introduce additional output buffering, so you could switch to the iter(process.stdout.readline, "") approach if that is not desirable. If the process doesn't terminate on process.terminate(), then the script hangs.

    No threads, no signals solution

    import collections
    import subprocess
    import sys
    import time
    
    def main():
        args = sys.argv[1:]
        if not args:
            args = ['top']
    
        # start process, redirect stdout
        process = subprocess.Popen(args, stdout=subprocess.PIPE, close_fds=True)
    
        # save last `number_of_lines` lines of the process output
        number_of_lines = 200
        q = collections.deque(maxlen=number_of_lines)
    
        timeout = 2 # seconds
        now = start = time.time()    
        while (now - start) < timeout:
            line = process.stdout.readline()
            if not line:
                break
            q.append(line)
            now = time.time()
        else: # on timeout
            process.terminate()
    
        # print saved lines
        print ''.join(q),
    
    if __name__=="__main__":
        main()
    

    This variant uses neither threads nor signals, but it produces garbled output in the terminal. It will block if process.stdout.readline() blocks.