I am having a machine learning project which is designing autonomous driver for speed dreams game in linux. In this case I have to find a way to get keyboard outputs to actual 1-dimensional array like this.
up - down - right - left - upleft - upright - downleft - downright - do nothing
[0 0 0 0 0 0 0 0 1]
I used this code as the starter code for taking screenshots and processing:
import time
import cv2
import mss
import numpy as np
def process_img(original_img):
processed_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
processed_img = cv2.Canny(processed_img, threshold1=200, threshold2=300)
return processed_img
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 0, "left": 70, "width": 640, "height": 480}
while True:
last_time = time.time()
# Get raw pixels from the screen, save it to a Numpy array
screen = np.array(sct.grab(monitor))
new_screen = process_img(original_img=screen)
# Display the picture
cv2.imshow("Window", new_screen)
print("Loop took {} seconds".format(time.time() - last_time))
# Press "q" to quit
k = cv2.waitKey(12)
if k > 0:
print(k)
if k & 0xFF == ord("q"):
cv2.destroyAllWindows()
break
I know catching keycodes is possible with cv2.waitkey()
function. So I can figure out a way to catch if up - down - left or right is pressed. but is there any way that I could catch keys combination like up-left
, up-right
, ... with cv2.waitkey .
Catching key presses in the loop with cv2.waitkey
is very important to me because it has a huge improvement in performance of my neural network in terms of accuracy.
It sounds that 'cv2.waitkey' is not a good option when you are switching to another program and keep pressing keys. I found this examples and made a sample code to catch keypresses which works perfect in windows and not bad in linux.
import time
import cv2
import mss
import numpy as np
from pynput.keyboard import Key, Listener
def up():
print("Go up")
def down():
print("Go down")
def left():
print("Go left")
def right():
print("Go right")
def up_left():
print("Go up_left")
def up_right():
print("Go up_right")
def down_left():
print("Go down_left")
def down_right():
print("Go down_right")
def do_nothing():
print("Do Nothing")
# Create a mapping of keys to function (use frozenset as sets are not hashable - so they can't be used as keys)
combination_to_function = {
frozenset([Key.up]): up, # No `()` after function_1 because
# we want to pass the function, not the value of the function
frozenset([Key.down, ]): down,
frozenset([Key.left, ]): left,
frozenset([Key.right, ]): right,
frozenset([Key.up, Key.left]): up_left,
frozenset([Key.up, Key.right]): up_right,
frozenset([Key.down, Key.left]): down_left,
frozenset([Key.down, Key.right]): down_right,
}
# Currently pressed keys
current_keys = set()
def on_press(key):
# When a key is pressed, add it to the set we are keeping track of and check if this set is in the dictionary
current_keys.add(key)
if frozenset(current_keys) in combination_to_function:
# If the current set of keys are in the mapping, execute the function
combination_to_function[frozenset(current_keys)]()
def on_release(key):
# When a key is released, remove it from the set of keys we are keeping track of
if key in current_keys:
current_keys.remove(key)
def process_img(original_img):
processed_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
processed_img = cv2.Canny(processed_img, threshold1=200, threshold2=300)
return processed_img
with mss.mss() as sct:
# Part of the screen to capture
monitor = {"top": 0, "left": 70, "width": 640, "height": 480}
while True:
listener = Listener(on_press=on_press, on_release=on_release)
listener.start()
last_time = time.time()
# key_catcher = MockButton()
# Get raw pixels from the screen, save it to a Numpy array
screen = np.array(sct.grab(monitor))
new_screen = process_img(original_img=screen)
# Display the picture
cv2.imshow("Window", new_screen)
# print("Loop took {} seconds".format(time.time() - last_time))
# Press "q" to quit
k = cv2.waitKey(10)
if k & 0xFF == ord("q"):
cv2.destroyAllWindows()
break
listener.stop()