OpenCV - Extracting lines on a graph

I would like to create a program that is able to extract lines from a graph.

For example, if a graph like this is inputted, I would just want the red line to be outputted.

Below I have tried to do this using a hough line transformation, however, I do not get very promising results.

import cv2
import numpy as np

graph_img = cv2.imread("/Users/2020shatgiskessell/Desktop/Graph1.png")
gray = cv2.cvtColor(graph_img, cv2.COLOR_BGR2GRAY)

kernel_size = 5
#grayscale image
blur_gray = cv2.GaussianBlur(gray,(kernel_size, kernel_size),0)

#Canny edge detecion
edges = cv2.Canny(blur_gray, 50, 150)

#Hough Lines Transformation

#distance resoltion of hough grid (pixels)
rho = 1 
#angular resolution of hough grid (radians)
theta = np.pi/180 
#minimum number of votes
threshold = 15 

#play around with these
min_line_length = 25
max_line_gap = 20

#make new image
line_image = np.copy(graph_img)

#returns array of lines
lines = cv2.HoughLinesP(edges, rho, theta, threshold, np.array([]),
                    min_line_length, max_line_gap)

for line in lines:
    for x1,y1,x2,y2 in line:
        cv2.line(line_image,(x1,y1),(x2,y2),(255,0,0),2)


lines_edges = cv2.addWeighted(graph_img, 0.8, line_image, 1, 0)

cv2.imshow("denoised image",edges)


if cv2.waitKey(0) & 0xff == 27:
    cv2.destroyAllWindows()

This produces the output image below, which does not accurately recognize the graph line. How might I go about doing this?

Note: For now, I am not concerned about the graph titles or any other text.

I would also like the code to work for other graph images aswell, such as: etc.

Solution

If the graph does not have many noises around it (like your example) I would suggest to threshold your image with Otsu threshold instead of looking for edges . Then you simply search the contours, select the biggest one (graph) and draw it on a blank mask. After that you can perform a bitwise operation on image with the mask and you will get a black image with the graph. If you like the white background better, then simply change all black pixels to white. Steps are written in the example. Hope it helps a bit. Cheers!

Example:

import numpy as np
import cv2

# Read the image and create a blank mask
img = cv2.imread('graph.png')
h,w = img.shape[:2]
mask = np.zeros((h,w), np.uint8)

# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

# Search for contours and select the biggest one and draw it on mask
_, contours, hierarchy = cv2.findContours(thresh,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours, key=cv2.contourArea)
cv2.drawContours(mask, [cnt], 0, 255, -1)

# Perform a bitwise operation
res = cv2.bitwise_and(img, img, mask=mask)

# Convert black pixels back to white
black = np.where(res==0)
res[black[0], black[1], :] = [255, 255, 255]

# Display the image
cv2.imshow('img', res)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result:

EDIT:

For noisier pictures you could try this code. Note that different graphs have different noises and may not work on every graph image since the denoisiation process would be specific in every case. For different noises you can use different ways to denoise it, for example histogram equalization, eroding, blurring etc. This code works well for all 3 graphs. Steps are written in comments. Hope it helps. Cheers!

import numpy as np
import cv2


# Read the image and create a blank mask
img = cv2.imread('graph.png')
h,w = img.shape[:2]
mask = np.zeros((h,w), np.uint8)

# Transform to gray colorspace and threshold the image
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

# Perform opening on the thresholded image (erosion followed by dilation)
kernel = np.ones((2,2),np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

# Search for contours and select the biggest one and draw it on mask
_, contours, hierarchy = cv2.findContours(opening,cv2.RETR_TREE,cv2.CHAIN_APPROX_NONE)
cnt = max(contours, key=cv2.contourArea)
cv2.drawContours(mask, [cnt], 0, 255, -1)

# Perform a bitwise operation
res = cv2.bitwise_and(img, img, mask=mask)

# Threshold the image again
gray = cv2.cvtColor(res,cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

# Find all non white pixels
non_zero = cv2.findNonZero(thresh)

# Transform all other pixels in non_white to white
for i in range(0, len(non_zero)):
    first_x = non_zero[i][0][0]
    first_y = non_zero[i][0][1]
    first = res[first_y, first_x]
    res[first_y, first_x] = 255

# Display the image
cv2.imshow('img', res)
cv2.waitKey(0)
cv2.destroyAllWindows()

Result: