python selenium-webdriver web-scraping selenium-chromedriver mouse

Move mouse, human-like, with Python/Selenium (like pptr ghost-cursor)


I tried the code from "Human-like mouse movements via Selenium", but I am trying to figure out how to integrate it into a real-life scraper so that my mouse follows different DOM elements:

#!/usr/bin/python
# https://stackoverflow.com/questions/39422453/human-like-mouse-movements-via-selenium
import os
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import numpy as np
import scipy.interpolate as si

# Control points that give the curve its shape.
points = np.array([
    [-6, 2], [-3, -2], [0, 0], [0, 2], [2, 3], [4, 0],
    [6, 3], [8, 5], [8, 8], [6, 8], [5, 9], [7, 2],
])
x, y = points[:, 0], points[:, 1]

# One parameter value per control point, and 100 evenly spaced sample
# positions spanning the same parameter range.
t = range(len(points))
ipl_t = np.linspace(0.0, len(points) - 1, 100)

# Fit a cubic spline per axis, then overwrite the fitted coefficients
# (element 1 of the splrep result) with the raw control-point values padded
# with four zeros, so the control points themselves act as the coefficients.
x_list = list(si.splrep(t, x, k=3))
x_list[1] = x.tolist() + [0.0] * 4

y_list = list(si.splrep(t, y, k=3))
y_list[1] = y.tolist() + [0.0] * 4

# Sample both axes at the 100 parameter values.
x_i = si.splev(ipl_t, x_list)
y_i = si.splev(ipl_t, y_list)

url = "https://codepen.io/falldowngoboone/pen/PwzPYv"
driver = webdriver.Chrome()
driver.get(url)

# First, move the pointer onto the start element.
start_element = driver.find_element(By.ID, 'drawer')
ActionChains(driver).move_to_element(start_element).perform()

# https://stackoverflow.com/a/70796266/465183
# Build a new ActionChains for every step: per the linked answer, reusing a
# single chain is what makes the pointer 'jump'.
for mouse_x, mouse_y in zip(x_i, y_i):
    ActionChains(driver).move_by_offset(mouse_x, mouse_y).perform()
    print(mouse_x, mouse_y)

Is there a Python module like NodeJS/pptr Ghost Cursor to facilitate integration?

Or can anybody here show us a way to integrate it into a real-life scraper?

Created a feature request: https://github.com/SeleniumHQ/selenium/issues/11824


Solution

  • Using pyautogui+Selenium ChromeDriver

    https://youtu.be/zZfPST2QS-g

    I think there's a better way, using Bezier curves as I do here together with Selenium ActionChains as your GitHub links suggest, and overriding a class to provide something like driver.move_to_element() and driver.random_mouse(), but this works well for simple requirements:

    git clone https://github.com/sputnick-dev/pyautogui-with-selenium.git
    cd pyautogui-with-selenium
    ./run
    

    Code, to avoid a link-only answer:

    paw file:

    #!/usr/bin/env python3
    import random
    import bezier
    import pyautogui
    import numpy as np
    from time import sleep
    from random import uniform as randfloat
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import TimeoutException
    from selenium import webdriver
    
    def slow_type(element, text: str, *, min_delay: float = 0.05, max_delay: float = 0.3):
        """Send a text to an element one character at a time with a delay.

        Args:
            element: Object exposing ``send_keys``; it receives each character
                of ``text`` in order, one call per character.
            text: The text to type.
            min_delay: Lower bound, in seconds, of the random pause that
                follows each character.
            max_delay: Upper bound, in seconds, of that pause.
        """
        for character in text:
            element.send_keys(character)
            # A fresh random pause per keystroke keeps the rhythm irregular.
            sleep(randfloat(min_delay, max_delay))
    
    def _move_along_bezier(end, curve_steps=70, delay=0.008):
        """Move the pointer from its current position to ``end`` along a cubic Bezier curve.

        Args:
            end: Target ``(x, y)`` position passed on to ``pyautogui.moveTo``.
            curve_steps: How many points the curve is split into. Each is a
                separate ``pyautogui.moveTo()`` call.
            delay: Pause in seconds after every move. The whole path therefore
                takes roughly ``curve_steps * delay`` seconds (about 0.56 s
                with the defaults) plus the time spent in ``moveTo``.
        """
        start = pyautogui.position()

        mid_x = (start[0] + end[0]) / 2
        mid_y = (start[1] + end[1]) / 2

        # Two intermediate control points that may be adjusted to modify the
        # curve. Both sit at the midpoint height; in x they lie halfway between
        # the start and the midpoint, and halfway between the midpoint and the end.
        control1 = (start[0] + mid_x) / 2, mid_y
        control2 = (end[0] + mid_x) / 2, mid_y

        # Format points to use with bezier: one row of x values, one row of y values.
        control_points = np.array([start, control1, control2, end])
        nodes = np.array([control_points[:, 0], control_points[:, 1]])
        # Degree of the curve; should be less than the number of control points.
        degree = 3
        curve = bezier.Curve(nodes, degree)

        for step in range(1, curve_steps + 1):
            # evaluate() takes a float in [0.0, 1.0]; step / curve_steps is the
            # fraction of the curve travelled so far. Its result is flattened
            # and converted to plain floats so pyautogui never has to convert
            # a one-element array to an int (deprecated in recent NumPy).
            x, y = np.ravel(curve.evaluate(step / curve_steps))
            pyautogui.moveTo(float(x), float(y))
            pyautogui.sleep(delay)

    def resting_mouse():
        """Move mouse to right of screen.

        The target x is chosen at random within the last 100 pixels of the
        browser's ``window.outerWidth`` and the target y at random between 400
        and 850. Reads the module-level ``driver`` and sleeps 2 seconds after
        the move.
        """
        panel_width = driver.execute_script('return window.outerWidth;')
        end = random.randint(panel_width - 100, panel_width), random.randint(400, 850)
        _move_along_bezier(end)
        sleep(2)

    def bezier_mouse(location, size, panelHeight):
        """Move mouse to the middle of an element.

        Args:
            location: Mapping with the element's ``"x"`` and ``"y"``.
            size: Mapping with the element's ``"width"`` and ``"height"``.
            panelHeight: Vertical offset added to ``location["y"]`` to get the
                y passed to pyautogui; the caller in this file computes it as
                ``window.outerHeight - window.innerHeight``.

        NOTE(review): no window-position offset is added, so this presumably
        relies on the browser window starting at the screen origin (e.g.
        maximized) -- confirm before using it with other window placements.
        """
        x, rel_y = location["x"], location["y"]  # x used as-is, y is offset below
        abs_y = rel_y + panelHeight
        x_center = int(size["width"] / 2 + x)
        y_center = int(size["height"] / 2 + abs_y)
        _move_along_bezier((x_center, y_center))
    
    
    # Disable pyautogui pauses so the only delays are the explicit sleeps below.
    pyautogui.MINIMUM_DURATION = 0
    pyautogui.MINIMUM_SLEEP = 0
    pyautogui.PAUSE = 0
    # Avoid an exception when the mouse is in a top corner.
    pyautogui.FAILSAFE = False

    # Instantiate the Chrome webdriver.
    driver = webdriver.Chrome()
    try:
        driver.maximize_window()
        driver.implicitly_wait(0.5)

        try:
            driver.get("http://localhost:8000")
        except TimeoutException:
            # Stop loading the page and carry on with whatever has loaded.
            driver.execute_script("window.stop();")

        sleep(2)
        e1 = driver.find_element(By.ID, "change-me-paw")
        e2 = driver.find_element(By.ID, "textarea")

        # Difference between the outer window height and the viewport height;
        # bezier_mouse adds it to each element's y coordinate.
        panelHeight = driver.execute_script('return window.outerHeight - window.innerHeight;')

        location1 = e1.location  # coords of element1
        location2 = e2.location  # coords of element2
        size1 = e1.size  # size of element1
        size2 = e2.size  # size of element2
        sleep(3)
        bezier_mouse(location1, size1, panelHeight)
        e1.click()
        sleep(2)
        bezier_mouse(location2, size2, panelHeight)
        slow_type(e2, 'I type like a human being...')
        sleep(2)
        resting_mouse()
        sleep(2)
    finally:
        # quit() rather than close(): it ends the whole WebDriver session, and
        # running it in ``finally`` releases the browser even if a step raised.
        driver.quit()
    

    The rest of the artefacts are in the repository.

    run wrapper script:

    #!/bin/bash
    # Serve tests/ over HTTP in the background, run ./paw against it, then stop
    # the server. The script's exit status is that of ./paw.

    server_pid=

    # Stop the background HTTP server if one was started.
    cleanup() {
        [[ -n $server_pid ]] && kill "$server_pid" &>/dev/null
    }

    # Run cleanup on every exit path. HUP/INT/QUIT/TERM are turned into a plain
    # exit so that the EXIT trap fires for them as well.
    trap cleanup EXIT
    trap 'exit 1' 1 2 3 15

    # Install the requirements once; .req marks that the install succeeded.
    [[ ! -e .req ]] && pip3 install -r requirements.txt && touch .req

    # Start the server from a subshell so this script's working directory never
    # changes and a failed cd cannot serve the wrong directory. exec makes $!
    # the PID of the server itself.
    (cd tests/ && exec python3 -m http.server) &
    server_pid=$!

    ./paw