python-3.x selenium firefox firefox-headless

Copy/paste not working with a headless browser in Python Selenium


I am using Selenium with Python to click a button on a webpage. This copies data in CSV format onto the clipboard. I then use the data on the clipboard to create an array, which is used further in the program. Everything works fine until I start the webdriver in headless mode. Is there any solution to this problem? Can this entire code be written without Selenium? I am open to ideas and improvements to my code.

    # Start Firefox in headless mode (no visible browser window).
    # To watch the browser while debugging, drop the '--headless' argument.
    objFFOptions = Options()
    objFFOptions.add_argument('--headless')
    try:
        objFFWebDriver = webdriver.Firefox(options=objFFOptions)
    except Exception as objException:
        # If the constructor raised, objFFWebDriver was never bound, so there
        # is no driver to quit here; calling quit() on it would only replace
        # the real error with a NameError.
        print("Error in initiating the Firefox webdriver")
        print("\t", objException)
        quit()


    # Load the screener page; on failure, report why, release the browser
    # and stop the script.
    try:
        objFFWebDriver.get("https://chartink.com/screener/90dis")
    except Exception as objException:
        # `except Exception` (not a bare `except`) so Ctrl-C and SystemExit
        # are not mistaken for a page-load failure.
        print("Error in opening the webpage")
        print("\t", objException)
        objFFWebDriver.quit()
        quit()

    import time  # stdlib; local import keeps this snippet self-contained

    # Poll the page until the screener reports a final state, waiting longer
    # on each successive attempt, and give up once the attempt counter has
    # passed intMaxAttempt.
    # NOTE: intDelaySeconds and intMaxAttempt are not defined in this snippet;
    # they are expected to be set earlier in the script.
    intAttemptCounter = 0
    boolStockDataFetched = False
    arrFinalDataList = []  # stays empty when nothing could be fetched

    try:
        while intAttemptCounter <= intMaxAttempt:
            intAttemptCounter += 1
            print("\tFetching attempt ", intAttemptCounter)

            try:
                # any_of() tries each condition in turn. Joining the two
                # condition objects with `or` would always pick the first
                # one, so the second marker would never be looked for.
                objFilterMessageElement = WebDriverWait(
                    objFFWebDriver, intDelaySeconds * intAttemptCounter
                ).until(
                    expected_conditions.any_of(
                        expected_conditions.presence_of_element_located(
                            (By.ID, 'DataTables_Table_0_info')),
                        expected_conditions.presence_of_element_located(
                            (By.CLASS_NAME, 'dataTables_empty')),
                    )
                )
            except TimeoutException:
                print("\tTimeout Exception")
                continue

            print("\tEither of the two marker elements found")
            strFilterMessage = objFilterMessageElement.text

            if strFilterMessage == "No stocks filtered in the Scan":
                print("\t", strFilterMessage)
                break

            if re.search(r"Filtered\s+[0-9]+\s+stocks\s+\([0-9]+\s+to\s+[0-9]+\)",
                         strFilterMessage) is None:
                # The marker exists but does not show a final message yet.
                # Pause before retrying so the remaining attempts are not
                # used up in a tight loop.
                time.sleep(intDelaySeconds)
                continue

            print("\t", strFilterMessage)

            try:
                # click copy button
                objFFWebDriver.find_element(
                    By.XPATH,
                    "//*[@class='btn btn-default buttons-copy buttons-html5 btn-primary']"
                ).click()
            except NoSuchElementException:
                # Without a click there is nothing new on the clipboard, so
                # never fall through to the paste below.
                time.sleep(intDelaySeconds)
                continue

            # store the query result from clipboard to a string
            # NOTE: this relies on the system clipboard, which is reported
            # not to be filled when Firefox runs in headless mode.
            strCSVData = pyperclip.paste()
            pyperclip.copy("")

            if not strCSVData.strip():
                # Nothing was copied; treat it as a failed attempt rather
                # than reporting success with an empty result.
                print("\tClipboard is empty after clicking the copy button")
                time.sleep(intDelaySeconds)
                continue

            # create array from the csv string containing stock data
            arrDataList = list(csv.reader(StringIO(strCSVData), delimiter='\t'))
            # Skip the first three clipboard lines (presumably title, blank
            # and header lines - confirm against the copied text) and keep
            # the third column; rows that are too short are ignored.
            arrFinalDataList = [arrDataRecord[2]
                                for arrDataRecord in arrDataList[3:]
                                if len(arrDataRecord) > 2]

            boolStockDataFetched = True
            break
    finally:
        # Always release the browser, even if an unexpected error occurs.
        objFFWebDriver.quit()

    if not boolStockDataFetched:
        print("Error in fetching records or no records fetched")

Solution

  • You probably can't copy-paste in a headless browser. You could, instead, read the data from the visual table.

    However, you don't need Selenium at all. If you use your browser's inspector to look at the requests the page makes, you can formulate something that performs a similar sequence, like so:

    import re
    from pprint import pprint
    import requests
    
    # Use one session for both requests so that cookies set by the initial
    # GET are sent along with the POST; the `with` block closes the session
    # and its connections when done.
    with requests.Session() as sess:
        sess.headers["User-Agent"] = "Mozilla/5.0 Safari/537.36"

        # Do initial GET request, grab CSRF token
        # (explicit timeouts: requests would otherwise wait indefinitely)
        resp = sess.get("https://chartink.com/screener/90dis", timeout=30)
        resp.raise_for_status()
        csrf_token_m = re.search(r'<meta name="csrf-token" content="(.+?)" />', resp.text)
        if csrf_token_m is None:
            # Fail with a clear message instead of an AttributeError on None
            # if the page markup does not contain the expected meta tag.
            raise RuntimeError("CSRF token not found in the screener page")
        csrf_token = csrf_token_m.group(1)

        # Do data query
        resp = sess.post(
            "https://chartink.com/screener/process",
            data={
                "scan_clause": "( {cash} ( latest count( 90, 1 where latest ha-low > latest ichimoku cloud top( 9 , 26 , 52 ) ) = 90 ) )",
            },
            headers={
                "Referer": "https://chartink.com/screener/90dis",
                "x-csrf-token": csrf_token,
                "x-requested-with": "XMLHttpRequest",
            },
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()

    pprint(data)
    

    This prints out e.g.

    {'data': [{'bsecode': None,
               'close': 18389.5,
               'name': 'NIFTY100',
               'nsecode': 'NIFTY100',
               'per_chg': 1.28,
               'sr': 1,
               'volume': 0},
              {'bsecode': '532978',
               'close': 18273.8,
               'name': 'Bajaj Finserv Limited',
               'nsecode': 'BAJAJFINSV',
               'per_chg': 2.25,
               'sr': 2,
               'volume': 207802},
              ...