python selenium network-traffic

Do a specific search for dicts in a list in Python


I am capturing the network traffic of a website. I want to get the JSON file for a location on Google Maps, so I need to extract the JSON URL from the network traffic. The network traffic I receive is stored as a list. This list contains dictionaries. And every time I refresh the web page, the positions of the entries in the list change.

Here is my code:

import time
import json
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Work on a copy so the shared DesiredCapabilities.CHROME preset is not mutated.
caps = DesiredCapabilities.CHROME.copy()
caps['goog:loggingPrefs'] = {'performance': 'ALL'}
driver = webdriver.Chrome(desired_capabilities=caps)
driver.get("websitelinkhere.com")

try:
    while True:
        ready = input("Ready?")
        # Any answer other than "y"/"Y" ends the session. The previous test,
        # `ready == "y" or "Y"`, was always true because "Y" is truthy, so the
        # loop could never finish and the browser was never closed.
        if ready.strip().lower() != "y":
            break

        # Presumably gives pending requests a moment to complete before the
        # entries are read.
        time.sleep(2)

        # Find the entry that downloads the metadata file: print every
        # Performance API entry with its index so it can be located by eye.
        timings = driver.execute_script("return window.performance.getEntries();")
        print(type(timings))
        for i, entry in enumerate(timings):
            print(i, entry)
            print("-------------")
finally:
    # Close the web browser (the original called an undefined `browser`).
    driver.quit()

There are about 500 entries in the list.

Output Example :

140 {'connectEnd': 0, 'connectStart': 0, 'decodedBodySize': 0, 'domainLookupEnd': 0, 'domainLookupStart': 0, 'duration': 98.70000000018626, 'encodedBodySize': 0, 'entryType': 'resource', 'fetchStart': 49603, 'initiatorType': 'script', 'name': 'https://maps.googleapis.com/maps/api/js/GeoPhotoService.GetMetadata?pb=!1m4!1sapiv3!11m2!1m1!1b0!2m2!1str-TR!2sUS!3m3!1m2!1e2!2s6BOFuzJhNCDJbDNl_f4GVA!4m57!1e1!1e2!1e3!1e4!1e5!1e6!1e8!1e12!2m1!1e1!4m1!1i48!5m1!1e1!5m1!1e2!6m1!1e1!6m1!1e2!9m36!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e1!2b0!3e3!1m3!1e4!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e3&callback=_callbacks____0lajjuohz', 'nextHopProtocol': '', 'redirectEnd': 0, 'redirectStart': 0, 'renderBlockingStatus': 'non-blocking', 'requestStart': 0, 'responseEnd': 49701.700000000186, 'responseStart': 0, 'secureConnectionStart': 0, 'serverTiming': [], 'startTime': 49603, 'transferSize': 0, 'workerStart': 0}
-------------

This time I found the entry I wanted at index 140 of the list ("https://maps.googleapis.com/maps/api/js/GeoPhotoService.GetMetadata"), but every time I repeat this process, its position in the list changes.

The only constant part of the URL in the above example is ("https://maps.googleapis.com/maps/api/js/GeoPhotoService.GetMetadata"). I need to get the full link, including the rest of it ("https://maps.googleapis.com/maps/api/js/GeoPhotoService.GetMetadata?pb=!1m4!1sapiv3!11m2!1m1!1b0!2m2!1str-TR!2sUS!3m3!1m2!1e2!2s6BOFuzJhNCDJbDNl_f4GVA!4m57!1e1!1e2!1e3!1e4!1e5!1e6!1e8!1e12!2m1!1e1!4m1!1i48!5m1!1e1!5m1!1e2!6m1!1e1!6m1!1e2!9m36!1m3!1e2!2b1!3e2!1m3!1e2!2b0!3e3!1m3!1e3!2b1!3e2!1m3!1e3!2b0!3e3!1m3!1e8!2b0!3e3!1m3!1e1!2b0!3e3!1m3!1e4!2b0!3e3!1m3!1e10!2b1!3e2!1m3!1e10!2b0!3e3&callback=_callbacks____0lajjuohz").

How can I debug this and find the entry I want?


Solution

  • I found a solution like this:

    import time
    import json
    from selenium import webdriver
    from bs4 import BeautifulSoup
    from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
    # Work on a copy so the shared DesiredCapabilities.CHROME preset is not mutated.
    caps = DesiredCapabilities.CHROME.copy()
    caps['goog:loggingPrefs'] = {'performance': 'ALL'}
    driver = webdriver.Chrome(desired_capabilities=caps)
    driver.get("xxxxxxx")

    # Only this leading part of the request URL is constant between page loads;
    # the query string after it changes every time.
    metadata_prefix = 'https://maps.googleapis.com/maps/api/js/GeoPhotoService.GetMetadata?'

    try:
        while True:
            ready = input("Ready?")
            # Any answer other than "y"/"Y" ends the session. The previous test,
            # `ready == "y" or "Y"`, was always true because "Y" is truthy, so
            # the loop could never finish and the browser was never closed.
            if ready.strip().lower() != "y":
                break

            # Presumably gives pending requests a moment to complete before the
            # entries are read.
            time.sleep(2)

            # Find the entry that downloads the metadata file. Each entry is a
            # dict whose 'name' key holds the request URL, so only that field
            # is checked instead of stringifying every value of every entry.
            timings = driver.execute_script("return window.performance.getEntries();")
            print(type(timings))
            for entry in timings:
                url = str(entry.get('name', ''))
                if url.startswith(metadata_prefix):
                    print(url)
    finally:
        # Close the web browser (the original called an undefined `browser`).
        driver.quit()