I am trying to export a .har file using Firefox, Selenium, and BrowserMob Proxy in Python, using the code below.
# Launch a local BrowserMob Proxy server and route a Selenium Firefox
# session through it so the proxy can record network traffic as a HAR.
# NOTE(review): machine-specific path — presumably must be adjusted per host.
bmp_loc = "/Users/project/browsermob-proxy-2.1.4/bin/browsermob-proxy"
server = Server(bmp_loc)
server.start()
# 'trustAllServers' makes the proxy accept any upstream TLS certificate,
# which is required to intercept HTTPS traffic.
proxy = server.create_proxy(params={'trustAllServers': 'true'})
selenium_proxy = proxy.selenium_proxy()
caps = webdriver.DesiredCapabilities.FIREFOX
# Disable marionette to fall back to the legacy FirefoxDriver protocol.
caps['marionette'] = False
# Point both HTTP and HTTPS traffic at the BrowserMob proxy.
proxy_settings = {
"proxyType": "MANUAL",
"httpProxy": selenium_proxy.httpProxy,
"sslProxy": selenium_proxy.sslProxy,
}
caps['proxy'] = proxy_settings
driver = webdriver.Firefox(desired_capabilities=caps)
# Begin a fresh HAR named "generated_har"; captureHeaders records the
# request/response headers in each HAR entry.
proxy.new_har("generated_har",options={'captureHeaders': True})
driver.get("someurl")
# Snapshot of the HAR accumulated so far (a Python dict, not a file).
browser_logs = proxy.har
I want to read `_transferSize` from the .har file to perform some analysis, but I am unable to get it; instead, each entry ends with an empty 'comment' field:
"redirectURL": "", "headersSize": 1023, "bodySize": 38, "comment": ""
whereas when I manually download the .har file from Firefox's developer tools, `_transferSize` is present.
Version used:
browsermob_proxy==2.1.4
selenium==4.0.0
Can anybody please help me resolve this?
You can approximate `_transferSize` yourself by adding `headersSize` and `bodySize` from each entry in the HAR file.
# Capture a HAR via selenium-wire and derive simple per-entry statistics:
# slow requests, HTTP status frequencies, oversized responses, repeated URLs.
from collections import Counter

urls = ["https://google.com"]
for ur in urls:
    server = proxy.start_server()
    client = proxy.start_client()
    client.new_har("demo.com")

    # FIX: the original rebound the name `options` from the ChromeOptions
    # object to the selenium-wire config dict, so the ChromeOptions (and its
    # --disk-cache-size=0 argument) were silently discarded. Keep them apart
    # and pass both to the driver.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--disk-cache-size=0")
    sw_options = {'enable_har': True}

    driver = webdriver.Chrome(options=chrome_options,
                              seleniumwire_options=sw_options)
    driver.request_interceptor = proxy.interceptor
    driver.get(ur)
    time.sleep(40)  # crude wait for the page's network activity to settle

    json_dictionary = json.loads(driver.har)

    a_list = []              # every requested URL, in request order
    url_time = 0             # number of requests slower than 2000 ms
    resp_size = 0            # number of responses larger than ~500 kB
    status_url = Counter()   # status code -> number of occurrences
    url_counts = Counter()   # url -> number of occurrences

    # NOTE(review): "w" truncates the log on every URL iteration; with more
    # than one URL, switch to a per-URL filename or append mode.
    with open("network_log2.har", "w", encoding="utf-8") as f:
        for entry in json_dictionary['log']['entries']:
            f.write(str(entry))
            f.write("\n")

            url = entry['request']['url']
            a_list.append(url)
            url_counts[url] += 1

            # FIX: the original also computed round(time/2000, 1) into a
            # variable that was never read; only the count matters.
            if entry['time'] > 2000:
                url_time += 1

            status_url[entry['response']['status']] += 1

            # FIX: headersSize/bodySize are -1 in HAR when unknown;
            # clamp to 0 so unknown sizes don't skew the total.
            size = (max(entry['response']['headersSize'], 0)
                    + max(entry['response']['bodySize'], 0))
            if size // 1000 > 500:
                resp_size += 1

    # URLs requested more than once (url -> occurrence count).
    repeat_urls = {u: c for u, c in url_counts.items() if c > 1}
    rurl_count = len(repeat_urls)

    driver.quit()  # FIX: release the browser (original leaked it per iteration)