I need to download a file from one of these links with Python, but the PDF won't open after it is downloaded. https://fnet.bmfbovespa.com.br/fnet/publico/exibirDocumento?id=693676 https://fnet.bmfbovespa.com.br/fnet/publico/downloadDocumento?id=693676
import requests
# The two endpoints that are tried for the same document id.
urls = (
    "https://fnet.bmfbovespa.com.br/fnet/publico/exibirDocumento?id=693676",
    "https://fnet.bmfbovespa.com.br/fnet/publico/downloadDocumento?id=693676",
)

# The same User-Agent header is sent with every request.
request_headers = {'User-Agent': "scrapping_script/1.0"}

for number, url in enumerate(urls, start=1):
    download = requests.get(url, headers=request_headers)
    # The response body is written to disk exactly as it was received,
    # one numbered file per URL (1.pdf, 2.pdf).
    with open(f"/Users/renato/Documents/{number}.pdf", 'wb') as out_file:
        out_file.write(download.content)
I already tried using urllib and changing the headers, but had the same issue. Any suggestions? Thanks!
If I display the downloaded file (in a normal text editor or in the console), I see a string with the characteristic `==`
at the end, which suggests that the file is encoded with base64
- so it needs to be decoded:
import base64
# The downloaded body appears to be base64 text rather than raw PDF bytes,
# so decode it before writing it to disk.
content = base64.b64decode(download.content)
That's all.
Here is the full working code which I used for tests.
I had to add `verify=False`
because of a problem with SSL verification.
import base64
import requests
# The two endpoints that serve the same document id.
items = [
    "https://fnet.bmfbovespa.com.br/fnet/publico/exibirDocumento?id=693676",
    "https://fnet.bmfbovespa.com.br/fnet/publico/downloadDocumento?id=693676",
]

headers = {
    'User-Agent': "scrapping_script/1.0",
    # Alternative browser-like value:
    #'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:127.0) Gecko/20100101 Firefox/127.0",
}

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on an unresponsive host.
REQUEST_TIMEOUT = 30


def suggested_filename(content_disposition):
    """Return the ``filename`` parameter of a Content-Disposition header value.

    Args:
        content_disposition: Raw header value, e.g.
            ``'attachment; filename="report.pdf"'``.

    Returns:
        The file name with surrounding double quotes removed, or ``None``
        when the header carries no ``filename`` parameter.
    """
    for part in content_disposition.split(';'):
        key, sep, value = part.strip().partition('=')
        if sep and key.strip().lower() == 'filename':
            return value.strip().strip('"')
    return None


def main():
    """Download every URL in ``items`` and save the decoded body as NN.pdf.

    The response body is base64-decoded before it is written, and the files
    are created in the current working directory.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    for index, url in enumerate(items, 1):
        print('url:', url)

        # NOTE: verify=False turns off TLS certificate verification. It is
        # kept because certificate validation failed for this host, but it
        # makes the connection vulnerable to interception.
        response = requests.get(
            url,
            headers=headers,
            verify=False,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly instead of decoding and saving an HTML error page.
        response.raise_for_status()

        # The server-suggested name is shown for information only.
        disposition = response.headers.get('Content-Disposition')
        if disposition is not None:
            print('Content-Disposition:', disposition)
            print('filename:', suggested_filename(disposition))

        content = base64.b64decode(response.content)

        # Files are saved under an index-based name; use an absolute path
        # such as f"/Users/renato/Documents/{index:02}.pdf" to save elsewhere.
        filename = f"{index:02}.pdf"
        print('filename:', filename)

        with open(filename, 'wb') as f:
            f.write(content)


if __name__ == "__main__":
    main()