I was using the code below to collect Bitcoin price data:
# -*- coding: UTF-8 -*-
import os
import re
import requests
import datetime
from bs4 import BeautifulSoup
# Records dated on or before this day are dropped by parse_record.
first_date = datetime.datetime(2010, 7, 16)


def parse_record(record):
    """Normalise one scraped chart point, or drop it if it is too old.

    Args:
        record: A string of the form ``[new Date("YYYY/MM/DD"),VALUE]``,
            as matched in the page's inline JavaScript.

    Returns:
        ``'[new Date("YYYY/MM/DD"), VALUE]'`` (note the space after the
        comma) when the record's date is strictly after ``first_date``;
        otherwise ``None``.

    Raises:
        ValueError: If the date part of ``record`` does not have the
            ``new Date("YYYY/MM/DD")`` form.
    """
    # Strip the surrounding brackets, then split on the first comma rather
    # than relying on fixed character offsets.
    date_literal, _, value = record[1:-1].partition(',')
    date = datetime.datetime.strptime(date_literal, 'new Date("%Y/%m/%d")')
    if date > first_date:
        return f"[{date_literal}, {value}]"
    return None
# Chart page whose source embeds the price history as JavaScript literals.
url_price = 'https://bitinfocharts.com/comparison/bitcoin-price.html#alltime'
response_price = requests.get(url_price, timeout=30)
# Fail loudly on an HTTP error instead of scraping an error page.
response_price.raise_for_status()

# One chart point as it appears in the page source,
# e.g. [new Date("2010/07/17"),0.0495]
pattern_price = re.compile(r'\[new Date\("\d{4}/\d{2}/\d{2}"\),\d*\.?\w*\]')

# Search the whole response instead of the fifth <script> tag: the position
# of the data script depends on the page layout, so a fixed index breaks
# whenever the site adds or removes a script.
records_price = pattern_price.findall(response_price.text)

price = [parse_record(record) for record in records_price]

# parse_record returns None for records it filters out; skip those, then
# emit each remaining point without spaces except inside "new Date".
formatted_points = (
    entry.replace(" ", "").replace("newDate", "new Date")
    for entry in price
    if entry is not None
)

# The context manager closes the file even if the write fails.
with open("btc-price.txt", "w") as text_file:
    text_file.write(",".join(formatted_points))
A year ago this worked fine and I could save the price data to a text file. Now it no longer works; it seems the website's page has changed somehow, so the code cannot find the data anymore. I am new to coding. Can anyone help?
Your code is too complicated for such a simple task. You did not say what format the result should be saved in, so I'll just give an example that prints the output with pandas. The full data (a list of dicts) is in the `result` variable.
import pandas as pd
import re
from datetime import datetime
# Download the chart page; the price history is embedded in its source.
response = requests.get('https://bitinfocharts.com/comparison/bitcoin-price.html#alltime')

# Each match has the form `"2010/07/17"),0`: a quoted date, the closing
# parenthesis of the JavaScript Date call, a comma, then the value.
# NOTE: the trailing \d+ matches digits only, so any fractional part of
# the value is not captured.
result = []
for match in re.findall(r'"\d+\/\d+\/\d+"\),\d+', response.text):
    pieces = match.split(',')
    day = datetime.strptime(str(pieces[0]), '"%Y/%m/%d")').date()
    result.append({'Date': str(day), 'Value': pieces[1]})

# Tabulate the rows and show them.
df = pd.DataFrame(result)
print(df)
OUTPUT:
Date Value
0 2010-07-17 0
1 2010-07-18 0
2 2010-07-19 0
3 2010-07-20 0
4 2010-07-21 0
... ... ...
4845 2023-10-22 29916
4846 2023-10-23 30877
4847 2023-10-24 34086
4848 2023-10-25 34329
4849 2023-10-26 34259