I am trying to use zipline for backtesting and I am getting this error:
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
It seems to mean that the contents of the CSV are not being loaded into the dataframe.
I am doing it from a Jupyter notebook, with the following two cells. Cell 1 (runs correctly):
import pandas as pd
from collections import OrderedDict
import pytz
# Columns kept from every CSV; the same labels are applied to the panel's
# minor axis below.
ohlcv_fields = ["open", "high", "low", "close", "volume"]

tickers = ["btc"]
data = OrderedDict()
for ticker in tickers:
    # Each ticker lives in "<ticker>.csv"; the first column becomes the index
    # and the 'date' column is parsed as datetimes.
    csv_path = "{}.csv".format(ticker)
    frame = pd.read_csv(csv_path, index_col=0, parse_dates=['date'])
    data[ticker] = frame[ohlcv_fields]
    # print(data[ticker].head())

# Stack the per-ticker frames into one panel keyed by ticker.
panel = pd.Panel(data)
panel.minor_axis = ohlcv_fields
# Mark the (naive) date axis as UTC.
panel.major_axis = panel.major_axis.tz_localize(pytz.utc)
print(panel)
Cell 2 (throws the error):
from zipline.api import order, record, symbol, set_benchmark
import zipline
from datetime import datetime
def initialize(context):
    """Set the algorithm's benchmark to the "btc" asset.

    ``context`` is accepted but not used here.
    """
    benchmark_asset = symbol("btc")
    set_benchmark(benchmark_asset)
def handle_data(context, data):
    """Order 10 units of "btc" and record its current "price".

    ``context`` is accepted but not used here; ``data`` is queried through
    ``data.current`` for the "price" field of the asset.
    """
    btc_asset = symbol("btc")
    order(btc_asset, 10)
    current_price = data.current(btc_asset, "price")
    record(btc=current_price)
# Run the backtest over the panel built in the first cell. Both window
# endpoints are midnight datetimes carrying the pytz UTC timezone.
perf = zipline.run_algorithm(
    start=datetime(2019, 7, 8, tzinfo=pytz.utc),
    end=datetime(2020, 7, 6, tzinfo=pytz.utc),
    initialize=initialize,
    capital_base=100000,
    handle_data=handle_data,
    data=panel)
This is the full error:
JSONDecodeError Traceback (most recent call last)
<ipython-input-18-0f794aa11983> in <module>
15 capital_base=100000,
16 handle_data=handle_data,
---> 17 data=panel)
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/utils/run_algo.py in run_algorithm(start, end, initialize, capital_base, handle_data, before_trading_start, analyze, data_frequency, data, bundle, bundle_timestamp, trading_calendar, metrics_set, default_extension, extensions, strict_extensions, environ, blotter)
428 local_namespace=False,
429 environ=environ,
--> 430 blotter=blotter,
431 )
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
186 trading_calendar=trading_calendar,
187 trading_day=trading_calendar.day,
--> 188 trading_days=trading_calendar.schedule[start:end].index,
189 )
190 choose_loader = None
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/finance/trading.py in __init__(self, load, bm_symbol, exchange_tz, trading_calendar, trading_day, trading_days, asset_db_path, future_chain_predicates, environ)
101 trading_day,
102 trading_days,
--> 103 self.bm_symbol,
104 )
105
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/data/loader.py in load_market_data(trading_day, trading_days, bm_symbol, environ)
147 # date so that we can compute returns for the first date.
148 trading_day,
--> 149 environ,
150 )
151 tc = ensure_treasury_data(
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/data/loader.py in ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, environ)
214
215 try:
--> 216 data = get_benchmark_returns(symbol)
217 # data = get_benchmark_returns(symbol, first_date, last_date)
218 data.to_csv(get_data_filepath(filename, environ))
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/data/benchmarks.py in get_benchmark_returns(symbol)
33 # symbol,
34 # 'yahoo',
---> 35 # first_date,
36 # last_date
37 # )
~/Documents/environments/mai_zipline/lib/python3.5/site-packages/requests/models.py in json(self, **kwargs)
896 # used.
897 pass
--> 898 return complexjson.loads(self.text, **kwargs)
899
900 @property
~/.pyenv/versions/3.5.9/lib/python3.5/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
317 parse_int is None and parse_float is None and
318 parse_constant is None and object_pairs_hook is None and not kw):
--> 319 return _default_decoder.decode(s)
320 if cls is None:
321 cls = JSONDecoder
~/.pyenv/versions/3.5.9/lib/python3.5/json/decoder.py in decode(self, s, _w)
337
338 """
--> 339 obj, end = self.raw_decode(s, idx=_w(s, 0).end())
340 end = _w(s, end).end()
341 if end != len(s):
~/.pyenv/versions/3.5.9/lib/python3.5/json/decoder.py in raw_decode(self, s, idx)
355 obj, end = self.scan_once(s, idx)
356 except StopIteration as err:
--> 357 raise JSONDecodeError("Expecting value", s, err.value) from None
358 return obj, end
JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Here is the head of the file in question. I am 99% sure this has nothing to do with the issue, but including here in case I'm wrong.
date,open,high,low,close,volume
2019-07-08,11426.14,12332.903999999999,11285.08505532,12256.63541378,456.34736116
2019-07-09,12242.56000004,12781.91871274,12072.528405,12537.46796421,887.46499499
2019-07-10,12555.374,13124.99999996,11560.17126413,12123.21645309,924.87759996
2019-07-11,12101.0,12101.0,11000.0,11350.65999999,832.37897729
2019-07-12,11348.032,11887.79,11085.71500001,11770.22087782,381.94137711
2019-07-13,11770.22087782,11801.101,10843.00000001,11385.385,432.93217781
2019-07-14,11373.28625859,11444.351,10109.83629449,10178.402,595.94915946
2019-07-15,10197.0,11075.19699999,9868.36363636,10835.82941852,618.44546962
2019-07-16,10832.737,11006.208999999999,9363.63639998,9404.234327700002,946.91510511
Why is the CSV's data not loading appropriately? I don't think it's a current directory issue, because if I un-comment print(data[ticker].head())
from the first cell and run the cell, it correctly prints the head of the CSV.
FWIW, I'm following this tutorial: https://www.youtube.com/watch?v=ZEULPvS_eeI
Apparently you have to change something in the configuration of Zipline. I found someone with the same problem in the comments section of the tutorial, and this is what he said to do:
In User/Anaconda3/envs/[environment]/Lib/site-packages/zipline/data/benchmarks.py
import numpy as np
import pandas as pd
import pandas_datareader.data as pd_reader
def get_benchmark_returns(symbol, first_date, last_date):
    """Return daily percentage returns of ``symbol``'s closing price.

    Closing prices are downloaded with ``pd_reader.DataReader`` from the
    'yahoo' source for the window ``first_date``..``last_date``.

    Three specific dates (2008-12-15, 2009-08-11, 2012-02-02) are given a
    forward-filled close when they are absent from the downloaded data.
    A placeholder is only added when the date lies strictly inside the
    downloaded range and is not already present, so real closes are never
    overwritten and no rows are invented outside the requested window.

    Parameters
    ----------
    symbol : passed through to ``DataReader`` as the ticker to download.
    first_date, last_date : passed through to ``DataReader`` as the window.

    Returns
    -------
    The 'Close' series, sorted by date, localized to UTC, converted to
    one-period percentage changes, with the first (undefined) row dropped.
    """
    close = pd_reader.DataReader(symbol, 'yahoo', first_date, last_date)['Close']
    # Sort first so that "forward fill" really means "previous session".
    close = close.sort_index()

    if not close.empty:
        earliest, latest = close.index[0], close.index[-1]
        for day in ('2008-12-15', '2009-08-11', '2012-02-02'):
            stamp = pd.Timestamp(day)
            if earliest < stamp < latest and stamp not in close.index:
                close[stamp] = np.nan
        # Placeholders were appended at the end; restore date order before
        # filling so each one picks up the preceding session's close.
        close = close.sort_index()

    close = close.ffill()
    return close.tz_localize('UTC').pct_change(1).iloc[1:]
Notice the use of 'yahoo', since google doesn't work anymore.
In User/Anaconda3/envs/[environment]/Lib/site-packages/zipline/data/loader.py
change
data = get_benchmark_returns(symbol)
to
data = get_benchmark_returns(symbol, first_date, last_date)