python, python-3.x, pandas, zipline

CSV not being loaded into pandas for zipline project


I am trying to use zipline for backtesting and I am getting this error:

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

It seems to mean that the contents of the CSV are not being loaded into the dataframe.

I am doing it from a Jupyter notebook, with the following two cells.

Cell 1 (runs correctly):

import pandas as pd
from collections import OrderedDict
import pytz

data = OrderedDict()
tickers = ["btc"]

for ticker in tickers:
    data[ticker] = pd.read_csv("{}.csv".format(ticker), index_col=0, parse_dates=['date'])
    data[ticker] = data[ticker][["open","high","low","close","volume"]]
#     print(data[ticker].head())
    
panel = pd.Panel(data)
panel.minor_axis = ["open","high","low","close","volume"]
panel.major_axis = panel.major_axis.tz_localize(pytz.utc)
print(panel)

Cell 2 (throws the error):

from zipline.api import order, record, symbol, set_benchmark
import zipline
from datetime import datetime

def initialize(context):
    set_benchmark(symbol("btc"))
    
def handle_data(context, data):
    order(symbol("btc"), 10)
    record(btc=data.current(symbol("btc"), "price"))
    
perf = zipline.run_algorithm(start=datetime(2019, 7, 8, 0, 0, 0, 0, pytz.utc),
                             end=datetime(2020, 7, 6, 0, 0, 0, 0, pytz.utc),
                             initialize=initialize,
                             capital_base=100000,
                             handle_data=handle_data,
                             data=panel)

This is the full error:

JSONDecodeError                           Traceback (most recent call last)
<ipython-input-18-0f794aa11983> in <module>
     15                              capital_base=100000,
     16                              handle_data=handle_data,
---> 17                              data=panel)

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/utils/run_algo.py in run_algorithm(start, end, initialize, capital_base, handle_data, before_trading_start, analyze, data_frequency, data, bundle, bundle_timestamp, trading_calendar, metrics_set, default_extension, extensions, strict_extensions, environ, blotter)
    428         local_namespace=False,
    429         environ=environ,
--> 430         blotter=blotter,
    431     )

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/utils/run_algo.py in _run(handle_data, initialize, before_trading_start, analyze, algofile, algotext, defines, data_frequency, capital_base, data, bundle, bundle_timestamp, start, end, output, trading_calendar, print_algo, metrics_set, local_namespace, environ, blotter)
    186             trading_calendar=trading_calendar,
    187             trading_day=trading_calendar.day,
--> 188             trading_days=trading_calendar.schedule[start:end].index,
    189         )
    190         choose_loader = None

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/finance/trading.py in __init__(self, load, bm_symbol, exchange_tz, trading_calendar, trading_day, trading_days, asset_db_path, future_chain_predicates, environ)
    101             trading_day,
    102             trading_days,
--> 103             self.bm_symbol,
    104         )
    105 

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/data/loader.py in load_market_data(trading_day, trading_days, bm_symbol, environ)
    147         # date so that we can compute returns for the first date.
    148         trading_day,
--> 149         environ,
    150     )
    151     tc = ensure_treasury_data(

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/data/loader.py in ensure_benchmark_data(symbol, first_date, last_date, now, trading_day, environ)
    214 
    215     try:
--> 216         data = get_benchmark_returns(symbol)
    217 #        data = get_benchmark_returns(symbol, first_date, last_date)
    218         data.to_csv(get_data_filepath(filename, environ))

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/zipline/data/benchmarks.py in get_benchmark_returns(symbol)
     33 #        symbol,
     34 #        'yahoo',
---> 35 #        first_date,
     36 #        last_date
     37 #    )

~/Documents/environments/mai_zipline/lib/python3.5/site-packages/requests/models.py in json(self, **kwargs)
    896                     # used.
    897                     pass
--> 898         return complexjson.loads(self.text, **kwargs)
    899 
    900     @property

~/.pyenv/versions/3.5.9/lib/python3.5/json/__init__.py in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    317             parse_int is None and parse_float is None and
    318             parse_constant is None and object_pairs_hook is None and not kw):
--> 319         return _default_decoder.decode(s)
    320     if cls is None:
    321         cls = JSONDecoder

~/.pyenv/versions/3.5.9/lib/python3.5/json/decoder.py in decode(self, s, _w)
    337 
    338         """
--> 339         obj, end = self.raw_decode(s, idx=_w(s, 0).end())
    340         end = _w(s, end).end()
    341         if end != len(s):

~/.pyenv/versions/3.5.9/lib/python3.5/json/decoder.py in raw_decode(self, s, idx)
    355             obj, end = self.scan_once(s, idx)
    356         except StopIteration as err:
--> 357             raise JSONDecodeError("Expecting value", s, err.value) from None
    358         return obj, end

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Here is the head of the file in question. I am 99% sure this has nothing to do with the issue, but I'm including it here in case I'm wrong.

date,open,high,low,close,volume
2019-07-08,11426.14,12332.903999999999,11285.08505532,12256.63541378,456.34736116
2019-07-09,12242.56000004,12781.91871274,12072.528405,12537.46796421,887.46499499
2019-07-10,12555.374,13124.99999996,11560.17126413,12123.21645309,924.87759996
2019-07-11,12101.0,12101.0,11000.0,11350.65999999,832.37897729
2019-07-12,11348.032,11887.79,11085.71500001,11770.22087782,381.94137711
2019-07-13,11770.22087782,11801.101,10843.00000001,11385.385,432.93217781
2019-07-14,11373.28625859,11444.351,10109.83629449,10178.402,595.94915946
2019-07-15,10197.0,11075.19699999,9868.36363636,10835.82941852,618.44546962
2019-07-16,10832.737,11006.208999999999,9363.63639998,9404.234327700002,946.91510511

Why is the CSV's data not being loaded correctly? I don't think it's a working-directory issue, because if I un-comment print(data[ticker].head()) in the first cell and run it, it correctly prints the head of the CSV.
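For completeness, here is an explicit check of the working directory and of the date parsing (a sketch; it assumes btc.csv sits next to the notebook):

import os
import pandas as pd

# Confirm the notebook's working directory actually contains btc.csv,
# and that the date column parses into a DatetimeIndex, as Cell 1 expects.
print(os.getcwd(), os.path.exists("btc.csv"))
df = pd.read_csv("btc.csv", index_col=0, parse_dates=["date"])
print(df.index.dtype)
print(df.head())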

FWIW, I'm following this tutorial: https://www.youtube.com/watch?v=ZEULPvS_eeI


Solution

  • The traceback shows the error is raised inside Zipline itself, not while reading your CSV: get_benchmark_returns in zipline/data/benchmarks.py downloads benchmark returns over HTTP, and that endpoint no longer returns valid JSON. The fix is to patch Zipline to pull the benchmark from a different source. I found someone with the same problem in the comments section of the tutorial, and this is what he said to do:

    In User/Anaconda3/envs/[environment]/Lib/site-packages/zipline/data/benchmarks.py (note the filename shown in the traceback; adjust the path to your own environment's site-packages), replace get_benchmark_returns and its imports with:

    import numpy as np
    import pandas as pd
    import pandas_datareader.data as pd_reader


    def get_benchmark_returns(symbol, first_date, last_date):
        # Pull daily prices for the benchmark symbol from Yahoo Finance.
        data = pd_reader.DataReader(symbol, 'yahoo', first_date, last_date)
        data = data['Close']

        # These dates are blanked out and forward-filled
        # (presumably bad data points in the source).
        data[pd.Timestamp('2008-12-15')] = np.nan
        data[pd.Timestamp('2009-08-11')] = np.nan
        data[pd.Timestamp('2012-02-02')] = np.nan
        data = data.fillna(method='ffill')

        # Return UTC-localized daily percentage returns.
        return data.sort_index().tz_localize('UTC').pct_change(1).iloc[1:]

    Notice the use of 'yahoo' as the data source, since the Google source no longer works.
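    The patch relies on pandas_datareader, which does not ship with zipline; install it with pip install pandas-datareader if the import fails. Below is a quick standalone check that the Yahoo source is reachable from your machine (a sketch of my own, assuming network access; SPY is zipline's default benchmark symbol, as seen via bm_symbol in the traceback):

    import pandas_datareader.data as pd_reader
    from datetime import datetime

    # Fetch a few days of SPY closes the same way the patched
    # get_benchmark_returns will; if this prints rows, the source works.
    test = pd_reader.DataReader('SPY', 'yahoo', datetime(2019, 7, 8), datetime(2019, 7, 15))
    print(test['Close'].head())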

    In User/Anaconda3/envs/[environment]/Lib/site-packages/zipline/data/loader.py

    change

    data = get_benchmark_returns(symbol)

    to

    data = get_benchmark_returns(symbol, first_date, last_date)
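
    After both edits, the call site in ensure_benchmark_data should look roughly like the sketch below (reconstructed from the traceback above; exact line numbers vary by zipline version). Remember to restart the Jupyter kernel afterwards, since already-imported zipline modules will not pick up changes to site-packages otherwise.

    # zipline/data/loader.py, inside ensure_benchmark_data (around line 216 in the traceback):
    try:
        data = get_benchmark_returns(symbol, first_date, last_date)
        data.to_csv(get_data_filepath(filename, environ))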