Developing an Expression Language for Quantitative Financial Modeling
-
Upload
scott-sanderson -
Category
Engineering
-
view
817 -
download
2
Transcript of Developing an Expression Language for Quantitative Financial Modeling
LargeProblem
MediumProblem
MediumProblem
MediumProblem
SmallProblem
SmallProblem
SmallProblem
SmallProblem
SmallProblem
SmallProblem
SmallProblem
SmallProblem
mean median first last stddev
rank() zscore()
factor1>factor2
& |rank() percentile()
cross_product()
Factors Filters Classifiers
(2)
a+a+a 3a
SQLnumpy
In[3]:
Out[3]:
from zipline.assets import AssetFinder
finder = AssetFinder("sqlite:///data/assets.db")
lifetimes = finder.lifetimes(
    dates=pd.date_range('2001-01-01', '2015-10-01'),
    include_start_date=True,
)
lifetimes.head(5)
In[4]: daily_count = lifetimes.sum(axis=1)
daily_count.plot(title="Companies in Existence by Day");
In[5]: AAPL_prices = pd.read_csv(
    'data_public/AAPL-split.csv',
    parse_dates=['Date'],
    index_col='Date',
)

def plot_prices(prices):
    price_plot = prices.plot(title='AAPL Price', grid=False)
    price_plot.set_ylabel("Price", rotation='horizontal', labelpad=50)
    price_plot.vlines(
        ['2014-05-08'], 0, 700,
        label="$3.05 Dividend",
        linestyles='dotted',
        colors='black',
    )
    price_plot.vlines(
        ['2014-06-09'], 0, 700,
        label="7:1 Split",
        linestyles='--',
        colors='black',
    )

    price_plot.legend()
    sns.despine()
    return price_plot
In[6]: plot_prices(AAPL_prices);
In[7]: naive_returns = AAPL_prices.pct_change()
naive_returns.plot();
In[8]: from bcolz import open
from humanize import naturalsize

all_prices = open('data/equity_daily_bars.bcolz')
min_offset = min(all_prices.attrs['calendar_offset'].itervalues())
max_offset = max(all_prices.attrs['calendar_offset'].itervalues())
calendar = pd.DatetimeIndex(all_prices.attrs['calendar'])[min_offset:max_offset]

nassets = len(lifetimes.columns)
ndates = len(calendar)
nfields = len(('id', 'open', 'high', 'low', 'close', 'volume', 'date'))

print "Number of Assets: %d" % nassets
print "Number of Dates: %d" % ndates
print "Naive Dataset Size: %s" % naturalsize(nassets * ndates * nfields * 8)
Number of Assets: 20353
Number of Dates: 3480
Naive Dataset Size: 4.0 GB
In[9]: !du -h -d0 data/equity_daily_bars.bcolz
!du -h -d0 data/adjustments.db
299M data/equity_daily_bars.bcolz
30M data/adjustments.db
In[9]: !du -h -d0 data/equity_daily_bars.bcolz
!du -h -d0 data/adjustments.db
299M data/equity_daily_bars.bcolz
30M data/adjustments.db
In[10]: import pandas as pd
from zipline.utils.tradingcalendar import trading_day
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline.loaders import USEquityPricingLoader

loader = USEquityPricingLoader.from_files(
    'data/equity_daily_bars.bcolz',
    'data/adjustments.db'
)
dates = pd.date_range(
    '2014-5-20',
    '2014-06-30',
    freq=trading_day,
    tz='UTC',
)
In[11]:
Out[11]:
# load_adjusted_array() returns a dictionary mapping columns to instances of `AdjustedArray`.
(closes,) = loader.load_adjusted_array(
    columns=[USEquityPricing.close],
    dates=dates,
    assets=pd.Int64Index([24, 5061]),
    mask=None,
).values()
closes
AdjustedArray:
Data: array([[604.4, 39.74], [604.55, 39.69], [606.28, 40.35], ..., [90.35, 42.02], [90.92, 41.73], [91.96, 42.24]])
Adjustments: {13: [Float64Multiply(first_row=0, last_row=13, first_col=0, last_col=0, value=0.142860)]}
In[14]:
Out[14]:
dates_iter = iter(dates[4:])
window = closes.traverse(5)
window
_Float64AdjustedArrayWindow
Window Length: 5
Current Buffer:
[[604.4 39.74]
 [604.55 39.69]
 [606.28 40.35]
 [607.33 40.105]
 [614.14 40.12]]
Remaining Adjustments:
{13: [Float64Multiply(first_row=0, last_row=13, first_col=0, last_col=0, value=0.142860)]}
In[15]: # This cell is run multiple times to show the numbers scrolling up until we hit the split.
data = next(window)
print data
print next(dates_iter)
[[604.4 39.74]
 [604.55 39.69]
 [606.28 40.35]
 [607.33 40.105]
 [614.14 40.12]]
2014-05-27 00:00:00+00:00
float bool
dask