import pandas as pd
# Load the time series from the workbook; the preview below shows a date
# column `ds` and a value column `y`, the two names Prophet is fitted on.
df = pd.read_excel('manning.xlsx')
# Show the first rows as a quick sanity check of the load.
df.head()
|
ds |
y |
0 |
2007-12-10 |
9.590761 |
1 |
2007-12-11 |
8.519590 |
2 |
2007-12-12 |
8.183677 |
3 |
2007-12-13 |
8.072467 |
4 |
2007-12-14 |
7.893572 |
from prophet import Prophet
# Fit a Prophet model with all-default settings on the loaded frame.
m = Prophet()
m.fit(df)
# Build the frame of dates to predict on, with 365 periods appended past
# the end of the training data, then inspect its last rows.
future = m.make_future_dataframe(periods=365)
future.tail()
|
ds |
3265 |
2017-01-15 |
3266 |
2017-01-16 |
3267 |
2017-01-17 |
3268 |
2017-01-18 |
3269 |
2017-01-19 |
# Predict for every date in `future`.
forecast = m.predict(future)
# Display the point forecast together with its lower/upper interval bounds.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
|
ds |
yhat |
yhat_lower |
yhat_upper |
0 |
2007-12-10 |
8.837750 |
8.178753 |
9.425359 |
1 |
2007-12-11 |
8.586321 |
7.972422 |
9.216639 |
2 |
2007-12-12 |
8.382230 |
7.757505 |
8.987608 |
3 |
2007-12-13 |
8.360149 |
7.735065 |
8.952163 |
4 |
2007-12-14 |
8.348204 |
7.691369 |
9.002734 |
... |
... |
... |
... |
... |
3265 |
2017-01-15 |
8.206605 |
7.418459 |
8.965539 |
3266 |
2017-01-16 |
8.531590 |
7.774069 |
9.273478 |
3267 |
2017-01-17 |
8.318985 |
7.542990 |
9.073414 |
3268 |
2017-01-18 |
8.151637 |
7.430158 |
8.865084 |
3269 |
2017-01-19 |
8.163528 |
7.455394 |
8.959976 |
3270 rows × 4 columns
# Plot the forecast components; the trailing `;` keeps the notebook from
# echoing the call's return value.
m.plot_components(forecast);
from prophet.plot import add_changepoints_to_plot
# Draw the forecast, then overlay the fitted model's changepoints on the
# same axes.
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)
# Refit with one hand-picked changepoint instead of the automatically
# selected ones, then redraw the forecast with that changepoint marked.
m = Prophet(changepoints=['2014-01-01'])
m.fit(df)
forecast = m.predict(future)

fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)
# Build the custom holiday table from two workbook sheets, tagging every
# row of each sheet with the event name Prophet should use for it.
h1 = pd.read_excel('manning.xlsx', sheet_name='playoff').assign(holiday='playoff')
h2 = pd.read_excel('manning.xlsx', sheet_name='superbowls').assign(holiday='superbowl')
holidays = pd.concat([h1, h2])

# Refit with the custom holidays and redraw the component plots.
m = Prophet(holidays=holidays)
m.fit(df)
forecast = m.predict(future)
m.plot_components(forecast);
23:26:11 - cmdstanpy - INFO - Chain [1] start processing
23:26:11 - cmdstanpy - INFO - Chain [1] done processing
# Select every date on which either custom holiday contributes to the
# forecast. The comparison is against zero rather than `> 0` so that a
# holiday whose estimated effect is negative is not silently dropped.
result = forecast.query('playoff != 0 or superbowl != 0')
# Show the date and point forecast for the first few holiday rows.
result[['ds', 'yhat']].head()
|
ds |
yhat |
34 |
2008-01-13 |
9.983693 |
35 |
2008-01-14 |
10.995413 |
361 |
2009-01-03 |
9.056871 |
362 |
2009-01-04 |
10.080071 |
730 |
2010-01-16 |
9.925495 |
# Start again from the custom holidays and additionally register the
# built-in US country holidays before fitting.
m = Prophet(holidays=holidays)
m.add_country_holidays(country_name='US')

m.fit(df)
forecast = m.predict(future)
23:26:44 - cmdstanpy - INFO - Chain [1] start processing
23:26:44 - cmdstanpy - INFO - Chain [1] done processing
# Keep only the dates where the Christmas Day component is non-zero.
result = forecast[forecast['Christmas Day'] != 0]
# Show the holiday's contribution next to the overall point forecast.
result[['ds', 'Christmas Day', 'yhat']]
|
ds |
Christmas Day |
yhat |
15 |
2007-12-25 |
-0.427969 |
8.206388 |
352 |
2008-12-25 |
-0.427969 |
7.557798 |
708 |
2009-12-25 |
-0.427969 |
8.166067 |
1062 |
2010-12-25 |
-0.427969 |
7.904445 |
1788 |
2012-12-25 |
-0.427969 |
8.540755 |
2152 |
2013-12-25 |
-0.427969 |
8.303612 |
2515 |
2014-12-25 |
-0.427969 |
7.942331 |
2878 |
2015-12-25 |
-0.427969 |
7.580366 |
3244 |
2016-12-25 |
-0.427969 |
7.299214 |
# Turn off the built-in weekly seasonality; it is replaced below by two
# weekly seasonalities that each apply only when their flag column is True.
m = Prophet(weekly_seasonality=False)

# Flag each training row: on-season is September through January,
# off-season is every other month.
d = pd.to_datetime(df['ds'])
df['on_season'] = (d.dt.month > 8) | (d.dt.month < 2)
df['off_season'] = ~df['on_season']

# Register one weekly seasonality per flag (add_seasonality returns the
# model, so the two registrations can be chained).
(
    m.add_seasonality(
        name='weekly_on_season',
        period=7,
        fourier_order=3,
        condition_name='on_season',
    ).add_seasonality(
        name='weekly_off_season',
        period=7,
        fourier_order=3,
        condition_name='off_season',
    )
)
<prophet.forecaster.Prophet at 0x18374540d30>
# The model's conditional seasonalities are keyed on the `on_season` and
# `off_season` columns, so the frame passed to predict() must carry them
# too; without them Prophet raises ValueError ("Condition ... missing from
# dataframe"). Derive the flags with the same month rule used for the
# training frame (September through January is on-season).
future_month = pd.to_datetime(future['ds']).dt.month
future['on_season'] = (future_month < 2) | (future_month > 8)
future['off_season'] = ~future['on_season']

# Fit on the flagged training data, forecast, and plot the components.
forecast = m.fit(df).predict(future)
fig = m.plot_components(forecast);
23:28:33 - cmdstanpy - INFO - Chain [1] start processing
23:28:33 - cmdstanpy - INFO - Chain [1] done processing
# Load the second data set and rename its columns so the date and target
# use the `ds` / `y` names the model is fitted on.
df = pd.read_excel('uschange.xlsx')
df.columns = ['ds', 'y', 'Income', 'Production', 'Savings', 'Unemployment']

# Register two extra regressors (add_regressor returns the model, so the
# calls chain), then fit and predict on the same frame.
m = Prophet().add_regressor('Unemployment').add_regressor('Income')
m.fit(df)
forecast = m.predict(df)
23:28:57 - cmdstanpy - INFO - Chain [1] start processing
23:28:57 - cmdstanpy - INFO - Chain [1] done processing
from prophet.utilities import regressor_coefficients
# Tabulate the fitted coefficient of each extra regressor (the output
# below lists mode, center, and coefficient with lower/upper bounds).
regressor_coefficients(m)
|
regressor |
regressor_mode |
center |
coef_lower |
coef |
coef_upper |
0 |
Unemployment |
additive |
0.007487 |
-0.859028 |
-0.859028 |
-0.859028 |
1 |
Income |
additive |
0.717627 |
0.191213 |
0.191213 |
0.191213 |
from statsmodels.formula.api import ols
# Fit an ordinary least squares regression of y on the same two regressors
# and show its summary, for comparison with the Prophet coefficients above.
ols('y ~ Unemployment + Income', data=df).fit().summary()
OLS Regression Results
Dep. Variable: | y | R-squared: | 0.372 |
Model: | OLS | Adj. R-squared: | 0.365 |
Method: | Least Squares | F-statistic: | 54.40 |
Date: | Sun, 28 May 2023 | Prob (F-statistic): | 2.74e-19 |
Time: | 23:29:47 | Log-Likelihood: | -142.38 |
No. Observations: | 187 | AIC: | 290.8 |
Df Residuals: | 184 | BIC: | 300.4 |
Df Model: | 2 | | |
Covariance Type: | nonrobust | | |
| coef | std err | t | P>|t| | [0.025 | 0.975] |
Intercept | 0.6064 | 0.049 | 12.404 | 0.000 | 0.510 | 0.703 |
Unemployment | -0.8275 | 0.105 | -7.890 | 0.000 | -1.034 | -0.621 |
Income | 0.2038 | 0.042 | 4.822 | 0.000 | 0.120 | 0.287 |
Omnibus: | 8.056 | Durbin-Watson: | 1.864 |
Prob(Omnibus): | 0.018 | Jarque-Bera (JB): | 8.321 |
Skew: | 0.401 | Prob(JB): | 0.0156 |
Kurtosis: | 3.653 | Cond. No. | 3.85 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
# Load the third data set and plot its `y` column against the row index.
df = pd.read_csv('log_visit.csv')
df.y.plot()
<Axes: >
# Logistic growth needs a carrying capacity in a `cap` column of the
# training frame; Prophet raises ValueError when it is absent. Supply the
# same 8.5 cap that is used for the future frame, but leave any `cap`
# column that was already loaded from the file untouched.
if 'cap' not in df.columns:
    df['cap'] = 8.5

# Fit a model whose trend saturates at the capacity.
m = Prophet(growth='logistic')
m.fit(df)
23:30:37 - cmdstanpy - INFO - Chain [1] start processing
23:30:37 - cmdstanpy - INFO - Chain [1] done processing
<prophet.forecaster.Prophet at 0x183773b9ed0>
# Build a prediction frame with 1826 periods beyond the training data.
future = m.make_future_dataframe(periods=1826)
# The logistic model needs a capacity on the prediction frame as well;
# use a constant 8.5 for every row.
future['cap'] = 8.5
fcst = m.predict(future)
# Plot the forecast; the trailing `;` suppresses the notebook echo.
m.plot(fcst);