시계열 데이터의 회귀분석

import pandas as pd
from statsmodels.formula.api import ols

# Load the data set from the Excel workbook into a DataFrame.
df = pd.read_excel('uschange.xlsx')

# Simple regression of Consumption on Income; the summary table is the
# value displayed by this cell.
ols(formula='Consumption ~ Income', data=df).fit().summary()
OLS Regression Results
Dep. Variable: Consumption R-squared: 0.159
Model: OLS Adj. R-squared: 0.154
Method: Least Squares F-statistic: 34.98
Prob (F-statistic): 1.58e-08
Time: 11:10:33 Log-Likelihood: -169.62
No. Observations: 187 AIC: 343.2
Df Residuals: 185 BIC: 349.7
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 0.5451 0.056 9.789 0.000 0.435 0.655
Income 0.2806 0.047 5.915 0.000 0.187 0.374
Omnibus: 16.528 Durbin-Watson: 1.696
Prob(Omnibus): 0.000 Jarque-Bera (JB): 29.145
Skew: -0.454 Prob(JB): 4.69e-07
Kurtosis: 4.707 Cond. No. 2.08



# Multiple regression of Consumption on Income, Production, Unemployment
# and Savings. The fitted results stay bound to `m` because later cells
# call m.predict() and read m.resid.
m = ols('Consumption ~ Income + Production + Unemployment + Savings', df).fit()
# Show the regression table; the assignment alone displays nothing.
m.summary()
OLS Regression Results
Dep. Variable: Consumption R-squared: 0.754
Model: OLS Adj. R-squared: 0.749
Method: Least Squares F-statistic: 139.5
Prob (F-statistic): 2.62e-54
Time: 11:10:34 Log-Likelihood: -54.692
No. Observations: 187 AIC: 119.4
Df Residuals: 182 BIC: 135.5
Df Model: 4
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 0.2673 0.037 7.184 0.000 0.194 0.341
Income 0.7145 0.042 16.934 0.000 0.631 0.798
Production 0.0459 0.026 1.773 0.078 -0.005 0.097
Unemployment -0.2048 0.106 -1.941 0.054 -0.413 0.003
Savings -0.0453 0.003 -16.287 0.000 -0.051 -0.040
Omnibus: 18.566 Durbin-Watson: 2.169
Prob(Omnibus): 0.000 Jarque-Bera (JB): 28.796
Skew: 0.564 Prob(JB): 5.58e-07
Kurtosis: 4.556 Cond. No. 61.9



import matplotlib.pyplot as plt
import numpy as np

# Fitted values of model `m` for every row of df, drawn against the row index.
y_pred = m.predict(df)
y_pred.plot(legend=True, label='prediction')

# Start a new figure so the scatter below does not share axes with the
# index-based line plot above; otherwise get_xlim() would return the index
# range and the y = x reference line would be drawn over that range.
plt.figure()

# Observed Consumption (x) against predicted Consumption (y).
plt.plot(df.Consumption, y_pred, '.')
x = np.array(plt.gca().get_xlim())  # x-coordinates of the left and right edges of the axes
plt.plot(x, x, '--')  # y = x reference line: points on it are predicted exactly
0      0.168638
1     -0.184591
2     -0.051472
3     -0.070916
4      0.267760
182   -0.109993
183   -0.037906
184   -0.113017
185    0.128985
186   -0.046047
Length: 187, dtype: float64
# Residuals computed by hand: observed Consumption minus the fitted values.
df['Consumption'] - y_pred
0      0.168638
1     -0.184591
2     -0.051472
3     -0.070916
4      0.267760
182   -0.109993
183   -0.037906
184   -0.113017
185    0.128985
186   -0.046047
Length: 187, dtype: float64
from statsmodels.graphics.tsaplots import plot_acf
import pingouin as pg
# Correlation between the Income column and the residuals of model `m`.
pg.corr(x=df['Income'], y=m.resid)
n r CI95% p-val BF10 power
pearson 187 -1.283695e-15 [-0.14, 0.14] 1.0 0.092 0.049914
# Correlation between the fitted values and the residuals of model `m`.
pg.corr(x=y_pred, y=m.resid)
n r CI95% p-val BF10 power
pearson 187 3.885781e-16 [-0.14, 0.14] 1.0 0.092 0.049914
# Number of observations (rows) in the data set.
n = df.shape[0]

# Linear time index 0, 1, ..., n-1 used as the trend regressor.
df['t'] = np.arange(n)

# Regress Consumption on the time index alone and display the summary.
ols(formula='Consumption ~ t', data=df).fit().summary()
OLS Regression Results
Dep. Variable: Consumption R-squared: 0.025
Model: OLS Adj. R-squared: 0.020
Method: Least Squares F-statistic: 4.726
Prob (F-statistic): 0.0310
Time: 11:12:40 Log-Likelihood: -183.46
No. Observations: 187 AIC: 370.9
Df Residuals: 185 BIC: 377.4
Df Model: 1
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 0.9242 0.095 9.777 0.000 0.738 1.111
t -0.0019 0.001 -2.174 0.031 -0.004 -0.000
Omnibus: 46.181 Durbin-Watson: 1.333
Prob(Omnibus): 0.000 Jarque-Bera (JB): 115.917
Skew: -1.058 Prob(JB): 6.74e-26
Kurtosis: 6.225 Cond. No. 214.



# Number of repetitions of the 1..4 cycle needed to cover all n rows.
# Ceiling division guarantees at least n labels for any n, whereas
# (n + 1) // 4 falls short when n % 4 is 1 or 2.
r = -(-n // 4)

# Repeat the labels 1, 2, 3, 4 and trim to exactly n rows (instead of a
# hard-coded 187) so the column always matches the length of df.
# NOTE(review): this assumes the first row corresponds to season 1 - confirm
# against the data set.
df['season'] = np.tile([1, 2, 3, 4], r)[:n]

# Regress Consumption on season treated as a categorical factor
# (season 1 is the baseline level in the displayed coefficients).
ols('Consumption ~ C(season)', df).fit().summary()
OLS Regression Results
Dep. Variable: Consumption R-squared: 0.009
Model: OLS Adj. R-squared: -0.007
Method: Least Squares F-statistic: 0.5513
Prob (F-statistic): 0.648
Time: 11:13:05 Log-Likelihood: -184.97
No. Observations: 187 AIC: 377.9
Df Residuals: 183 BIC: 390.9
Df Model: 3
Covariance Type: nonrobust
coef std err t P>|t| [0.025 0.975]
Intercept 0.7356 0.096 7.667 0.000 0.546 0.925
C(season)[T.2] -0.0296 0.136 -0.218 0.828 -0.297 0.238
C(season)[T.3] 0.1143 0.136 0.842 0.401 -0.153 0.382
C(season)[T.4] -0.0424 0.136 -0.311 0.756 -0.312 0.227
Omnibus: 29.475 Durbin-Watson: 1.283
Prob(Omnibus): 0.000 Jarque-Bera (JB): 63.812
Skew: -0.718 Prob(JB): 1.39e-14
Kurtosis: 5.476 Cond. No. 4.78


