import pandas as pd
df = pd.read_excel('uschange.xlsx')  # quarterly changes in US consumption, income, production, savings and unemployment
df.Consumption.plot(legend=True)
df.Income.plot(legend=True)
[Figure: time plots of Consumption and Income]
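Before fitting anything, the pairwise correlations give a quick sense of how strongly each predictor moves with consumption. A minimal sketch (not part of the original run), restricted to the five series used below:

# Sketch: pairwise correlations of the five series used in the regressions
cols = ['Consumption', 'Income', 'Production', 'Unemployment', 'Savings']
df[cols].corr().round(2)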
from statsmodels.formula.api import ols
ols('Consumption ~ Income', df).fit().summary()
OLS Regression Results
Dep. Variable: | Consumption | R-squared: | 0.159 |
Model: | OLS | Adj. R-squared: | 0.154 |
Method: | Least Squares | F-statistic: | 34.98 |
Date: | Sun, 28 May 2023 | Prob (F-statistic): | 1.58e-08 |
Time: | 11:10:33 | Log-Likelihood: | -169.62 |
No. Observations: | 187 | AIC: | 343.2 |
Df Residuals: | 185 | BIC: | 349.7 |
Df Model: | 1 | | |
Covariance Type: | nonrobust | | |
| coef | std err | t | P>|t| | [0.025 | 0.975] |
Intercept | 0.5451 | 0.056 | 9.789 | 0.000 | 0.435 | 0.655 |
Income | 0.2806 | 0.047 | 5.915 | 0.000 | 0.187 | 0.374 |
Omnibus: | 16.528 | Durbin-Watson: | 1.696 |
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 29.145 |
Skew: | -0.454 | Prob(JB): | 4.69e-07 |
Kurtosis: | 4.707 | Cond. No. | 2.08 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
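The fitted line is therefore Consumption ≈ 0.5451 + 0.2806 × Income, with Income explaining about 16% of the variation in Consumption. The same numbers can be read off the result object instead of the full summary; a minimal sketch (the name fit1 is ours):

# Sketch: pull coefficients and fit statistics directly from the result object
fit1 = ols('Consumption ~ Income', df).fit()
print(fit1.params)      # Intercept ≈ 0.5451, Income ≈ 0.2806
print(fit1.rsquared)    # ≈ 0.159
print(fit1.conf_int())  # 95% confidence intervals, matching the table above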
m = ols('Consumption ~ Income + Production + Unemployment + Savings', df).fit()
m.summary()
OLS Regression Results
Dep. Variable: | Consumption | R-squared: | 0.754 |
Model: | OLS | Adj. R-squared: | 0.749 |
Method: | Least Squares | F-statistic: | 139.5 |
Date: | Sun, 28 May 2023 | Prob (F-statistic): | 2.62e-54 |
Time: | 11:10:34 | Log-Likelihood: | -54.692 |
No. Observations: | 187 | AIC: | 119.4 |
Df Residuals: | 182 | BIC: | 135.5 |
Df Model: | 4 | | |
Covariance Type: | nonrobust | | |
| coef | std err | t | P>|t| | [0.025 | 0.975] |
Intercept | 0.2673 | 0.037 | 7.184 | 0.000 | 0.194 | 0.341 |
Income | 0.7145 | 0.042 | 16.934 | 0.000 | 0.631 | 0.798 |
Production | 0.0459 | 0.026 | 1.773 | 0.078 | -0.005 | 0.097 |
Unemployment | -0.2048 | 0.106 | -1.941 | 0.054 | -0.413 | 0.003 |
Savings | -0.0453 | 0.003 | -16.287 | 0.000 | -0.051 | -0.040 |
Omnibus: | 18.566 | Durbin-Watson: | 2.169 |
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 28.796 |
Skew: | 0.564 | Prob(JB): | 5.58e-07 |
Kurtosis: | 4.556 | Cond. No. | 61.9 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
y_pred = m.predict(df)
df.Consumption.plot(legend=True)
y_pred.plot(legend=True, label='prediction')
[Figure: Consumption and the model's prediction plotted over time]
import matplotlib.pyplot as plt
import numpy as np
plt.plot(df.Consumption, y_pred, '.')
x = np.array(plt.gca().get_xlim())  # x-coordinates of the left and right edges of the plot
plt.plot(x, x, '--')
[Figure: predicted vs. actual Consumption with a 45-degree reference line]
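Because an OLS fit with an intercept makes R² equal to the squared correlation between observed and fitted values, the scatter above can be summarised in a single number. A short sketch:

# Sketch: R-squared equals the squared correlation between actual and predicted values
corr_ap = np.corrcoef(df.Consumption, y_pred)[0, 1]
print(corr_ap ** 2)   # ≈ 0.754, the R-squared reported in the summary above
print(m.rsquared)     # the same value, read from the fitted model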
m.resid
0 0.168638
1 -0.184591
2 -0.051472
3 -0.070916
4 0.267760
...
182 -0.109993
183 -0.037906
184 -0.113017
185 0.128985
186 -0.046047
Length: 187, dtype: float64
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(m.resid);
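The ACF plot is only a visual check; a formal test of residual autocorrelation can be added with the Ljung-Box statistic. A sketch (the choice of 10 lags is ours):

# Sketch: Ljung-Box test on the regression residuals, joint test up to lag 10
from statsmodels.stats.diagnostic import acorr_ljungbox
acorr_ljungbox(m.resid, lags=[10])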
import pingouin as pg
pg.corr(df.Income, m.resid)
| n | r | CI95% | p-val | BF10 | power |
pearson | 187 | -1.283695e-15 | [-0.14, 0.14] | 1.0 | 0.092 | 0.049914 |
| n | r | CI95% | p-val | BF10 | power |
pearson | 187 | 3.885781e-16 | [-0.14, 0.14] | 1.0 | 0.092 | 0.049914 |
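The same check can be repeated for every predictor in the model. OLS residuals are orthogonal to each included regressor by construction, so all of these correlations are zero up to floating-point noise; a short sketch using the same pingouin call:

# Sketch: correlation of the residuals with each regressor (zero by construction)
for col in ['Income', 'Production', 'Unemployment', 'Savings']:
    print(col, pg.corr(df[col], m.resid)['r'].iloc[0])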
n = len(df)
df['t'] = np.arange(n)  # linear time index 0, 1, ..., n-1
ols('Consumption ~ t', df).fit().summary()
OLS Regression Results
Dep. Variable: | Consumption | R-squared: | 0.025 |
Model: | OLS | Adj. R-squared: | 0.020 |
Method: | Least Squares | F-statistic: | 4.726 |
Date: | Sun, 28 May 2023 | Prob (F-statistic): | 0.0310 |
Time: | 11:12:40 | Log-Likelihood: | -183.46 |
No. Observations: | 187 | AIC: | 370.9 |
Df Residuals: | 185 | BIC: | 377.4 |
Df Model: | 1 | | |
Covariance Type: | nonrobust | | |
| coef | std err | t | P>|t| | [0.025 | 0.975] |
Intercept | 0.9242 | 0.095 | 9.777 | 0.000 | 0.738 | 1.111 |
t | -0.0019 | 0.001 | -2.174 | 0.031 | -0.004 | -0.000 |
Omnibus: | 46.181 | Durbin-Watson: | 1.333 |
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 115.917 |
Skew: | -1.058 | Prob(JB): | 6.74e-26 |
Kurtosis: | 6.225 | Cond. No. | 214. |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
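The trend coefficient is small but significant at the 5% level (p ≈ 0.031); overlaying the fitted line on the series makes its size easier to judge. A sketch, storing the fit under a name of our own:

# Sketch: plot Consumption together with the fitted linear trend
trend_fit = ols('Consumption ~ t', df).fit()
df.Consumption.plot(legend=True)
trend_fit.fittedvalues.plot(label='linear trend', legend=True)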
r = (n + 3) // 4  # number of years of quarterly data, rounded up (ceil(n/4))
df['season'] = np.tile([1, 2, 3, 4], r)[:n]  # repeating quarter labels 1-4, trimmed to n rows
ols('Consumption ~ C(season)', df).fit().summary()
OLS Regression Results
Dep. Variable: | Consumption | R-squared: | 0.009 |
Model: | OLS | Adj. R-squared: | -0.007 |
Method: | Least Squares | F-statistic: | 0.5513 |
Date: | Sun, 28 May 2023 | Prob (F-statistic): | 0.648 |
Time: | 11:13:05 | Log-Likelihood: | -184.97 |
No. Observations: | 187 | AIC: | 377.9 |
Df Residuals: | 183 | BIC: | 390.9 |
Df Model: | 3 | | |
Covariance Type: | nonrobust | | |
| coef | std err | t | P>|t| | [0.025 | 0.975] |
Intercept | 0.7356 | 0.096 | 7.667 | 0.000 | 0.546 | 0.925 |
C(season)[T.2] | -0.0296 | 0.136 | -0.218 | 0.828 | -0.297 | 0.238 |
C(season)[T.3] | 0.1143 | 0.136 | 0.842 | 0.401 | -0.153 | 0.382 |
C(season)[T.4] | -0.0424 | 0.136 | -0.311 | 0.756 | -0.312 | 0.227 |
Omnibus: | 29.475 | Durbin-Watson: | 1.283 |
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 63.812 |
Skew: | -0.718 | Prob(JB): | 1.39e-14 |
Kurtosis: | 5.476 | Cond. No. | 4.78 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
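A compact way to compare these specifications is by AIC and adjusted R²: the summaries above give AIC ≈ 119.4 for the four-predictor model against ≈ 370.9 for the trend and ≈ 377.9 for the seasonal dummies. A sketch that refits the alternatives under names of our own:

# Sketch: side-by-side comparison of the three specifications (lower AIC is better)
models = {
    'four predictors': m,
    'linear trend': ols('Consumption ~ t', df).fit(),
    'season dummies': ols('Consumption ~ C(season)', df).fit(),
}
for name, fit in models.items():
    print(f'{name:16s} AIC={fit.aic:7.1f}  adj. R2={fit.rsquared_adj:6.3f}')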