import pandas as pd
# Load the survival dataset from the Excel workbook into a DataFrame.
# Later cells read the columns 'type', 'time' and 'delta' from it.
cancer = pd.read_excel(io='cancer_survive.xlsx')
|
type |
time |
delta |
0 |
1 |
1 |
1 |
1 |
1 |
3 |
1 |
2 |
1 |
3 |
1 |
3 |
1 |
4 |
1 |
4 |
1 |
10 |
1 |
# Count how many rows belong to each value of the 'type' column.
cancer['type'].value_counts()
1 52
2 28
Name: type, dtype: int64
import seaborn as sns
# Strip plot of 'time', with points coloured by the 'delta' column.
sns.stripplot(
    x='time',
    hue='delta',
    data=cancer,
)
<Axes: xlabel='time'>
카플란 마이어 추정치
from lifelines import KaplanMeierFitter
# Fit a single Kaplan-Meier estimator on every row of the dataset:
# 'time' supplies the durations and 'delta' is passed as the event indicator.
kmf = KaplanMeierFitter()
kmf.fit(durations=cancer['time'], event_observed=cancer['delta'])
<lifelines.KaplanMeierFitter:"KM_estimate", fitted with 80 total observations, 27 right-censored observations>
# Plot the survival function estimated by `kmf`, which was fitted on all rows.
kmf.plot_survival_function()
<Axes: xlabel='timeline'>
# Keep only the rows whose 'type' equals 1 and fit a separate
# Kaplan-Meier estimator on that subset, labelled 'type 1'.
cancer1 = cancer[cancer['type'] == 1]
kmf1 = KaplanMeierFitter()
kmf1.fit(
    durations=cancer1['time'],
    event_observed=cancer1['delta'],
    label='type 1',
)
<lifelines.KaplanMeierFitter:"type 1", fitted with 52 total observations, 21 right-censored observations>
# Keep only the rows whose 'type' equals 2 and fit a separate
# Kaplan-Meier estimator on that subset, labelled 'type 2'.
cancer2 = cancer[cancer['type'] == 2]
kmf2 = KaplanMeierFitter()
kmf2.fit(
    durations=cancer2['time'],
    event_observed=cancer2['delta'],
    label='type 2',
)
<lifelines.KaplanMeierFitter:"type 2", fitted with 28 total observations, 6 right-censored observations>
# Overlay the two groups' Kaplan-Meier curves on one set of axes.
# The estimate is a step function that only changes at observed event times,
# so draw it with 'steps-post'. The default pandas line plot would join
# consecutive estimates with sloped segments and misrepresent the survival
# probability between events.
ax = kmf1.survival_function_.plot(drawstyle='steps-post')
kmf2.survival_function_.plot(ax=ax, drawstyle='steps-post')
<Axes: xlabel='timeline'>
생존 함수 차이의 통계적 가설 검정
from lifelines.statistics import logrank_test
# Log-rank test comparing the two groups' survival experience.
# Group A is the 'type 1' subset and group B is the 'type 2' subset.
# NOTE(review): alpha is echoed in the printed summary; whether it influences
# the test itself depends on the installed lifelines version - confirm.
res = logrank_test(
    durations_A=cancer1['time'],
    durations_B=cancer2['time'],
    event_observed_A=cancer1['delta'],
    event_observed_B=cancer2['delta'],
    alpha=.95,
)
res.print_summary()
t_0 |
-1 |
null_distribution |
chi squared |
degrees_of_freedom |
1 |
alpha |
0.95 |
test_name |
logrank_test |
|
test_statistic |
p |
-log2(p) |
0 |
2.79 |
0.09 |
3.40 |
넬슨 알렌 추정치
from lifelines import NelsonAalenFitter
# Fit a Nelson-Aalen estimator on the 'type 1' subset and plot the result.
naf1 = NelsonAalenFitter()
naf1.fit(
    durations=cancer1['time'],
    event_observed=cancer1['delta'],
    label='type 1',
)
naf1.plot()
<Axes: xlabel='timeline'>