首先导入所需的库,并定义如下逻辑回归函数:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.stats import norm
import numpy as np
from plotnine import *
"""因变量y必须是int:0或1"""
def logistic_regression(y,x,df):
model = smf.glm(formula = f'{y} ~ {x}',
data = df,
family=sm.families.Binomial()).fit()
print(model.summary(), end='\n\n\n')
stat = pd.DataFrame({'p': model.pvalues, # series:每个β的p值
# 'beta': model.params, # series:β
# 'beta_lower_ci': model.conf_int().iloc[:, 0], #df:β的95%CI
# 'beta_upper_ci': model.conf_int().iloc[:, 1], #df:β的95%CI
'OR': np.exp(model.params), # series:OR
'OR_lower_ci': np.exp(model.params - norm.ppf(0.975)*model.bse),
'OR_upper_ci': np.exp(model.params + norm.ppf(0.975)*model.bse)})
stat['sig'] = stat.apply(lambda x : "*" if x['p']<0.05 else "no_sig",axis=1)
stat= stat.sort_values('OR', ascending=True)
print(stat)
"""绘制森林图:OR"""
forest_df = stat.drop("Intercept")\
.reset_index()\
.rename(columns={'index': 'independent_var'})\
.sort_values('OR', ascending=False)
print(ggplot(f

7649

被折叠的 条评论
为什么被折叠?



