import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
import time
import numpy as np
import pylab as pl
import tqdm
# Lift pandas' cap on the number of columns shown when a frame is rendered.
pd.options.display.max_columns = None

# Read the twins table from a CSV file resolved against the working directory.
twins = pd.read_csv('twins.csv')
print(twins.shape)  # (row count, column count)

# First ten rows. The value is only rendered by an interactive/notebook
# front end; run as a plain script, this expression's result is discarded.
twins.head(10)
# Note: No coding is required to answer 3a.
# Read the singletons table from a CSV file resolved against the working
# directory.
singletons = pd.read_csv('singletons.csv')

# First five rows (spelled out; 5 is also what head() uses by default). The
# value is only rendered by an interactive/notebook front end.
singletons.head(n=5)
# String constants naming individual fields.
# NOTE(review): X / T / O look like the covariate list, the treatment column
# and the outcome column of the analysis, and the strings look like column
# names of the CSV tables -- confirm against the code that consumes them.
X = [
    'dmage', 'dmar', 'dlivord',
    'anemia', 'cardiac', 'lung', 'diabetes', 'herpes',
    'hydra', 'hemo', 'chyper', 'phyper', 'eclamp',
    'incervix', 'pre4000', 'preterm',
    'renal', 'rh', 'uterine', 'othermr', 'alcohol',
    # Entries sharing the "m_race_" prefix.
    'm_race_black', 'm_race_other', 'm_race_white',
    # Entries sharing the "m_edu_" prefix.
    'm_edu_college', 'm_edu_elementary', 'm_edu_highschool',
    'm_edu_morethancollege', 'm_edu_noedu',
]

# Single field names kept as plain strings (not lists).
T = 'tobacco'
O = 'dbirwt'