import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
import time
import numpy as np
import pylab as pl
import tqdm

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

Q1: Low Birth Weight Causes Infant Mortality?

# Load the twins dataset from the current working directory.
twins = pd.read_csv('twins.csv')

# Report the (rows, columns) dimensions of the loaded frame.
print(twins.shape)

# Preview the first ten rows (rendered when this is the last expression of a
# notebook cell).
twins.head(10)

(59052, 35)

a) Why is this data well-suited for matching?

Note: No coding is required to answer part (a).

b) Matching for ATE

c) Generalizability from Counterfactual Twins to Singletons

# Load the singletons dataset from the current working directory.
singletons = pd.read_csv('singletons.csv')

# Preview the first five rows (head()'s default count, spelled out here).
singletons.head(n=5)

Q2: Smoking During Pregnancy Causes Low Birth Weight?

# Column-name constants for the Q2 analysis.
# NOTE(review): X presumably holds the covariate columns, T the treatment
# column and O the outcome column -- confirm against the code that uses them.
X = [
    # Individual variables, in their original order.
    'dmage', 'dmar', 'dlivord',
    'anemia', 'cardiac', 'lung', 'diabetes', 'herpes',
    'hydra', 'hemo', 'chyper', 'phyper', 'eclamp',
    'incervix', 'pre4000', 'preterm',
    'renal', 'rh', 'uterine', 'othermr', 'alcohol',
    # m_race_* indicator columns.
    'm_race_black', 'm_race_other', 'm_race_white',
    # m_edu_* indicator columns.
    'm_edu_college', 'm_edu_elementary', 'm_edu_highschool',
    'm_edu_morethancollege', 'm_edu_noedu',
]

T = 'tobacco'
O = 'dbirwt'

Q1: Low Birth Weight Causes Infant Mortality?

a) Why is this data well-suited for matching?

b) Matching for ATE

c) Generalizability from Counterfactual Twins to Singletons

Q2: Smoking During Pregnancy Causes Low Birth Weight?

a) Naive Difference in Cohorts

b) Covariate Adjustment

c) Propensity Score Re-Weighting

	csex	dbirwt	dmage	mrace	dmeduc	dmar	dlivord	mpcb	othermr	tobacco	dfage	frace	dfeduc	infant_id	pair_id	id
0	1	2601	22	black	college	1	2	3	1	0	30	black	highschool	2	3	1
1	2	3069	22	black	college	1	2	3	1	0	30	black	highschool	3	3	2
2	1	2948	24	white	highschool	1	2	1	0	0	30	white	college	4	5	1
3	2	2948	24	white	highschool	1	3	1	0	0	30	white	college	5	5	2
4	1	3345	32	white	highschool	1	2	2	0	0	36	white	highschool	8	9	1
5	2	2863	32	white	highschool	1	3	2	0	0	36	white	highschool	9	9	2
6	2	2098	31	white	morethancollege	1	1	2	0	0	36	white	highschool	10	11	1
7	2	1985	31	white	morethancollege	1	1	2	0	0	36	white	highschool	11	11	2
8	1	2126	24	white	highschool	1	2	2	0	1	22	white	highschool	12	13	1
9	1	1985	24	white	highschool	1	2	2	0	1	22	white	highschool	13	13	2

	Unnamed: 0	Unnamed: 0.1	csex	dbirwt	dmage	mrace	dmeduc	dmar	dlivord	mpcb	uterine	dfage	frace	dfeduc	infant_id	term	m_race_white	m_edu_college	m_edu_highschool	m_edu_morethancollege
0	0	0	1	2977	29	white	highschool	0	2	2	1	22	white	highschool	0	0	1	0	1	0
1	1	1	2	3912	25	white	college	1	2	3	0	25	white	college	1	1	1	1	0	0
2	2	2	1	3317	36	white	morethancollege	1	3	2	0	33	white	college	2	0	1	0	0	1
3	3	3	2	2963	30	white	college	1	2	1	0	31	white	college	3	0	1	1	0	0
4	4	4	2	3572	25	white	highschool	1	3	2	0	26	white	highschool	4	2	1	0	1	0