Electronic Device Bans and Academic Performance: New York City Public Schools

Ethan Witkowski Spring 2019

## Warning: package 'readxl' was built under R version 3.6.2

#Import Dataset
NYCLUSchools <- read_excel("NYC Regents Test Data.xlsx")

#Remove duplicates
NYCLUSchools <- NYCLUSchools[!(NYCLUSchools$BEDS == 332100011400 | NYCLUSchools$BEDS == 321000011430), ]

#Intervention Variable (Before Treatment, After Treatment)
NYCLUSchools$BTAT <- ifelse(NYCLUSchools$Year<=2015,0,1)

#Variable recording the time elapased since beginning of study
StartYear <- 2007
NYCLUSchools$Timeelapsed <- NYCLUSchools$Year - StartYear

Variable documentation

Year - Year in which exam was taken
Exam - Regents Exam Type - Global History/Geography, Living Environment (Biology)
Percent Passing - Percentage of students passing Regents Exam at each school for each year
BEDS - Unique School ID
FSM - Percentage of students who are eligible for free lunch at each school for each year
Total tested - Number of students who took the exam at each school for each year
Demographic - Population of students - All students less special education students
Timeelapased - time elapsed since beginning of study at each school for each year
BTAT - Indicator variable for before treatment, after treatment

Exploratory Data Analysis

#Histogram - Exam passing rate by school 2007-2017
hist(NYCLUSchools$`Percent Passing`,
     main = "Exam Passing Rate Distribution by School",
     xlab="Exam Passing Rate by School",
     ylab="Frequency")

#NPP - Exam passing rate by school 2007-2017
qqnorm(NYCLUSchools$`Percent Passing`)
qqline(NYCLUSchools$`Percent Passing`)

#Scatterplot - Year and Percent Passing
plot(NYCLUSchools$Year, NYCLUSchools$`Percent Passing`,
     ylim = c(0,100))

#Scatterplot - Year and Percent Passing (Linearity check)
plot(NYCLUSchools$Year, NYCLUSchools$`Percent Passing`,
     ylim = c(-100,200))

#Scatterplot - FSM and Percent Passing
plot(NYCLUSchools$FSM,NYCLUSchools$`Percent Passing`)

## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

#Boxplot Percent Passing ~ School
par(mfrow = c(1,1))
boxplot(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$School)

#Panel (School and exam) effects graphic 
library(lattice)

xyplot(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$Year | Exam, data = NYCLUSchools, groups = NYCLUSchools$BEDS,
       type = "o", panel = panel.superpose)

Exploratory Modeling

#T-test with groups 2007-2015, 2016-2017
t.test(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$BTAT)

## 
##  Welch Two Sample t-test
## 
## data:  NYCLUSchools$`Percent Passing` by NYCLUSchools$BTAT
## t = 0.54564, df = 203.67, p-value = 0.5859
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.362816  4.170959
## sample estimates:
## mean in group 0 mean in group 1 
##        55.81385        54.90977

boxplot(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$BTAT)

#Anova School ~ Percent Passing
fit13 <- lm(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$School)
anova(fit13)

## Analysis of Variance Table
## 
## Response: NYCLUSchools$`Percent Passing`
##                      Df Sum Sq Mean Sq F value    Pr(>F)    
## NYCLUSchools$School  54 147418 2729.97  16.175 < 2.2e-16 ***
## Residuals           728 122871  168.78                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Simple linear regression model
plot(NYCLUSchools$Year, NYCLUSchools$`Percent Passing`)
fit1 <- lm(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$Year)
abline(fit1)

summary(fit1)

## 
## Call:
## lm(formula = NYCLUSchools$`Percent Passing` ~ NYCLUSchools$Year)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -49.704 -14.077  -0.931  12.923  44.296 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -1178.1055   422.4628  -2.789  0.00542 **
## NYCLUSchools$Year     0.6133     0.2100   2.920  0.00360 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.5 on 781 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.0108, Adjusted R-squared:  0.009536 
## F-statistic: 8.529 on 1 and 781 DF,  p-value: 0.003596

#Loess model
fit2 <- loess(NYCLUSchools$`Percent Passing` ~ NYCLUSchools$Year, span=.5)
plot(NYCLUSchools$Year, NYCLUSchools$`Percent Passing`,
     main = "Local Polynomial Regression",
     xlab="Time",
     ylab="Exam Passing Rate by School")
lines(NYCLUSchools$Year, predict(fit2,newdata = NYCLUSchools$Year), col="red")

#Interrupted Time Series Analysis

#ITSA model
fit3 <- lm(formula = NYCLUSchools$`Percent Passing` ~ NYCLUSchools$Timeelapsed + NYCLUSchools$BTAT + NYCLUSchools$Timeelapsed*NYCLUSchools$BTAT)
summary(fit3)

## 
## Call:
## lm(formula = NYCLUSchools$`Percent Passing` ~ NYCLUSchools$Timeelapsed + 
##     NYCLUSchools$BTAT + NYCLUSchools$Timeelapsed * NYCLUSchools$BTAT)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -47.861 -13.925  -0.861  13.419  46.139 
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 50.8608     1.3081  38.880  < 2e-16
## NYCLUSchools$Timeelapsed                     1.2822     0.2826   4.537 6.61e-06
## NYCLUSchools$BTAT                           31.4811    30.3292   1.038    0.300
## NYCLUSchools$Timeelapsed:NYCLUSchools$BTAT  -4.1709     3.1989  -1.304    0.193
##                                               
## (Intercept)                                ***
## NYCLUSchools$Timeelapsed                   ***
## NYCLUSchools$BTAT                             
## NYCLUSchools$Timeelapsed:NYCLUSchools$BTAT    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.37 on 779 degrees of freedom
##   (71 observations deleted due to missingness)
## Multiple R-squared:  0.02707,    Adjusted R-squared:  0.02332 
## F-statistic: 7.224 on 3 and 779 DF,  p-value: 8.734e-05

ITSA model Visualization

plot(NYCLUSchools$Year, NYCLUSchools$`Percent Passing`,
     main="Interrupted Time Series",
     xlab="Time",
     ylab="Exam Passing Rate by School")

clip(0,2018,-20,110)
abline(v=2015.67)

clip(0,2015.67,0,100)
abline(-2770.9453, 1.4056,col="blue", lwd=2)

clip(2015.67,2018,0,100)
abline(-2770.9453 + 8650.9847, 1.4056 - 4.2943, col="red", lwd=2)

clip(2015.67,2018,0,100)
abline(-2770.9453, 1.4056, col="blue", lwd=2, lty=2)

Advanced Modeling

#Newey-West HAC robust standard errors (fit 3)
library(sandwich)

## Warning: package 'sandwich' was built under R version 3.6.3

library(lmtest)

## Warning: package 'lmtest' was built under R version 3.6.3

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 3.6.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

HACfit3 <- vcovHAC(fit3)
coeftest(fit3, HACfit3 = vcovHAC(fit3))

## 
## t test of coefficients:
## 
##                                            Estimate Std. Error t value
## (Intercept)                                50.86078    1.30814 38.8804
## NYCLUSchools$Timeelapsed                    1.28216    0.28261  4.5369
## NYCLUSchools$BTAT                          31.48115   30.32919  1.0380
## NYCLUSchools$Timeelapsed:NYCLUSchools$BTAT -4.17089    3.19893 -1.3038
##                                             Pr(>|t|)    
## (Intercept)                                < 2.2e-16 ***
## NYCLUSchools$Timeelapsed                   6.609e-06 ***
## NYCLUSchools$BTAT                             0.2996    
## NYCLUSchools$Timeelapsed:NYCLUSchools$BTAT    0.1927    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#AVP (Added-Variable Plot)
library(car)

## Warning: package 'car' was built under R version 3.6.2

## Loading required package: carData

avPlots(fit3)

Checking Model Assumptions

#Visual Assessment - Linearity & Homoscedasticity
par(mfrow = c(2,2))
plot(fit3)

#Conditional Normality
par(mfrow = c(1,1))
qqnorm(resid(fit3))
qqline(resid(fit3))

#Residual Analysis
plot(fitted(fit3), resid(fit3))

#Breusch-Pagan Test (OLS vs Random Effects)
lmtest::bptest(fit3)

## 
##  studentized Breusch-Pagan test
## 
## data:  fit3
## BP = 4.9051, df = 3, p-value = 0.1789