Code
library(tidyverse)
library(ggplot2)
library(lubridate)
library(readxl)
# Global knitr chunk options: echo the code, suppress warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
Theresa Szczepanski
November 3, 2022
Today’s challenge is to:
(be sure to only include the category tags for the data you use!)
To read in the MCAS_2022
data, I used the following process:
After examining the summary (see tab), I chose to
Filter:
SchoolID: Several variables identify our school; I removed all but one, testschoolcode.
StudentPrivacy: I left the sasid
variable which is a student identifier number, but eliminated all values corresponding to students’ names.
dis
: We are a charter school within our own unique district, therefore any “district level” data is identical to our “school level” data.
Rename
I have not yet renamed any variables, but I have a list of variables for which I need to ask my administration for a key explaining what they represent. Ideally, after this, I would put
E_ before all ELA MCAS student performance metric variables
M_ before all Math MCAS student performance metric variables
S_ before all Science MCAS student performance metric variables
SI_ before all student demographic characteristic identifying variables
Mutate
I left as doubles: mitem1, sgp
Recode to char: town
Refactor as ord: mperflev
Recode to date: dob, using lubridate.
# Filter, rename variables, and mutate values of variables on read-in.
#
# Reads the Spring 2022 MCAS results file (skipping its first line), drops the
# redundant school/district identifier columns, and recodes column types:
# nominal variables become character, ordinal variables become factors, and
# `dob` is parsed into a date.
MCAS_2022 <- read_csv(
  "_data/PrivateSpring2022_MCAS_full_preliminary_results_04830305.csv",
  skip = 1
) %>%
  select(-c(
    "sprp_dis", "sprp_sch", "sprp_dis_name", "sprp_sch_name", "sprp_orgtype",
    "schtype", "testschoolname", "yrsindis", "conenr_dis"
  )) %>%
  # Recode all nominal variables as characters.
  # `sasid` is intentionally not converted here (its conversion was disabled).
  mutate(across(
    c(
      testschoolcode, highneeds, lowincome, title1, ever_EL, EL, EL_FormerEL,
      FormerEL, ELfirstyear, IEP, plan504, firstlanguage,
      # Fix: this used to be written to a misspelled column `nature0fdis`
      # (digit zero), which left `natureofdis` numeric and appended a stray
      # duplicate column at the end of the data frame.
      natureofdis,
      spedplacement, town, ssubject
    ),
    as.character
  )) %>%
  # Recode all ordinal variables as factors.
  mutate(across(c(grade, levelofneed), as.factor)) %>%
  # ELA and Math performance levels share the same four-level ordering.
  mutate(across(
    c(eperf2, mperf2),
    \(x) recode_factor(
      x,
      "E" = "E",
      "M" = "M",
      "PM" = "PM",
      "NM" = "NM",
      .ordered = TRUE
    )
  )) %>%
  mutate(eperflev = recode_factor(
    eperflev,
    "E" = "E",
    "M" = "M",
    "PM" = "PM",
    "NM" = "NM",
    "DNT" = "DNT",
    "ABS" = "ABS",
    .ordered = TRUE
  )) %>%
  mutate(mperflev = recode_factor(
    mperflev,
    "E" = "E",
    "M" = "M",
    "PM" = "PM",
    "NM" = "NM",
    "INV" = "INV",
    "ABS" = "ABS",
    .ordered = TRUE
  )) %>%
  # The science variables contain a mixture of legacy performance levels and
  # next generation performance levels which needs to be addressed in the
  # ordering of these factors.
  mutate(sperf2 = recode_factor(
    sperf2,
    "E" = "E",
    "M" = "M",
    "PM" = "PM",
    "NM" = "NM",
    .ordered = TRUE
  )) %>%
  mutate(sperflev = recode_factor(
    sperflev,
    "E" = "E",
    "M" = "M",
    "PM" = "PM",
    "NM" = "NM",
    "INV" = "INV",
    "ABS" = "ABS",
    .ordered = TRUE
  )) %>%
  # Recode DOB using lubridate (values are parsed as month-day-year).
  mutate(dob = mdy(
    dob,
    quiet = FALSE,
    tz = NULL,
    locale = Sys.getlocale("LC_TIME"),
    truncated = 0
  ))
view(MCAS_2022)
MCAS_2022
Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
adminyear [numeric] | 1 distinct value |
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
testschoolcode [character] | 1. 4830305 |
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
grade [factor] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gradesims [numeric] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
dob [Date] |
|
427 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gender [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
race [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
yrsinmass [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
yrsinmass_num [numeric] |
|
12 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
yrsinsch [numeric] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
highneeds [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
lowincome [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
title1 [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ever_EL [character] | 1. 1 |
|
475 (96.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
EL [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
EL_FormerEL [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
FormerEL [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ELfirstyear [character] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
IEP [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plan504 [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
firstlanguage [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
natureofdis [numeric] |
|
|
380 (76.8%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
levelofneed [factor] |
|
|
380 (76.8%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
spedplacement [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
town [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
county [character] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
octenr [numeric] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
conenr_sch [numeric] | 1 distinct value |
|
440 (88.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
conenr_sta [numeric] | 1 distinct value |
|
434 (87.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
access_part [numeric] | 1 distinct value |
|
488 (98.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ealt [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ecomplexity [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
emode [character] | 1. O |
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eteststat [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
wptopdev [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
wpcompconv [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem1 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem2 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem3 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem4 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem5 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem6 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem7 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem8 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem9 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem10 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem11 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem12 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem13 [numeric] |
|
|
75 (15.2%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem14 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem15 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem16 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem17 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem18 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem19 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem20 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem21 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem22 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem23 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem24 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem25 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem26 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem27 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem28 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem29 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem30 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem31 [numeric] |
|
|
135 (27.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem32 [numeric] |
|
|
403 (81.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem33 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem34 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem35 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem36 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem37 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem38 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem39 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eitem40 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
erawsc [numeric] |
|
39 distinct values | 73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
emcpts [numeric] |
|
24 distinct values | 73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eorpts [numeric] |
|
28 distinct values | 73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eperpospts [numeric] |
|
63 distinct values | 73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
escaleds [numeric] |
|
74 distinct values | 74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eperflev [ordered, factor] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eperf2 [ordered, factor] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
enumin [numeric] | 1 distinct value |
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eassess [numeric] |
|
|
70 (14.1%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
esgp [numeric] |
|
96 distinct values | 109 (22.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
idea1 [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
conv1 [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
idea2 [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
conv2 [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
idea3 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
conv3 [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eattempt [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
malt [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mcomplexity [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mmode [character] | 1. O |
|
71 (14.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mteststat [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem1 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem2 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem3 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem4 [numeric] |
|
|
75 (15.2%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem5 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem6 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem7 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem8 [numeric] |
|
|
76 (15.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem9 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem10 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem11 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem12 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem13 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem14 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem15 [numeric] |
|
|
76 (15.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem16 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem17 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem18 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem19 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem20 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem21 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem22 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem23 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem24 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem25 [numeric] |
|
|
75 (15.2%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem26 [numeric] |
|
|
72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem27 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem28 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem29 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem30 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem31 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem32 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem33 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem34 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem35 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem36 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem37 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem38 [numeric] |
|
|
74 (14.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem39 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem40 [numeric] |
|
|
73 (14.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem41 [numeric] |
|
|
432 (87.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mitem42 [numeric] |
|
|
432 (87.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mrawsc [numeric] |
|
51 distinct values | 72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mmcpts [numeric] |
|
22 distinct values | 72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
morpts [numeric] |
|
38 distinct values | 72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mperpospts [numeric] |
|
67 distinct values | 72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mscaleds [numeric] |
|
80 distinct values | 72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mperflev [ordered, factor] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mperf2 [ordered, factor] |
|
|
72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mnumin [numeric] | 1 distinct value |
|
72 (14.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
massess [numeric] |
|
|
70 (14.1%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
msgp [numeric] |
|
97 distinct values | 107 (21.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mattempt [character] |
|
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
salt [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
scomplexity [logical] |
|
495 (100.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
smode [character] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
steststat [character] |
|
|
183 (37.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ssubject [character] |
|
|
363 (73.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem1 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem2 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem3 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem4 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem5 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem6 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem7 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem8 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem9 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem10 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem11 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem12 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem13 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem14 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem15 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem16 [numeric] |
|
|
242 (48.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem17 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem18 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem19 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem20 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem21 [numeric] |
|
|
243 (49.1%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem22 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem23 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem24 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem25 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem26 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem27 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem28 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem29 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem30 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem31 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem32 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem33 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem34 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem35 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem36 [numeric] |
|
|
240 (48.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem37 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem38 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem39 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem40 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem41 [numeric] |
|
|
239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem42 [numeric] |
|
|
418 (84.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem43 [numeric] |
|
|
487 (98.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem44 [numeric] |
|
|
488 (98.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sitem45 [numeric] |
|
|
488 (98.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
srawsc [numeric] |
|
43 distinct values | 239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
smcpts [numeric] |
|
26 distinct values | 239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sorpts [numeric] |
|
33 distinct values | 239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sperpospts [numeric] |
|
59 distinct values | 239 (48.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sscaleds [numeric] |
|
91 distinct values | 185 (37.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sperflev [ordered, factor] |
|
|
183 (37.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sperf2 [ordered, factor] |
|
|
254 (51.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
snumin [numeric] | 1 distinct value |
|
254 (51.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sassess [numeric] |
|
|
252 (50.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sattempt [character] |
|
|
183 (37.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ela_cd [numeric] |
|
|
363 (73.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
math_cd [numeric] |
|
|
363 (73.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sci_cd [numeric] |
|
|
363 (73.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
accom_e [numeric] | 1 distinct value |
|
419 (84.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
accom_m [numeric] | 1 distinct value |
|
417 (84.2%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
accom_s [numeric] | 1 distinct value |
|
448 (90.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
accom_readaloud [character] |
|
|
492 (99.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
accom_scribe [character] | 1. H |
|
493 (99.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
accom_calculator [numeric] | 1 distinct value |
|
493 (99.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
grade2018 [numeric] |
|
|
224 (45.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
grade2019 [numeric] |
|
|
134 (27.1%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
grade2021 [numeric] |
|
|
94 (19.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
escaleds2018 [numeric] |
|
61 distinct values | 229 (46.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
escaleds2019 [numeric] |
|
71 distinct values | 138 (27.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
escaleds2021 [numeric] |
|
83 distinct values | 96 (19.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mscaleds2018 [numeric] |
|
71 distinct values | 229 (46.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mscaleds2019 [numeric] |
|
77 distinct values | 138 (27.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mscaleds2021 [numeric] |
|
83 distinct values | 95 (19.2%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
esgp2018 [numeric] |
|
81 distinct values | 316 (63.8%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
esgp2019 [numeric] |
|
91 distinct values | 231 (46.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
esgp2021 [numeric] |
|
88 distinct values | 201 (40.6%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
msgp2018 [numeric] |
|
85 distinct values | 316 (63.8%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
msgp2019 [numeric] |
|
92 distinct values | 231 (46.7%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
msgp2021 [numeric] |
|
82 distinct values | 200 (40.4%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
summarize [numeric] |
|
|
0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
amend [character] | 1. M |
|
494 (99.8%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
datachanged [numeric] | 1 distinct value |
|
494 (99.8%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eScaleForm [numeric] | 1 distinct value |
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mScaleForm [numeric] | 1 distinct value |
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sScaleForm [numeric] | 1 distinct value |
|
307 (62.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
eFormType [character] | 1. C |
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mFormType [character] | 1. C |
|
69 (13.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sFormType [character] |
|
|
183 (37.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
days_in_person [numeric] |
|
53 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
member [numeric] |
|
22 distinct values | 0 (0.0%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
ssubject_prior [numeric] |
|
|
435 (87.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sscaleds_prior [numeric] |
|
24 distinct values | 435 (87.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
escaleds.legacy.equivalent [numeric] |
|
14 distinct values | 433 (87.5%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
mscaleds.legacy.equivalent [numeric] |
|
24 distinct values | 432 (87.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sscaleds.legacy.equivalent [numeric] |
|
26 distinct values | 425 (85.9%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sscaleds.highest.on.legacy.scale [numeric] |
|
30 distinct values | 363 (73.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
scpi [numeric] |
|
|
432 (87.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sscaleds.highest.on.nextGen.scale [numeric] |
|
24 distinct values | 432 (87.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
sperf2.highest.on.nextGen.scale [character] |
|
|
432 (87.3%) | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
nature0fdis [character] |
|
|
380 (76.8%) |
Generated by summarytools 1.0.1 (R version 4.2.1)
2022-12-21
To read in MCAS_G9Science2022_ItemAnalysis
, I:
sitem
variable to prepare to join this data set to the Student performance data set.
library(readxl)
# G9 Science item analysis.
#
# Reads the "SG9Physics" sheet with our own column names (skipping the first
# row of the sheet), drops the placeholder "delete" column, and keeps only the
# rows whose standard contains "HS".
#
# The item number is then cleaned up with tidyr/stringr: `sitem` is split into
# two pieces, the second piece is discarded, and the remaining value is
# prefixed with "sitem" so it can serve as the join key against the student
# performance data.
MCAS_G9Science2022_ItemAnalysis <- read_excel(
  "_data/2022MCASDepartmentalAnalysis.xlsx",
  sheet = "SG9Physics",
  skip = 1,
  col_names = c(
    "sitem", "sType", "sReporting Category", "sStandard", "sItem Desc",
    "delete", "sItem Possible Points", "RT Percent Points",
    "State Percent Points", "RT-State Diff"
  )
) %>%
  select(!contains("delete")) %>%
  filter(str_detect(sStandard, "HS")) %>%
  mutate(sitem = as.character(sitem)) %>%
  separate(sitem, c("sitem", "delete")) %>%
  select(!contains("delete")) %>%
  mutate(sitem = str_c("sitem", sitem))
# view(MCAS_G9Science2022_ItemAnalysis)
MCAS_G9Science2022_ItemAnalysis
From our MCAS_2022
data frame summary, we can see that this data set contains performance results from 495 students from Rising Tide Charter Public School on the Spring 2022 Massachusetts Comprehensive Assessment System (MCAS) tests.
For each student, there are values reported for 256 different variables which consist of information from four broad categories
Demographic characteristics of the students themselves (e.g., race, gender, date of birth, town, grade level, years in school, years in Massachusetts, and low income, title1, IEP, 504, and EL status).
Key assessment features including subject, test format, and accommodations provided
Performance metrics: This includes a student's score on individual item strands, e.g., mitem1-mitem42.
Our second data set, MCAS_G9Science2022_ItemAnalysis
is 42 by 9 and consists of 9 variables with information pertaining to the 2022 HS Introductory Physics Item Report. The variables can be broken down into 2 categories:
Details about a given test item: content Reporting Category
(MF (motion and forces), WA (waves), and EN (energy)),
Standard
from the Massachusetts Curriculum Framework,
Item Description
providing the details of what was asked of students.
Summary Performance Metrics:
I am interested in analyzing the 9th Grade Science Performance. To do this, I will select a subset of our data frame. I selected:
Then I filtered out the 10th grade students who did not take the test
When I compared this data frame to the State reported analysis, the state analysis only contains 68 students. To be able to use the state data, I thus filtered out our ten 10th grade students and only looked at the performance of the 9th grade students. Notably, my data frame has 69 entries while the state is reporting data on only 68 students. I will have to investigate this further.
Since I will join this data frame with the MCAS_G9Science2022_ItemAnalysis
, using sitem
as the key, I need to pivot this data set longer.
As expected, we now have 42 X 69 = 2898 rows.
Now, we should be ready to join our data sets using sitem
as the key. We should have a 2,898 by (9 + 8) = 2,898 by 17 data frame.
As expected, we now have a 2,898 by 17 data frame.
When examining our performance relative to the state by subgroups, it is noteworthy that Rising Tide Female Introductory Physics students on average scored lower relative to their peers in the state and Rising Tide Male Introductory students scored higher on average. This trend is not true for Rising Tide MS science students. When we look at our student’s performance by item and by gender, we can see several questions with a larger disparity in performance by gender.
# Share of possible points earned on each item, split by gender.
# No IEP filter is applied, so the chart covers every student in
# G9Science_StudentItem.
G9Science_Gender <- G9Science_StudentItem %>%
  group_by(gender, sitem) %>%
  summarise(
    sitem_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`),
    .groups = "drop"
  )

ggplot(G9Science_Gender, aes(x = sitem, y = sitem_percent, fill = gender)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    # Previously "Non IEP Students", which did not match the unfiltered data.
    subtitle = "All Students",
    y = "Percent Points Earned",
    x = "Item, Introductory Physics 2022 ",
    title = "Student Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
Now let’s examine the items where students had the weakest performance relative to the state.
# Non-IEP gender analysis on the weakest items: rows with
# `RT-State Diff` of -10 or lower and IEP equal to 0, summarised as the
# share of possible points earned per gender and item.
G9Science_Gender_NonIEP <- G9Science_StudentItem %>%
  select(
    gender, sitem, sitem_score, `sItem Possible Points`, IEP, `RT-State Diff`
  ) %>%
  filter(`RT-State Diff` <= -10, IEP == 0) %>%
  group_by(gender, sitem) %>%
  summarise(
    gender_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )
G9Science_Gender_NonIEP

# Grouped bar chart of those items, one bar per gender.
ggplot(
  G9Science_Gender_NonIEP,
  aes(x = sitem, y = gender_percent, fill = gender)
) +
  geom_col(position = "dodge") +
  labs(
    subtitle = "Non IEP Students",
    y = "Percent Earned",
    x = "Weakest Items ",
    title = "Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
From our students who are not on IEPs, Male students seem to have had more success with questions where they were required to calculate than our female students. Now, we can examine our students on IEPs.
# IEP gender analysis on the weakest items: rows with `RT-State Diff` of
# -10 or lower and IEP different from 0, summarised as the share of
# possible points earned per gender and item.
G9Science_Gender_IEP <- G9Science_StudentItem %>%
  filter(`RT-State Diff` <= -10, IEP != 0) %>%
  # The grouping was previously applied twice; once is enough.
  group_by(gender, sitem) %>%
  summarise(
    gender_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`),
    .groups = "drop"
  )
G9Science_Gender_IEP

# Grouped bar chart of those items, one bar per gender.
ggplot(
  G9Science_Gender_IEP,
  aes(x = sitem, y = gender_percent, fill = gender)
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    subtitle = "IEP Students",
    y = "Percent Earned",
    # Same axis label as the non-IEP chart so the two read as a pair.
    x = "Weakest Items ",
    title = "Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
It seems as though we have the opposite trend in our students who are on IEP plans. Perhaps the accommodations and modifications of these plans are more beneficial to female students or perhaps the male students on plans have stronger disabilities.
Where is the gender gap the largest? There are many things to examine here and I am running out of time…
# Per-item share of possible points earned by students with IEP equal to 0,
# computed separately for gender "M" and gender "F".
G9Science_Male_NonIEP <- G9Science_StudentItem %>%
  filter(gender == "M", IEP == 0) %>%
  group_by(sitem) %>%
  summarise(
    male_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )

G9Science_Female_NonIEP <- G9Science_StudentItem %>%
  filter(gender == "F", IEP == 0) %>%
  group_by(sitem) %>%
  summarise(
    female_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )

# gender_diff = 100 * (female - male): negative values mean the female share
# was lower than the male share on that item.
G9ScienceGender <- left_join(
  G9Science_Female_NonIEP,
  G9Science_Male_NonIEP,
  by = "sitem"
) %>%
  mutate(gender_diff = 100 * (female_percent - male_percent))
G9ScienceGender
Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | |||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
sitem [character] |
|
|
0 (0.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
female_percent [numeric] |
|
21 distinct values | 0 (0.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
male_percent [numeric] |
|
27 distinct values | 0 (0.0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
gender_diff [numeric] |
|
39 distinct values | 0 (0.0%) |
Generated by summarytools 1.0.1 (R version 4.2.1)
2022-12-21
# Items where the female share trails the male share by more than 10
# (gender_diff < -10), drawn as bars shaded by the size of the gap.
gender_gap_items <- filter(G9ScienceGender, gender_diff < -10)

ggplot(
  gender_gap_items,
  aes(x = sitem, y = gender_diff, fill = gender_diff)
) +
  geom_col(position = "dodge") +
  labs(
    subtitle = "Non IEP Students",
    y = "Diff F to M Percent Earned",
    x = "Largest Gender Gap Items ",
    title = "Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
Using prior Math MCAS results to predict Introductory Physics MCAS performance: could we use prior Math MCAS scores to identify students who need extra support for their Science MCAS?
# Scatter of the 2022 science scaled score against the 2021 math scaled
# score for grade 9 rows whose `sattempt` is not "N", with one linear fit.
# view(MCAS_2022)
MCAS_subset <- MCAS_2022 %>%
  # The column list previously contained an empty argument
  # (`yrsinsch,` followed by `, IEP`); it is removed here.
  select(
    sscaleds, gender, grade, yrsinsch, IEP, plan504, sattempt, mattempt,
    mscaleds2021
  ) %>%
  filter(grade == 9, sattempt != "N")

# `color = gender` used to be mapped in aes(), but both layers set a constant
# colour, so the mapping never reached the plot; it is dropped. To compare
# genders, add `facet_wrap(vars(gender))`.
ggplot(MCAS_subset, aes(x = mscaleds2021, y = sscaleds)) +
  geom_point(size = 1, color = "#69b3a2") +
  geom_smooth(method = "lm", color = "grey", size = .5) +
  labs(
    title = "2022 Introductory Physics vs. 2021 Math MCAS",
    y = "Science Scaled Score",
    x = "Math Scaled Score"
  )
Science Performance by Reporting Category: Should teachers review a given unit more closely?
# Share of possible points earned within each science reporting category,
# pooled over every student-item row.
G9Science_Desc <- G9Science_StudentItem %>%
  group_by(`sReporting Category`) %>%
  summarise(
    desc_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )

ggplot(
  G9Science_Desc,
  aes(
    x = `sReporting Category`,
    y = desc_percent,
    fill = `sReporting Category`
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    subtitle = "All Students",
    y = "Percent Points Earned",
    # The axis shows reporting categories, not items (label was "Item, ...").
    x = "Reporting Category, Introductory Physics 2022",
    title = "Student Item Performance by Reporting Category",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
RT State Difference by Reporting Category
# Mean `RT-State Diff` within each science reporting category.
G9Science_StateDiff <- G9Science_StudentItem %>%
  group_by(`sReporting Category`) %>%
  summarise(avg_RT_State_Diff = mean(`RT-State Diff`, na.rm = TRUE))

ggplot(
  G9Science_StateDiff,
  aes(
    x = `sReporting Category`,
    y = avg_RT_State_Diff,
    fill = `sReporting Category`
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    subtitle = "All Students",
    # Labels were copied from the percent-earned chart; this chart plots the
    # average RT-State difference per reporting category instead.
    y = "Average RT-State Diff",
    x = "Reporting Category, Introductory Physics 2022",
    title = "RT-State Difference by Reporting Category",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
# Student-item rows whose item description mentions "calculate"
# (either capitalisation), reduced to the columns used for item analysis.
G9Science_Calculate <- G9Science_StudentItem %>%
  filter(str_detect(`sItem Desc`, "calculate|Calculate")) %>%
  select(
    gender, sitem, sitem_score, `sItem Desc`, `sReporting Category`,
    `sItem Possible Points`, `State Percent Points`, `RT-State Diff`, IEP
  )

# Unfinished follow-up, kept as a note (not run):
#   G9Science_Calculate %>%
#     group_by(`sReporting Category`) %>%
#     summarise(avg_RT_State_Diff = mean(`RT-State Diff`, na.rm = TRUE)) %>%
#     ggplot(aes(fill = `sReporting Category`, y = avg_RT_State_Diff,
#                x = `sReporting Category`)) +
#     geom_bar(position = "dodge", stat = "identity") +
#     labs(subtitle = "All Students",
#          y = "Percent Points Earned",
#          x = "Item, Introductory Physics 2022 ",
#          title = "Student Item Performance by Reporting Category",
#          caption = "2022 HS Introductory Physics MCAS") +
#     theme(axis.text.x = element_text(angle = 60, hjust = 1))
---
title: "Challenge 8"
author: "Theresa Szczepanski"
description: "Joining Data"
date: "11/3/2022"
format:
html:
df-print: paged
toc: true
code-fold: true
code-copy: true
code-tools: true
categories:
- Theresa_Szczepanski
- challenge_8
- MCAS_2022
- MCAS_G9Science2022_ItemAnalysis
---
```{r}
#| label: setup
#| warning: false
#| message: false
library(tidyverse)
library(ggplot2)
library(lubridate)
library(readxl)
knitr::opts_chunk$set(echo = TRUE, warning=FALSE, message=FALSE)
```
## Challenge Overview
Today's challenge is to:
1) read in multiple data sets, and describe the data set using both words and any supporting information (e.g., tables, etc)
2) tidy data (as needed, including sanity checks)
3) mutate variables as needed (including sanity checks)
4) join two or more data sets and analyze some aspect of the joined data
(be sure to only include the category tags for the data you use!)
## MCAS_2022.csv
::: panel-tabset
### Read in Data Set 1
To read in the `MCAS_2022` data, I used the following process:
- Filter out Student Names and then Read in the data
- Examine the summary
- Identify information to filter, mutate, and rename on the read in
- note variables that require further investigation
::: panel-tabset
### Filter, Rename, and Mutate on Read in
After examining the summary (see tab), I chose to
**Filter**:
- _SchoolID_ : There are several variables that identify our school, I removed all
but one, `testschoolcode`.
- _StudentPrivacy_: I left the `sasid` variable which is a student identifier number,
but eliminated all values corresponding to students' names.
- `dis`: We are a charter school within our own unique district, therefore any
"district level" data is identical to our "school level" data.
__Rename__
I currently have not renamed variables, but I have a list of variables for
which I need to talk with my administration to access a key to understand what they represent.
Ideally, after this, I would put
- an `E_` before all `ELA` MCAS student performance metric variables
- an `M_` before all `Math` MCAS student performance metric variables
- an `S_` before all `Science` MCAS student performance metric variables
- an `SI_` before all student demographic characteristic identifying variables
__Mutate__
I left as __doubles__
- variables that measured scores on specific MCAS items e.g., `mitem1`
- variables that measured student growth percentiles (`sgp`)
- variables that counted a student's years in the school system or state.
Recode to __char__
- variables that are __nominal__, e.g., `town`
Refactor as __ord__
- variables that are __ordinal__, e.g., `mperflev`.
Recode to __date__
- `dob` using lubridate.
```{r}
# Read the Spring 2022 MCAS student results and fix column types on read-in:
#   * drop the school/district identifier columns listed in select()
#   * recode nominal variables to character
#   * recode ordinal variables to (ordered) factors
#   * parse `dob` into a date with lubridate
MCAS_2022 <- read_csv(
  "_data/PrivateSpring2022_MCAS_full_preliminary_results_04830305.csv",
  skip = 1
) %>%
  select(-c(
    "sprp_dis", "sprp_sch", "sprp_dis_name", "sprp_sch_name", "sprp_orgtype",
    "schtype", "testschoolname", "yrsindis", "conenr_dis"
  )) %>%
  # Recode all nominal variables as characters (`sasid` is left as read in).
  # `natureofdis` used to be written to a misspelled `nature0fdis` column
  # (zero instead of "o"), which left the real column un-recoded and added a
  # stray duplicate; it is now recoded in place.
  # NOTE(review): this removes one column from the result, so re-check any
  # column counts quoted in the narrative.
  mutate(across(
    c(
      testschoolcode, highneeds, lowincome, title1, ever_EL, EL, EL_FormerEL,
      FormerEL, ELfirstyear, IEP, plan504, firstlanguage, natureofdis,
      spedplacement, town, ssubject
    ),
    as.character
  )) %>%
  # Recode all ordinal variables as factors. The performance-level columns
  # become ordered factors with the level order spelled out below.
  mutate(
    grade = as.factor(grade),
    levelofneed = as.factor(levelofneed),
    eperf2 = recode_factor(
      eperf2,
      "E" = "E", "M" = "M", "PM" = "PM", "NM" = "NM",
      .ordered = TRUE
    ),
    eperflev = recode_factor(
      eperflev,
      "E" = "E", "M" = "M", "PM" = "PM", "NM" = "NM",
      "DNT" = "DNT", "ABS" = "ABS",
      .ordered = TRUE
    ),
    mperf2 = recode_factor(
      mperf2,
      "E" = "E", "M" = "M", "PM" = "PM", "NM" = "NM",
      .ordered = TRUE
    ),
    mperflev = recode_factor(
      mperflev,
      "E" = "E", "M" = "M", "PM" = "PM", "NM" = "NM",
      "INV" = "INV", "ABS" = "ABS",
      .ordered = TRUE
    ),
    # The science variables contain a mixture of legacy performance levels
    # and next generation performance levels, which still needs to be
    # addressed in the ordering of these factors.
    sperf2 = recode_factor(
      sperf2,
      "E" = "E", "M" = "M", "PM" = "PM", "NM" = "NM",
      .ordered = TRUE
    ),
    sperflev = recode_factor(
      sperflev,
      "E" = "E", "M" = "M", "PM" = "PM", "NM" = "NM",
      "INV" = "INV", "ABS" = "ABS",
      .ordered = TRUE
    )
  ) %>%
  # Recode DOB using lubridate; the extra arguments previously passed to
  # mdy() were all its documented defaults.
  mutate(dob = mdy(dob))
view(MCAS_2022)
MCAS_2022
```
### Post Read In Summary
```{r}
# Summarise every column of MCAS_2022 with summarytools, then render the
# summary as an HTML table; used to decide how to set up the data frame.
mcas_summary <- summarytools::dfSummary(
  MCAS_2022,
  varnumbers = FALSE,
  plain.ascii = FALSE,
  style = "grid",
  graph.magnif = 0.70,
  valid.col = FALSE
)
print(mcas_summary, method = "render", table.classes = "table-condensed")
```
:::
### Read in Data Set 2
To read in `MCAS_G9Science2022_ItemAnalysis`, I:
- Selected only the 9th Grade Physics Item Report
- Deleted an extra column from a cell merge
- refactored the `sitem` variable to prepare to join this data set to the Student
performance data set.
- Added an s to the column names as a reminder that all of these columns relate to the Science exam.
```{r}
library(readxl)

# G9 Science item analysis: read the "SG9Physics" sheet, name its columns
# (science columns carry an "s" prefix), and build the `sitem` join key.
MCAS_G9Science2022_ItemAnalysis <- read_excel(
  "_data/2022MCASDepartmentalAnalysis.xlsx",
  sheet = "SG9Physics",
  skip = 1,
  col_names = c(
    "sitem", "sType", "sReporting Category", "sStandard", "sItem Desc",
    "delete", "sItem Possible Points", "RT Percent Points",
    "State Percent Points", "RT-State Diff"
  )
) %>%
  # Drop the placeholder column that was named "delete" on read-in.
  select(-contains("delete")) %>%
  # Keep only rows whose standard contains "HS".
  filter(str_detect(sStandard, "HS")) %>%
  mutate(sitem = as.character(sitem)) %>%
  # Split the item label into two pieces and keep only the first one ...
  separate(sitem, into = c("sitem", "delete")) %>%
  select(-contains("delete")) %>%
  # ... then prefix it so the values look like "sitem<number>".
  mutate(sitem = str_c("sitem", sitem))
MCAS_G9Science2022_ItemAnalysis
view(MCAS_G9Science2022_ItemAnalysis)
```
### Narrative Summary
From our `MCAS_2022` data frame summary, we can see that this data set
contains performance results from 495 students from [Rising Tide Charter Public School](https://risingtide.org/)
on the Spring 2022
[Massachusetts Comprehensive Assessment System (MCAS)](https://www.doe.mass.edu/mcas/default.html)
tests.
For each student, there are values reported for 256 different variables which
consist of information from four broad categories
- *Demographic characteristics* of
the students themselves (e.g., race, gender, date of birth, town, grade level,
years in school, years in Massachusetts, and low income, title1, IEP, 504,
and EL status).
- *Key assessment features* including subject, test format, and
accommodations provided
- *Performance metrics*: This includes a student's score on individual item strands,
e.g.,`mitem1`-`mitem42`.
Our second data set, `MCAS_G9Science2022_ItemAnalysis` is 42 by 9 and consists of
9 variables with information pertaining to the 2022 [HS Introductory Physics Item Report](https://profiles.doe.mass.edu/mcas/mcasitems2.aspx?grade=HS&subjectcode=PHY&linkid=23&orgcode=04830000&fycode=2022&orgtypecode=5&). The variables can be broken down into 2 categories:
Details about a given test item:
- content `Reporting Category` (MF (motion and forces),
WA (waves), and EN (energy)),
- `Standard` from the Massachusetts Curriculum Framework,
- `Item Description` providing the details of what was asked of students.
Summary Performance Metrics:
- Here you can see the percentage of points earned by students at Rising Tide on an item
vs. the percentage of points earned by students in Massachusetts.
### Tidy Data to Prep for Join
I am interested in analyzing the 9th Grade Science Performance. To do this, I will
select a subset of our data frame. I selected:
- 9th Grade and 10th grade students (since a few 10th grade students also took the test)
- Scores on the 42 Science Items
- Demographic characteristics of the students.
Then I filtered out the 10th grade students who did not take the test
```{r}
# Science item scores plus selected demographics for grade 9 rows whose
# `sattempt` is not "N"; columns whose names contain 43, 44 or 45 are
# dropped afterwards.
G9ScienceMCAS_2022 <- MCAS_2022 %>%
  select(
    contains("sitem"), gender, grade, yrsinsch, race, IEP, plan504, sattempt
  ) %>%
  filter(grade == 9, sattempt != "N") %>%
  select(!c(contains("43"), contains("44"), contains("45")))
view(G9ScienceMCAS_2022)
G9ScienceMCAS_2022
```
When I compared this data frame to the State reported analysis, the state analysis only contains
68 students. To be able to use the state data, I thus filtered out our ten 10th grade students and only looked at the performance of the 9th grade students. Notably, my data frame has 69 entries while the state is reporting data on only 68 students. I will have to investigate this further.
Since I will join this data frame with the `MCAS_G9Science2022_ItemAnalysis`, using `sitem` as the key, I need to pivot this data set longer.
```{r}
# Reshape to one row per student and item: the item column names go into
# `sitem` (the join key) and their values into `sitem_score`.
G9ScienceMCAS_2022 <- G9ScienceMCAS_2022 %>%
  pivot_longer(
    cols = contains("sitem"),
    names_to = "sitem",
    values_to = "sitem_score"
  )
view(G9ScienceMCAS_2022)
G9ScienceMCAS_2022
```
As expected, we now have 42 X 69 = 2898 rows.
### Join Data
Now, we should be ready to join our data sets using `sitem` as the key. We should have a
2,898 by (9 + 8) = 2,898 by 17 data frame.
```{r}
# Attach the item-level details to every student-item row, matching on
# `sitem`.
# The pipeline used to end with a dangling `%>%`, which piped the join into
# view() and passed the not-yet-created G9Science_StudentItem as its second
# argument. The join is now assigned first and viewed afterwards.
G9Science_StudentItem <- G9ScienceMCAS_2022 %>%
  left_join(MCAS_G9Science2022_ItemAnalysis, by = "sitem")
view(G9Science_StudentItem)
G9Science_StudentItem
```
As expected, we now have a $2,898 \times 17$ data frame.
### Analysis of Joined Data
When examining our performance relative to the [state by subgroups](https://profiles.doe.mass.edu/mcas/subgroups2.aspx?linkid=25&orgcode=04830305&fycode=2022&orgtypecode=6&), it is noteworthy that Rising Tide Female Introductory Physics students on average scored lower relative to their peers in the state and Rising Tide Male Introductory students scored higher on average. This trend is not true for Rising Tide MS science students. When we look at our student's performance by item and by gender, we can see several questions with a larger disparity in performance by gender.
```{r}
# Share of possible points earned on each item, split by gender.
# No IEP filter is applied, so the chart covers every student in
# G9Science_StudentItem.
G9Science_Gender <- G9Science_StudentItem %>%
  group_by(gender, sitem) %>%
  summarise(
    sitem_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`),
    .groups = "drop"
  )

ggplot(G9Science_Gender, aes(x = sitem, y = sitem_percent, fill = gender)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    # Previously "Non IEP Students", which did not match the unfiltered data.
    subtitle = "All Students",
    y = "Percent Points Earned",
    x = "Item, Introductory Physics 2022 ",
    title = "Student Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
Now let's examine the items where students had the weakest performance relative to the state.
```{r}
# Non-IEP gender analysis on the weakest items: rows with
# `RT-State Diff` of -10 or lower and IEP equal to 0, summarised as the
# share of possible points earned per gender and item.
G9Science_Gender_NonIEP <- G9Science_StudentItem %>%
  select(
    gender, sitem, sitem_score, `sItem Possible Points`, IEP, `RT-State Diff`
  ) %>%
  filter(`RT-State Diff` <= -10, IEP == 0) %>%
  group_by(gender, sitem) %>%
  summarise(
    gender_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )
G9Science_Gender_NonIEP

# Grouped bar chart of those items, one bar per gender.
# (coord_flip() could be appended for horizontal bars.)
ggplot(
  G9Science_Gender_NonIEP,
  aes(x = sitem, y = gender_percent, fill = gender)
) +
  geom_col(position = "dodge") +
  labs(
    subtitle = "Non IEP Students",
    y = "Percent Earned",
    x = "Weakest Items ",
    title = "Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
From our students who are not on IEPs, Male students seem to have had more success with questions where they were required to calculate than our female students. Now, we can
examine our students on IEPs.
```{r}
# IEP gender analysis on the weakest items: rows with `RT-State Diff` of
# -10 or lower and IEP different from 0, summarised as the share of
# possible points earned per gender and item.
G9Science_Gender_IEP <- G9Science_StudentItem %>%
  filter(`RT-State Diff` <= -10, IEP != 0) %>%
  # The grouping was previously applied twice; once is enough.
  group_by(gender, sitem) %>%
  summarise(
    gender_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`),
    .groups = "drop"
  )
G9Science_Gender_IEP

# Grouped bar chart of those items, one bar per gender.
# (coord_flip() could be appended for horizontal bars.)
ggplot(
  G9Science_Gender_IEP,
  aes(x = sitem, y = gender_percent, fill = gender)
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    subtitle = "IEP Students",
    y = "Percent Earned",
    # Same axis label as the non-IEP chart so the two read as a pair.
    x = "Weakest Items ",
    title = "Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
It seems as though we have the opposite trend in our students who are on IEP plans.
Perhaps the accommodations and modifications of these plans are more beneficial to female students or perhaps the male students on plans have stronger disabilities.
Where is the gender gap the largest? There are many things to examine here and I am running out of time...
```{r}
# Per-item share of possible points earned by students with IEP equal to 0,
# computed separately for gender "M" and gender "F".
G9Science_Male_NonIEP <- G9Science_StudentItem %>%
  filter(gender == "M", IEP == 0) %>%
  group_by(sitem) %>%
  summarise(
    male_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )

G9Science_Female_NonIEP <- G9Science_StudentItem %>%
  filter(gender == "F", IEP == 0) %>%
  group_by(sitem) %>%
  summarise(
    female_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )

# gender_diff = 100 * (female - male): negative values mean the female share
# was lower than the male share on that item.
G9ScienceGender <- left_join(
  G9Science_Female_NonIEP,
  G9Science_Male_NonIEP,
  by = "sitem"
) %>%
  mutate(gender_diff = 100 * (female_percent - male_percent))
G9ScienceGender

# Rendered summary table of the per-item gap data.
print(
  summarytools::dfSummary(
    G9ScienceGender,
    varnumbers = FALSE,
    plain.ascii = FALSE,
    style = "grid",
    graph.magnif = 0.70,
    valid.col = FALSE
  ),
  method = "render",
  table.classes = "table-condensed"
)

# Items where the female share trails the male share by more than 10
# (gender_diff < -10), drawn as bars shaded by the size of the gap.
ggplot(
  filter(G9ScienceGender, gender_diff < -10),
  aes(x = sitem, y = gender_diff, fill = gender_diff)
) +
  geom_col(position = "dodge") +
  labs(
    subtitle = "Non IEP Students",
    y = "Diff F to M Percent Earned",
    x = "Largest Gender Gap Items ",
    title = "Item Performance by Gender",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
Using prior Math MCAS results to predict Introductory Physics MCAS performance: could we use prior Math MCAS scores to identify students who need extra support for their Science MCAS?
```{r}
# Scatter of the 2022 science scaled score against the 2021 math scaled
# score for grade 9 rows whose `sattempt` is not "N", with one linear fit.
# view(MCAS_2022)
MCAS_subset <- MCAS_2022 %>%
  # The column list previously contained an empty argument
  # (`yrsinsch,` followed by `, IEP`); it is removed here.
  select(
    sscaleds, gender, grade, yrsinsch, IEP, plan504, sattempt, mattempt,
    mscaleds2021
  ) %>%
  filter(grade == 9, sattempt != "N")

# `color = gender` used to be mapped in aes(), but both layers set a constant
# colour, so the mapping never reached the plot; it is dropped. To compare
# genders, add `facet_wrap(vars(gender))`; `theme_minimal()` is another
# option that was left commented out.
ggplot(MCAS_subset, aes(x = mscaleds2021, y = sscaleds)) +
  geom_point(size = 1, color = "#69b3a2") +
  geom_smooth(method = "lm", color = "grey", size = .5) +
  labs(
    title = "2022 Introductory Physics vs. 2021 Math MCAS",
    y = "Science Scaled Score",
    x = "Math Scaled Score"
  )
```
Science Performance by Reporting Category: Should teachers review a given unit more closely?
```{r}
# Share of possible points earned within each science reporting category,
# pooled over every student-item row.
G9Science_Desc <- G9Science_StudentItem %>%
  group_by(`sReporting Category`) %>%
  summarise(
    desc_percent =
      sum(sitem_score, na.rm = TRUE) / sum(`sItem Possible Points`)
  )

ggplot(
  G9Science_Desc,
  aes(
    x = `sReporting Category`,
    y = desc_percent,
    fill = `sReporting Category`
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    subtitle = "All Students",
    y = "Percent Points Earned",
    # The axis shows reporting categories, not items (label was "Item, ...").
    x = "Reporting Category, Introductory Physics 2022",
    title = "Student Item Performance by Reporting Category",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
RT State Difference by Reporting Category
```{r}
# Mean `RT-State Diff` within each science reporting category.
G9Science_StateDiff <- G9Science_StudentItem %>%
  group_by(`sReporting Category`) %>%
  summarise(avg_RT_State_Diff = mean(`RT-State Diff`, na.rm = TRUE))

ggplot(
  G9Science_StateDiff,
  aes(
    x = `sReporting Category`,
    y = avg_RT_State_Diff,
    fill = `sReporting Category`
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    subtitle = "All Students",
    # Labels were copied from the percent-earned chart; this chart plots the
    # average RT-State difference per reporting category instead.
    y = "Average RT-State Diff",
    x = "Reporting Category, Introductory Physics 2022",
    title = "RT-State Difference by Reporting Category",
    caption = "2022 HS Introductory Physics MCAS"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
```{r}
# Student-item rows whose item description mentions "calculate"
# (either capitalisation), reduced to the columns used for item analysis.
G9Science_Calculate <- G9Science_StudentItem %>%
  filter(str_detect(`sItem Desc`, "calculate|Calculate")) %>%
  select(
    gender, sitem, sitem_score, `sItem Desc`, `sReporting Category`,
    `sItem Possible Points`, `State Percent Points`, `RT-State Diff`, IEP
  )

# Unfinished follow-up, kept as a note (not run):
#   G9Science_Calculate %>%
#     group_by(`sReporting Category`) %>%
#     summarise(avg_RT_State_Diff = mean(`RT-State Diff`, na.rm = TRUE)) %>%
#     ggplot(aes(fill = `sReporting Category`, y = avg_RT_State_Diff,
#                x = `sReporting Category`)) +
#     geom_bar(position = "dodge", stat = "identity") +
#     labs(subtitle = "All Students",
#          y = "Percent Points Earned",
#          x = "Item, Introductory Physics 2022 ",
#          title = "Student Item Performance by Reporting Category",
#          caption = "2022 HS Introductory Physics MCAS") +
#     theme(axis.text.x = element_text(angle = 60, hjust = 1))
```
:::