Bipartite Network Analysis: Players Attending School

Final Project
Amer Abuhasan
Final Project
Author

Amer Abuhasan

Published

May 12, 2023

Reading Data

Code
# Dataset
df <- CollegePlaying

#First 6 rows of the dataset
kable(head(df),caption = "First 6 rows of Dataset")
First 6 rows of Dataset
playerID schoolID yearID
aardsda01 pennst 2001
aardsda01 rice 2002
aardsda01 rice 2003
abadan01 gamiddl 1992
abadan01 gamiddl 1993
abbeybe01 vermont 1889

Constructing Network

Code
# years having attended more than 300
ylist <- c(1985,1986,1987,1988,1989,1990)
g <- graph.data.frame(df[df$yearID %in% ylist,],directed = T)
V(g)$type <- V(g)$name %in% df[df$yearID %in% ylist,1]
g
IGRAPH 098c44f DN-B 1374 2040 -- 
+ attr: name (v/c), type (v/l), yearID (e/n)
+ edges from 098c44f (vertex names):
 [1] abbotji01->michigan  abbotji01->michigan  abbotji01->michigan 
 [4] abbotky01->ucsd      abbotky01->ucsd      abbotky01->longbeach
 [7] acrema01 ->nmstate   acrema01 ->nmstate   adamsjo02->cacerri  
[10] adkinst01->upenn     adkinst01->upenn     alexage01->tulane   
[13] alexage01->tulane    alexage01->tulane    alicelu01->floridast
[16] alicelu01->floridast allisda01->jamesmad  allisda01->jamesmad 
[19] allisda01->jamesmad  allisda01->jamesmad  allrebe01->lamar    
[22] allrebe01->lamar     aloumo01 ->cacanad   aloumo01 ->cacanad  
+ ... omitted several edges

Get the Largest Component

Code
# GEt the largest component
components <- clusters(g, mode="weak")
biggest_cluster_id <- which.max(components$csize)

# ids
vert_ids <- V(g)[components$membership == biggest_cluster_id]

# subgraph
g <- induced_subgraph(g, vert_ids)

Network Plot

Code
grp <- ifelse(V(g)$type,"Player","School")
nodes <- data.frame(id = V(g)$name, title = V(g)$name, group = grp)
edges <- get.data.frame(g, what="edges")[1:2]
vis.nodes <- nodes
vis.links <- edges
#giving some styles to nodes and edges
vis.nodes$shadow <- TRUE # Nodes will drop shadow
vis.nodes$label  <- vis.nodes$id # Node label
vis.nodes$size   <- degree(g)+20 # Node size
vis.nodes$borderWidth <- 2 # Node border width
vis.links$color <- "gray"    # line color  
vis.links$smooth <- FALSE    # should the edges be curved?
vis.links$shadow <- FALSE    # edge shadow
visnet3 = visNetwork(vis.nodes, vis.links, main = "Network of Player Attending School") %>%
  visPhysics(solver = "forceAtlas2Based",
             forceAtlas2Based = list(gravitationalConstant = -100))
visLegend(visnet3,main = "Groups")

Network Stats

Code
################################## Network Stats ##################################

mx_cliq <- clique.number(g)
k <- data.frame(Measure = c("Nodes","Edges","Radius","Diameter",
                             "Reciprocity","Average Degree",
                             "Density","Number of Clusters",
                             "Largest Clique Size","Number of Largest Cliques",
                             "Average Shortest Path","Clustering Coefficient",
                             "Number of Cores","Number of Triangles"),
                 Value = c(vcount(g),ecount(g),
                           radius(g),
                           diameter(g),
                           reciprocity(g),
                           mean(degree(g,mode = "all")),
                           graph.density(g),
                           clusters(g)$no,mx_cliq,
                           length(cliques(as.undirected(g),min = mx_cliq)),
                           average.path.length(g),
                           transitivity(g),
                           length(unique(coreness(g))),
                           length(triangles(g))))
k$Value <- round(k$Value,5)
kable(k,caption = "Player's Atending School Network Stats")
Player’s Atending School Network Stats
Measure Value
Nodes 245.00000
Edges 431.00000
Radius 13.00000
Diameter 1.00000
Reciprocity 0.00000
Average Degree 3.51837
Density 0.00721
Number of Clusters 1.00000
Largest Clique Size 2.00000
Number of Largest Cliques 252.00000
Average Shortest Path 1.00000
Clustering Coefficient 0.00000
Number of Cores 4.00000
Number of Triangles 0.00000

Degree Distibution

Code
#plot(degree.distribution(g), col="red") #Plot degree distribution
ddist <- degree_distribution(g, mode="all", cumulative=TRUE)
plot(ddist,col="violet",xlab="DEGREE",ylab="DIST", main="The Degree Distribution", pch=19)

Community Detection

Code
wk <- walktrap.community(g)
grp <- wk$membership
nodes <- data.frame(id = V(g)$name, title = V(g)$name, group = grp)
edges <- get.data.frame(g, what="edges")[1:2]
vis.nodes <- nodes
vis.links <- edges
#giving some styles to nodes and edges
vis.nodes$shadow <- TRUE # Nodes will drop shadow
vis.nodes$label  <- vis.nodes$id # Node label
vis.nodes$size   <- degree(g)+20 # Node size
vis.nodes$borderWidth <- 2 # Node border width
vis.links$color <- "gray"    # line color  
vis.links$smooth <- FALSE    # should the edges be curved?
vis.links$shadow <- FALSE    # edge shadow
visnet3 = visNetwork(vis.nodes, vis.links, main = "Walk Trap : Network of Player Attending School") %>%
  visPhysics(solver = "forceAtlas2Based",
             forceAtlas2Based = list(gravitationalConstant = -100))
visLegend(visnet3,main = "Communities")

Centralities

Code
Top10Schools_Degree <- names(sort(degree(g, mode = "in"),decreasing = T))[1:10]
Top10Players_Degree <- names(sort(degree(g, mode = "out"),decreasing = T))[1:10]
Top10_Bonachic <- names(sort(power_centrality(g),decreasing = T))[1:10]
Top10_Eigen <- names(sort(eigen_centrality(g)$vector,decreasing = T))[1:10]
Top10_Betweenness <- names(sort(betweenness(as.undirected(g)),decreasing = T))[1:10]
df <- data.frame(Top10Schools_Degree,
                 Top10Players_Degree,
                 Top10_Bonachic,
                 Top10_Eigen,
                 Top10_Betweenness)
kable(df,caption = "Top 10 nodes (Players & Schools) by Centralities")
Top 10 nodes (Players & Schools) by Centralities
Top10Schools_Degree Top10Players_Degree Top10_Bonachic Top10_Eigen Top10_Betweenness
stanford careypa01 careypa01 stanford arizonast
lsu chitrst01 chitrst01 chitrst01 cacerri
ucla dunbama01 dunbama01 careypa01 casacra
floridast leskacu01 leskacu01 witmero01 benjami01
calstfull livinsc01 livinsc01 amaroru02 vinafe01
fresnost minorbl01 minorbl01 keysebr01 calstfull
usc nielsje01 nielsje01 mcdowja01 lsu
arizonast piattdo01 piattdo01 spencst02 bowieji01
miamifl spehrti01 spehrti01 spraged02 naultda01
cacerri urbanto01 urbanto01 osunaal01 motaan01
Code
#Top 10 Schools with value by Degree
sort(degree(g, mode = "in"),decreasing = T)[1:10]
 stanford       lsu      ucla floridast calstfull  fresnost       usc arizonast 
       41        35        31        30        25        24        20        20 
  miamifl   cacerri 
       17        15 
Code
#Top 10 Players with value by Degree
sort(degree(g, mode = "out"),decreasing = T)[1:10]
careypa01 chitrst01 dunbama01 leskacu01 livinsc01 minorbl01 nielsje01 piattdo01 
        4         4         4         4         4         4         4         4 
spehrti01 urbanto01 
        4         4 
Code
#Top Nodes with values by Bonachic Centrality
sort(power_centrality(g),decreasing = T)[1:10]
careypa01 chitrst01 dunbama01 leskacu01 livinsc01 minorbl01 nielsje01 piattdo01 
 1.906041  1.906041  1.906041  1.906041  1.906041  1.906041  1.906041  1.906041 
spehrti01 urbanto01 
 1.906041  1.906041 
Code
#Top 10 Nodes with values by Eigen
sort(eigen_centrality(g)$vector,decreasing = T)[1:10]
 stanford careypa01 chitrst01 amaroru02 mcdowja01 spencst02 spraged02 witmero01 
1.0000000 0.3795853 0.3795853 0.2846889 0.2846889 0.2846889 0.2846889 0.2846889 
keysebr01 osunaal01 
0.2846889 0.1919559 
Code
#Top 10 Nodes with values by Betweenness
sort(betweenness(as.undirected(g)),decreasing = T)[1:10]
arizonast   cacerri   casacra benjami01  vinafe01 calstfull       lsu bowieji01 
18762.250 18153.500 15685.000 13826.750 13450.500  9586.167  9539.000  9108.000 
naultda01  motaan01 
 8804.167  6235.000 

Network models

Code
g <- asNetwork(g)
#basic ERGM model
m <- ergm(g ~ edges)
summary(m)
Call:
ergm(formula = g ~ edges)

Maximum Likelihood Results:

      Estimate Std. Error MCMC % z value Pr(>|z|)    
edges -5.46477    0.06313      0  -86.57   <1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     Null Deviance: 82873  on 59780  degrees of freedom
 Residual Deviance:  3259  on 59779  degrees of freedom
 
AIC: 3261  BIC: 3270  (Smaller is better. MC Std. Err. = 0)
Code
#with node level attributes
m1 <- ergm(g ~ edges 
           + nodecov("Eigen") 
           + nodecov("Degree")) 
summary(m1)
Call:
ergm(formula = g ~ edges + nodecov("Eigen") + nodecov("Degree"))

Maximum Likelihood Results:

                Estimate Std. Error MCMC % z value Pr(>|z|)    
edges          -6.568340   0.101491      0 -64.718   <1e-04 ***
nodecov.Eigen  -1.167338   0.275706      0  -4.234   <1e-04 ***
nodecov.Degree  0.097185   0.004308      0  22.557   <1e-04 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

     Null Deviance: 82873  on 59780  degrees of freedom
 Residual Deviance:  2860  on 59777  degrees of freedom
 
AIC: 2866  BIC: 2893  (Smaller is better. MC Std. Err. = 0)
Code
# the odds ratio for each term. 
or <- exp(coef(m1))  

CUG test

Code
#CuG test on the network using mode size
ctest <- cug.test(g,
         centralization,
         FUN.arg=list(FUN=degree), 
         mode="graph", 
         cmode="size")

#CuG test on the network using mode dyad.census
ctest2 <- cug.test(g,
         centralization,
         FUN.arg=list(FUN=degree), 
         mode="graph", 
         cmode="dyad.census")

#CuG test on the network using mode edges
ctest3 <- cug.test(g,
         centralization,
         FUN.arg=list(FUN=degree), 
         mode="graph", 
         cmode="edges")

par(mfrow=c(1,3))
plot(ctest, main="Degree \nConditioned on Size" )
plot(ctest3, main="Degree \nConditioned on Edges" )
plot(ctest2, main="Degree \nConditioned on Dyads" )

Code
par(mfrow=c(1,1))