Data Analytics and Computational Social Science: Networks Blog Post 9

Noah Milstein

Loading the Networks

Putting Network into Necessary Formats

Adding Attributes

Brokerage scores in the 1000s

Brokerage scores in the 1100s

Brokerage scores in the 1200s

Inital Graphical Representation

1000s GGplot

1100s GGplot

1200s GGplot

wars_in_1000s_edgelist <- as.matrix(wars_in_1000s)

wars_in_1000s_edgelist_network_edgelist <- graph.edgelist(wars_in_1000s_edgelist, directed=TRUE)

wars_in_1000s.ig<-graph_from_data_frame(wars_in_1000s)

wars_in_1000s_network <- asNetwork(wars_in_1000s.ig)

aspects_of_1000s_states <- read_excel("~/Desktop/Spring 2022/Networks/aspects_of_1000s_states.xlsx")

total_1000s <- merge(aspects_of_1000s_states, wars_in_1000s.nodes.stat_2, by="name")

aspects_of_1100s_states <- read_excel("~/Desktop/Spring 2022/Networks/aspects_of_1100s_states.xlsx")

total_1100s <- merge(aspects_of_1100s_states, wars_in_1100s.nodes.stat_2, by="name")

aspects_of_1200s_states <- read_excel("~/Desktop/Spring 2022/Networks/aspects_of_1200s_states.xlsx")

total_1200s <- merge(aspects_of_1200s_states, wars_in_1200s.nodes.stat_2, by="name")

total_1000s_brokerag_reg<-total_1000s

total_1000s_brokerag_reg$win_rate <- (total_1000s_brokerag_reg$outdegree/total_1000s_brokerag_reg$totdegree)

total_1000s_brokerag_reg$loss_rate <- (total_1000s_brokerag_reg$indegree/total_1000s_brokerag_reg$totdegree)

total_1000s_brokerag_reg_binom <- total_1000s_brokerag_reg %>% mutate(more_win_or_loss = case_when(
  win_rate < 0.5 ~ 0,
    win_rate >= 0.5 ~ 1))

First_1000s_regression <- glm(more_win_or_loss~.-name-totdegree-indegree-outdegree-dc-eigen.dc-win_rate-loss_rate, total_1000s_brokerag_reg_binom, family=binomial)

First_1000s_regression


Call:  glm(formula = more_win_or_loss ~ . - name - totdegree - indegree - 
    outdegree - dc - eigen.dc - win_rate - loss_rate, family = binomial, 
    data = total_1000s_brokerag_reg_binom)

Coefficients:
 (Intercept)      Catholic         Islam      Orthodox      Buddhist  
  -2.090e+01     1.446e-01    -7.108e-02    -4.043e-01    -8.572e-02  
       Pagan      Tengrism        Shinto         Hindu     Shamanism  
   5.506e-01    -5.656e+01     1.820e+00    -2.142e+00    -1.506e+00  
       eigen         close            rc      eigen.rc    broker.tot  
  -1.877e+03     5.146e+03    -3.979e+00     1.574e+03     2.378e+02  
broker.coord   broker.itin    broker.rep   broker.gate    broker.lia  
  -9.610e+01    -9.449e+01    -7.164e+01    -2.810e+01    -1.298e+02  

Degrees of Freedom: 101 Total (i.e. Null);  82 Residual
  (8 observations deleted due to missingness)
Null Deviance:      140.8 
Residual Deviance: 4.53e-09     AIC: 40

set.seed(292)

total_1000s_for_regression <- total_1000s[,-c(1, 20:25)]

total_1000s_for_regression$win_rate <- (total_1000s_for_regression$outdegree/total_1000s_for_regression$totdegree)

total_1000s_for_regression$loss_rate <- (total_1000s_for_regression$indegree/total_1000s_for_regression$totdegree)

total_1000s_for_regression <- total_1000s_for_regression %>% mutate(more_win_or_loss = case_when(
  win_rate < 0.5 ~ 0,
    win_rate >= 0.5 ~ 1))

First_1000s_regression <- glm(more_win_or_loss~.-loss_rate-win_rate-totdegree-indegree-outdegree-dc-eigen.dc, total_1000s_for_regression, family=binomial)

First_1000s_regression


Call:  glm(formula = more_win_or_loss ~ . - loss_rate - win_rate - totdegree - 
    indegree - outdegree - dc - eigen.dc, family = binomial, 
    data = total_1000s_for_regression)

Coefficients:
(Intercept)     Catholic        Islam     Orthodox     Buddhist  
   -15.1948      13.9008      12.7531      14.6893      15.0858  
      Pagan     Tengrism       Shinto        Hindu    Shamanism  
     0.9610      11.6691      16.0623       9.1358      -0.1497  
      eigen        close           rc     eigen.rc  
   -82.1100     256.5294      -3.3322     -17.3152  

Degrees of Freedom: 109 Total (i.e. Null);  96 Residual
Null Deviance:      152.3 
Residual Deviance: 58.4     AIC: 86.4

set.seed(6738)

in_training<- sample(1:nrow(total_1000s_for_regression),  nrow(total_1000s_for_regression) * 0.7 )

training_1000s <- total_1000s_for_regression[in_training,]

test_1000s <- total_1000s_for_regression[-in_training,]

lm_1000s_binom_subset_1 <- glm(more_win_or_loss~.-loss_rate-win_rate-totdegree-indegree-outdegree-dc-eigen.dc, total_1000s_for_regression, family=binomial, subset = in_training )

logsitic_1_1000s_prob <- predict(lm_1000s_binom_subset_1, test_1000s,
type = "response")

log_preds_1<-ifelse(logsitic_1_1000s_prob >= 0.5, 1, 0)

prediction_1_logs <-mean(log_preds_1 == test_1000s$more_win_or_loss)

prediction_1_logs %>% kable()

x
0.9090909

library(glmnet)
library(MASS)

set.seed(246)

x_ridge <- model.matrix(more_win_or_loss ~ .-loss_rate-win_rate-totdegree-indegree-outdegree-dc-eigen.dc, total_1000s_for_regression)[, -1] 

y_ridge <- total_1000s_for_regression$more_win_or_loss

grid <- 10^seq(10, -2, length = 100)

ridge.mod <- glmnet(x_ridge, y_ridge, alpha = 0, lambda = grid)

dim(coef(ridge.mod))

[1]  14 100

set.seed(729)
train_ridge <- sample(1:nrow(x_ridge), nrow(x_ridge)*0.8 ) 

test_ridge <- (-train_ridge)

y.test_ridge <- y_ridge[test_ridge]

set.seed(9292)

ridge.mod <- glmnet(x_ridge[train_ridge, ], y_ridge[train_ridge], 
                    alpha = 0, lambda = grid, thresh = 1e-12)

ridge.pred <- predict(ridge.mod, s = 4, newx = x_ridge[test_ridge,])

mean((ridge.pred - y.test_ridge)^2) %>% kable()

x
0.2416376

set.seed(231)
ridge.pred <- predict(ridge.mod, s = 0, newx = x_ridge[test_ridge, ], 
                      exact = T, x = x_ridge[train_ridge, ], y = y_ridge[train_ridge])

predict(ridge.mod, s = 0, exact = T, type = "coefficients", 
        x = x_ridge[train_ridge, ], y = y_ridge[train_ridge])[1:14, ]

(Intercept)    Catholic       Islam    Orthodox    Buddhist 
 0.21024033  0.21827317 -0.01160454  0.21312966  0.35601806 
      Pagan    Tengrism      Shinto       Hindu   Shamanism 
 0.08955257  0.14069809  0.38278477 -0.07034364 -0.01038790 
      eigen       close          rc    eigen.rc 
-4.61480591 12.51011844 -0.29977861  4.64835194

set.seed(9292)

cv.out <- cv.glmnet(x_ridge[train_ridge, ], y_ridge[train_ridge], alpha = 0) 

plot(cv.out)

set.seed(9292)

bestlam <- cv.out$lambda.min

bestlam

[1] 0.415338

set.seed(9292)

ridge.pred <- predict(cv.out, s = bestlam, newx = x_ridge[test_ridge,])

mean((ridge.pred - y.test_ridge)^2) %>% kable()

x
0.174632

set.seed(2897)

x_lasso <- model.matrix(more_win_or_loss ~ .-loss_rate-win_rate-totdegree-indegree-outdegree-dc-eigen.dc, total_1000s_for_regression)[, -1] 

y_lasso <- total_1000s_for_regression$more_win_or_loss

grid <- 10^seq(10, -2, length = 100)

lasso.mod <- glmnet(x_lasso, y_lasso, alpha = 0, lambda = grid)

dim(coef(lasso.mod))

[1]  14 100

set.seed(729)

train_lasso <- sample(1:nrow(x_ridge), nrow(x_ridge)*0.8 ) 

test_lasso <- (-train_lasso)

y.test_lasso <- y_lasso[test_lasso]

set.seed(9292)

lasso.mod <- glmnet(x_lasso[train_lasso, ], y_lasso[train_lasso], 
                    alpha = 1, lambda = grid)

plot(lasso.mod)

set.seed(1029)

cv.out_2 <- cv.glmnet(x_lasso[train_lasso, ], y_lasso[train_lasso], alpha = 1) 

plot(cv.out_2)

set.seed(1920)

bestlam_2 <- cv.out_2$lambda.min

lasso.pred <- predict(cv.out_2, s = bestlam_2, newx = x_ridge[test_ridge,])

mean((lasso.pred - y.test_ridge)^2) %>% kable()

x
0.1749583

set.seed(2739)

out <- glmnet(x_lasso[train_lasso, ], y_lasso[train_lasso], 
              alpha = 1, lambda = grid)

lasso.coef <- predict(out, type = "coefficients", s = bestlam_2)[1:14, ]

lasso.coef

(Intercept)    Catholic       Islam    Orthodox    Buddhist 
 0.42561685  0.05577020 -0.09275344  0.00000000  0.00000000 
      Pagan    Tengrism      Shinto       Hindu   Shamanism 
 0.00000000  0.00000000  0.00000000  0.00000000  0.00000000 
      eigen       close          rc    eigen.rc 
 0.00000000  3.22570629 -0.21240622  0.00000000

Community Grouping

Label Propagation 1000s:

The first community cluster below is done using label propagation. This results in 39 groups

set.seed(23)
comm.lab<-label.propagation.community(wars_in_1000s.ig)
#Inspect clustering object
# igraph::groups(comm.lab)

Walktrap 1000s:

Walktrap classification as seen below results in 19 distinct communities.

set.seed(238)
#Run clustering algorithm: fast_greedy
wars_in_1000s.wt<-walktrap.community(wars_in_1000s.ig)

#igraph::groups(wars_in_1000s.wt)

Adding more steps resulted in 19 groups for both 10 and 20 steps.

#Run & inspect clustering algorithm: 10 steps
#igraph::groups(walktrap.community(wars_in_1000s.ig, steps=10)) 
#Run & inspect clustering algorithm: 20 steps
#igraph::groups(walktrap.community(wars_in_1000s.ig ,steps=20))
#Run & inspect clustering algorithm

Machine Learning, Regression and Principle Components:

total_1000s_for_PCA <- total_1000s_brokerag_reg_binom[-c(20:27)]

apply(total_1000s_for_PCA[-1], 2, mean)

        Catholic            Islam         Orthodox         Buddhist 
     0.454545455      0.181818182      0.154545455      0.063636364 
           Pagan         Tengrism           Shinto            Hindu 
     0.036363636      0.018181818      0.054545455      0.045454545 
       Shamanism        totdegree         indegree        outdegree 
     0.009090909      2.754545455      1.336363636      1.418181818 
           eigen            close               rc         eigen.rc 
     0.028058711      0.023546832      0.287358773      0.003637773 
              dc         eigen.dc more_win_or_loss 
     0.712641227      0.024420939      0.481818182

apply(total_1000s_for_PCA[-1], 2, var)

        Catholic            Islam         Orthodox         Buddhist 
    0.2502085071     0.1501251043     0.1318598832     0.0601334445 
           Pagan         Tengrism           Shinto            Hindu 
    0.0353628023     0.0180150125     0.0520433695     0.0437864887 
       Shamanism        totdegree         indegree        outdegree 
    0.0090909091     8.9208507089     2.6656380317     6.3189324437 
           eigen            close               rc         eigen.rc 
    0.0076304265     0.0019575460     0.1260782284     0.0004728954 
              dc         eigen.dc more_win_or_loss 
    0.1260782284     0.0056490031     0.2519599666

pr.out <- prcomp(total_1000s_for_PCA[-1], scale = TRUE)

names(pr.out)

[1] "sdev"     "rotation" "center"   "scale"    "x"

pr.out$center

        Catholic            Islam         Orthodox         Buddhist 
     0.454545455      0.181818182      0.154545455      0.063636364 
           Pagan         Tengrism           Shinto            Hindu 
     0.036363636      0.018181818      0.054545455      0.045454545 
       Shamanism        totdegree         indegree        outdegree 
     0.009090909      2.754545455      1.336363636      1.418181818 
           eigen            close               rc         eigen.rc 
     0.028058711      0.023546832      0.287358773      0.003637773 
              dc         eigen.dc more_win_or_loss 
     0.712641227      0.024420939      0.481818182

pr.out$scale

        Catholic            Islam         Orthodox         Buddhist 
      0.50020846       0.38745981       0.36312516       0.24522122 
           Pagan         Tengrism           Shinto            Hindu 
      0.18805000       0.13422002       0.22813016       0.20925221 
       Shamanism        totdegree         indegree        outdegree 
      0.09534626       2.98677932       1.63267818       2.51374868 
           eigen            close               rc         eigen.rc 
      0.08735231       0.04424416       0.35507496       0.02174616 
              dc         eigen.dc more_win_or_loss 
      0.35507496       0.07515985       0.50195614

biplot(pr.out, scale = 0)

set.seed(172)

ggbiplot(pr.out, labels =  total_1000s_for_PCA$name, labels.size  =1.5)

pr.out$rotation = -pr.out$rotation 

pr.out$x = -pr.out$x

biplot(pr.out, scale = 0)

pr.out$sdev

 [1] 2.217501e+00 1.681548e+00 1.239242e+00 1.211199e+00 1.065982e+00
 [6] 1.037692e+00 1.029507e+00 1.011117e+00 1.005425e+00 9.514802e-01
[11] 8.848499e-01 7.782431e-01 6.162540e-01 4.426224e-01 2.541422e-01
[16] 1.091189e-01 7.597269e-16 6.258811e-16 2.174635e-16

pr.var <- pr.out$sdev^2

pr.var

 [1] 4.917311e+00 2.827605e+00 1.535720e+00 1.467004e+00 1.136318e+00
 [6] 1.076804e+00 1.059884e+00 1.022359e+00 1.010879e+00 9.053146e-01
[11] 7.829594e-01 6.056623e-01 3.797690e-01 1.959146e-01 6.458828e-02
[16] 1.190694e-02 5.771849e-31 3.917271e-31 4.729037e-32

pve <- pr.var / sum(pr.var)

pve

 [1] 2.588059e-01 1.488213e-01 8.082739e-02 7.721075e-02 5.980622e-02
 [6] 5.667390e-02 5.578337e-02 5.380835e-02 5.320417e-02 4.764814e-02
[11] 4.120839e-02 3.187696e-02 1.998784e-02 1.031129e-02 3.399383e-03
[16] 6.266808e-04 3.037815e-32 2.061722e-32 2.488967e-33

par(mfrow = c(1, 2))
plot(pve, xlab = "Principal Component",
ylab = "Proportion of Variance Explained", ylim = c(0, 1),
type = "b")

plot(cumsum(pve), xlab = "Principal Component",
ylab = "Cumulative Proportion of Variance Explained", ylim = c(0, 1), type = "b")

names(total_1200s)

 [1] "name"         "Catholic"     "Islam"        "Orthodox"    
 [5] "Buddhist"     "Pagan"        "Tengrism"     "Shinto"      
 [9] "Hindu"        "Shamanism"    "totdegree"    "indegree"    
[13] "outdegree"    "eigen"        "rc"           "eigen.rc"    
[17] "dc"           "eigen.dc"     "broker.tot"   "broker.coord"
[21] "broker.itin"  "broker.rep"   "broker.gate"  "broker.lia"

total_1200s_brokerag_reg<-total_1200s

total_1200s_brokerag_reg$win_rate <- (total_1200s_brokerag_reg$outdegree/total_1200s_brokerag_reg$totdegree)

total_1200s_brokerag_reg$loss_rate <- (total_1200s_brokerag_reg$indegree/total_1200s_brokerag_reg$totdegree)

total_1200s_brokerag_reg_binom <- total_1200s_brokerag_reg %>% mutate(more_win_or_loss = case_when(
  win_rate < 0.5 ~ 0,
    win_rate >= 0.5 ~ 1))

total_1200s_for_PCA <- total_1200s_brokerag_reg_binom[-c(20:27)]


apply(total_1200s_for_PCA[-1], 2, mean)

   Catholic       Islam    Orthodox    Buddhist       Pagan 
0.712500000 0.068750000 0.087500000 0.087500000 0.012500000 
   Tengrism      Shinto       Hindu   Shamanism   totdegree 
0.025000000 0.000000000 0.006250000 0.000000000 3.918750000 
   indegree   outdegree       eigen          rc    eigen.rc 
1.962500000 1.956250000 0.025567955 0.158754617 0.002192746 
         dc    eigen.dc  broker.tot 
0.841245383 0.023375209 0.341581810

apply(total_1200s_for_PCA[-1], 2, var)

    Catholic        Islam     Orthodox     Buddhist        Pagan 
2.061321e-01 6.442610e-02 8.034591e-02 8.034591e-02 1.242138e-02 
    Tengrism       Shinto        Hindu    Shamanism    totdegree 
2.452830e-02 0.000000e+00 6.250000e-03 0.000000e+00 2.666631e+01 
    indegree    outdegree        eigen           rc     eigen.rc 
6.237579e+00 1.595405e+01 5.631476e-03 7.141295e-02 7.316162e-05 
          dc     eigen.dc   broker.tot 
7.141295e-02 4.574350e-03 3.001236e+01

# I cannot scale variables with 

total_1200s_for_PCA<-total_1200s_for_PCA[-c(8,10)]

pr.out_2 <- prcomp(total_1200s_for_PCA[-1], scale = TRUE)

names(pr.out_2)

[1] "sdev"     "rotation" "center"   "scale"    "x"

pr.out_2$center

   Catholic       Islam    Orthodox    Buddhist       Pagan 
0.712500000 0.068750000 0.087500000 0.087500000 0.012500000 
   Tengrism       Hindu   totdegree    indegree   outdegree 
0.025000000 0.006250000 3.918750000 1.962500000 1.956250000 
      eigen          rc    eigen.rc          dc    eigen.dc 
0.025567955 0.158754617 0.002192746 0.841245383 0.023375209 
 broker.tot 
0.341581810

pr.out_2$scale

   Catholic       Islam    Orthodox    Buddhist       Pagan 
0.454017704 0.253822971 0.283453545 0.283453545 0.111451261 
   Tengrism       Hindu   totdegree    indegree   outdegree 
0.156615139 0.079056942 5.163943541 2.497514488 3.994251963 
      eigen          rc    eigen.rc          dc    eigen.dc 
0.075043164 0.267232010 0.008553457 0.267232010 0.067633938 
 broker.tot 
5.478353760

biplot(pr.out_2, scale = 0)

pr.out_2$rotation = -pr.out_2$rotation 

pr.out_2$x = -pr.out_2$x

biplot(pr.out_2, scale = 0)

set.seed(8192)

ggbiplot(pr.out_2, labels =  total_1200s_for_PCA$name, labels.size  =1.5)

pr.out$sdev

 [1] 2.217501e+00 1.681548e+00 1.239242e+00 1.211199e+00 1.065982e+00
 [6] 1.037692e+00 1.029507e+00 1.011117e+00 1.005425e+00 9.514802e-01
[11] 8.848499e-01 7.782431e-01 6.162540e-01 4.426224e-01 2.541422e-01
[16] 1.091189e-01 7.597269e-16 6.258811e-16 2.174635e-16

pr.var_2 <- pr.out_2$sdev^2

pr.var_2

 [1] 4.903737e+00 2.344663e+00 1.670548e+00 1.250176e+00 1.132904e+00
 [6] 1.097802e+00 1.011326e+00 9.460639e-01 8.661454e-01 5.139677e-01
[11] 1.659928e-01 9.667541e-02 2.916516e-30 4.832251e-31 2.292490e-31
[16] 1.889562e-32

pve_2 <- pr.var_2 / sum(pr.var_2)

pve_2

 [1] 3.064835e-01 1.465414e-01 1.044092e-01 7.813602e-02 7.080651e-02
 [6] 6.861260e-02 6.320785e-02 5.912899e-02 5.413409e-02 3.212298e-02
[11] 1.037455e-02 6.042213e-03 1.822822e-31 3.020157e-32 1.432806e-32
[16] 1.180977e-33

par(mfrow = c(1, 2))
plot(pve_2, xlab = "Principal Component",
ylab = "Proportion of Variance Explained", ylim = c(0, 1),
type = "b")

plot(cumsum(pve_2), xlab = "Principal Component",
ylab = "Cumulative Proportion of Variance Explained", ylim = c(0, 1), type = "b")

CUG 1000s

trans.cug<-cug.test(wars_1000s,FUN=gtrans,mode="digraph",cmode="size")
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.0984456 
Pr(X>=Obs): 1 
Pr(X<=Obs): 0

plot(trans.cug)

#t-stat between observed and simulated networks
(trans.cug$obs.stat-mean(trans.cug$rep.stat))/sd(trans.cug$rep.stat)

[1] -88.00188

cug.t<-function(cug.object){
  (cug.object$obs.stat-mean(cug.object$rep.stat))/sd(cug.object$rep.stat)
}

#compare network transitivity to null conditional on size
trans.cug<-cug.test(wars_1000s,FUN=gtrans,mode="digraph",cmode="size", reps = 100)
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 100 

Observed Value: 0.0984456 
Pr(X>=Obs): 1 
Pr(X<=Obs): 0

#plot vs. simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] -83.27903

CUG 1100s

trans.cug<-cug.test(wars_1100s,FUN=gtrans,mode="digraph",cmode="size")
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.1388889 
Pr(X>=Obs): 1 
Pr(X<=Obs): 0

plot(trans.cug)

#t-stat between observed and simulated networks
(trans.cug$obs.stat-mean(trans.cug$rep.stat))/sd(trans.cug$rep.stat)

[1] -67.57014

cug.t<-function(cug.object){
  (cug.object$obs.stat-mean(cug.object$rep.stat))/sd(cug.object$rep.stat)
}

#compare network transitivity to null conditional on size
trans.cug<-cug.test(wars_1100s,FUN=gtrans,mode="digraph",cmode="size", reps = 100)
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 100 

Observed Value: 0.1388889 
Pr(X>=Obs): 1 
Pr(X<=Obs): 0

#plot vs. simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] -62.85368

CUG 1200s

trans.cug<-cug.test(wars_1200s,FUN=gtrans,mode="digraph",cmode="size")
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.1111111 
Pr(X>=Obs): 1 
Pr(X<=Obs): 0

plot(trans.cug)

#t-stat between observed and simulated networks
(trans.cug$obs.stat-mean(trans.cug$rep.stat))/sd(trans.cug$rep.stat)

[1] -120.9757

cug.t<-function(cug.object){
  (cug.object$obs.stat-mean(cug.object$rep.stat))/sd(cug.object$rep.stat)
}

#compare network transitivity to null conditional on size
trans.cug<-cug.test(wars_1200s,FUN=gtrans,mode="digraph",cmode="size", reps = 100)
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: size 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 100 

Observed Value: 0.1111111 
Pr(X>=Obs): 1 
Pr(X<=Obs): 0

#plot vs. simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] -123.9686

CUG Test Centralization 1000s

#compare network degree centralization to null conditional on size
c.degree.cug <-cug.test(wars_1000s,FUN=centralization,  FUN.arg=list(FUN=degree, cmode="indegree"), mode="digraph", cmode="size") 
#plot vs simulation results
plot(c.degree.cug)

#t-stat between observed and simulated networks
cug.t(c.degree.cug)

[1] -3.691657

#compare network betweenness centralization to null conditional on size
b.degree.cug <-cug.test(wars_1000s,FUN=centralization,  FUN.arg=list(FUN=betweenness, cmode="directed"), mode="digraph", cmode="size", reps=100) 
#plot vs simulation results
plot(b.degree.cug)

#t-stat between observed and simulated networks
cug.t(b.degree.cug)

[1] 52.41828

CUG Test Centralization 1200s

#compare network degree centralization to null conditional on size
c.degree.cug <-cug.test(wars_1100s,FUN=centralization,  FUN.arg=list(FUN=degree, cmode="indegree"), mode="digraph", cmode="size") 
#plot vs simulation results
plot(c.degree.cug)

#t-stat between observed and simulated networks
cug.t(c.degree.cug)

[1] 0.1300734

#compare network betweenness centralization to null conditional on size
b.degree.cug <-cug.test(wars_1100s,FUN=centralization,  FUN.arg=list(FUN=betweenness, cmode="directed"), mode="digraph", cmode="size", reps=100) 
#plot vs simulation results
plot(b.degree.cug)

#t-stat between observed and simulated networks
cug.t(b.degree.cug)

[1] 29.04174

CUG Test Centralization 1200s

#compare network degree centralization to null conditional on size
c.degree.cug <-cug.test(wars_1200s,FUN=centralization,  FUN.arg=list(FUN=degree, cmode="indegree"), mode="digraph", cmode="size") 
#plot vs simulation results
plot(c.degree.cug)

#t-stat between observed and simulated networks
cug.t(c.degree.cug)

[1] -1.973945

#compare network betweenness centralization to null conditional on size
b.degree.cug <-cug.test(wars_1200s,FUN=centralization,  FUN.arg=list(FUN=betweenness, cmode="directed"), mode="digraph", cmode="size", reps=100) 
#plot vs simulation results
plot(b.degree.cug)

#t-stat between observed and simulated networks
cug.t(b.degree.cug)

[1] 220.5521

Conditioning on Different Network Properties 1000s

#compare network transitivity to null conditional on dyads
trans.cug<-cug.test(wars_1000s,FUN=gtrans,mode="digraph",cmode="dyad")
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: dyad.census 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.0984456 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1

#plot vs simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] 11.14202

#compare network transitivity to null conditional on edges (density)
trans.cug<-cug.test(wars_1000s,FUN=gtrans,mode="digraph",cmode="edges", reps=100)
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: edges 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 100 

Observed Value: 0.0984456 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1

#plot vs simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] 12.58383

Conditioning on Different Network Properties 1100s

#compare network transitivity to null conditional on dyads
trans.cug<-cug.test(wars_1100s,FUN=gtrans,mode="digraph",cmode="dyad")
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: dyad.census 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.1388889 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1

#plot vs simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] 16.10525

#compare network transitivity to null conditional on edges (density)
trans.cug<-cug.test(wars_1100s,FUN=gtrans,mode="digraph",cmode="edges", reps=100)
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: edges 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 100 

Observed Value: 0.1388889 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1

#plot vs simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] 17.64935

Conditioning on Different Network Properties 1200s

#compare network transitivity to null conditional on dyads
trans.cug<-cug.test(wars_1200s,FUN=gtrans,mode="digraph",cmode="dyad")
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: dyad.census 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 1000 

Observed Value: 0.1111111 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1

#plot vs simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] 19.55207

#compare network transitivity to null conditional on edges (density)
trans.cug<-cug.test(wars_1200s,FUN=gtrans,mode="digraph",cmode="edges", reps=100)
trans.cug


Univariate Conditional Uniform Graph Test

Conditioning Method: edges 
Graph Type: digraph 
Diagonal Used: FALSE 
Replications: 100 

Observed Value: 0.1111111 
Pr(X>=Obs): 0 
Pr(X<=Obs): 1

#plot vs simulation results
plot(trans.cug)

#t-stat between observed and simulated networks
cug.t(trans.cug)

[1] 20.27918

Compare to Simulated Networks

Simulated Networks 1000s

#create empty dataframe for simulations
trials<-data.frame(id=1:100, gdens=NA, gtrans=NA, cent.deg=NA, cent.bet=NA)

#as.network(wars_in_1000s)

wars_in_1000s_2 <- wars_in_1000s[-c(19, 64, 65, 71),]

wars_in_1000s_2.stat <- as.network(wars_in_1000s_2)

#simulate PA networks and add stats to trials dataframe: size
for ( i in 1:100 ){ 
  pa.ig<- igraph::sample_pa(n = network.size(wars_in_1000s_2.stat), directed=TRUE)
  pa.stat<-intergraph::asNetwork(pa.ig)
  trials$gdens<-gden(pa.stat)
  trials$gtrans[i] <- gtrans(pa.stat)
  trials$cent.deg[i] <- centralization(pa.stat, FUN=degree, cmode="indegree")
  trials$cent.bet[i] <-centralization(pa.stat, FUN=betweenness)
}

summary(trials)

       id             gdens              gtrans     cent.deg      
 Min.   :  1.00   Min.   :0.009009   Min.   :0   Min.   :0.07347  
 1st Qu.: 25.75   1st Qu.:0.009009   1st Qu.:0   1st Qu.:0.13769  
 Median : 50.50   Median :0.009009   Median :0   Median :0.17438  
 Mean   : 50.50   Mean   :0.009009   Mean   :0   Mean   :0.18631  
 3rd Qu.: 75.25   3rd Qu.:0.009009   3rd Qu.:0   3rd Qu.:0.21107  
 Max.   :100.00   Max.   :0.009009   Max.   :0   Max.   :0.50463  
    cent.bet        
 Min.   :0.0009174  
 1st Qu.:0.0026569  
 Median :0.0042626  
 Mean   :0.0048482  
 3rd Qu.:0.0061481  
 Max.   :0.0155508

sim.t<-function(g, trials){
  temp<-data.frame(density=c(gden(g),mean(trials$gdens),sd(trials$gdens)),
             transitivity=c(gtrans(g),mean(trials$gtrans),sd(trials$gtrans)),
             indegCent=c(centralization(g, FUN=degree, cmode="indegree"),mean(trials$cent.deg), sd(trials$cent.deg)),
             betwCent=c(centralization(g, FUN=betweenness), mean(trials$cent.bet), sd(trials$cent.bet)))
  rownames(temp)<-c("Observed","Simulated", "SD")
  temp<-data.frame(t(temp))
  temp$tvalue<-(temp$Observed-temp$Simulated)/temp$SD
  temp
}

plot.sim.t<-function(g,trials){
  temp<-data.frame(net.stat=c("gtrans","cent.deg","cent.bet"), x=c(gtrans(g),centralization(g, FUN=degree, cmode="indegree"), centralization(g, FUN=betweenness)))
  trials[c(3:5)]%>%
    gather(key="net.stat",value = "estimate")%>%
    ggplot(aes(estimate)) +
    geom_histogram() +
    facet_wrap(net.stat ~ ., scales="free", ncol=3) +
    geom_vline(data=temp, aes(xintercept=x),
               linetype="dashed", size=1, colour="red")
}

#check for differences from null

sim.t(g=wars_1000s, trials)

               Observed   Simulated          SD    tvalue
density      0.01253071 0.009009009 0.000000000       Inf
transitivity 0.09844560 0.000000000 0.000000000       Inf
indegCent    0.05157025 0.186305785 0.072871174 -1.848955
betwCent     0.01810448 0.004848199 0.002948065  4.496604

plot.sim.t(wars_in_1000s_2.stat, trials)

Simulated Networks 1100s

#as.network(wars_in_1100s)

wars_in_1100s_2 <- wars_in_1100s[-c(47, 61, 62, 77, 78, 83),]

#as.network(wars_in_1100s_2)

wars_in_1100s_2_1 <- wars_in_1100s_2[-c(178,181, 182, 184, 185, 188),]

#as.network(wars_in_1100s_2_1)

wars_in_1100s_2_2 <- wars_in_1100s_2_1[-c(183, 202, 203, 204, 205, 206),]

#as.network(wars_in_1100s_2_2)

wars_in_1100s_2_3<- wars_in_1100s_2_2[-c(201, 202),]

wars_in_1100s_2.stat <- as.network(wars_in_1100s_2_3)

#simulate PA networks and add stats to trials dataframe: size
for ( i in 1:100 ){ 
  pa.ig<- igraph::sample_pa(n = network.size(wars_in_1100s_2.stat), directed=TRUE)
  pa.stat<-intergraph::asNetwork(pa.ig)
  trials$gdens<-gden(pa.stat)
  trials$gtrans[i] <- gtrans(pa.stat)
  trials$cent.deg[i] <- centralization(pa.stat, FUN=degree, cmode="indegree")
  trials$cent.bet[i] <-centralization(pa.stat, FUN=betweenness)
}

summary(trials)

       id             gdens             gtrans     cent.deg      
 Min.   :  1.00   Min.   :0.01031   Min.   :0   Min.   :0.09484  
 1st Qu.: 25.75   1st Qu.:0.01031   1st Qu.:0   1st Qu.:0.14746  
 Median : 50.50   Median :0.01031   Median :0   Median :0.18956  
 Mean   : 50.50   Mean   :0.01031   Mean   :0   Mean   :0.19567  
 3rd Qu.: 75.25   3rd Qu.:0.01031   3rd Qu.:0   3rd Qu.:0.23166  
 Max.   :100.00   Max.   :0.01031   Max.   :0   Max.   :0.37901  
    cent.bet       
 Min.   :0.001162  
 1st Qu.:0.003045  
 Median :0.004794  
 Mean   :0.005423  
 3rd Qu.:0.007267  
 Max.   :0.017632

sim.t<-function(g, trials){
  temp<-data.frame(density=c(gden(g),mean(trials$gdens),sd(trials$gdens)),
             transitivity=c(gtrans(g),mean(trials$gtrans),sd(trials$gtrans)),
             indegCent=c(centralization(g, FUN=degree, cmode="indegree"),mean(trials$cent.deg), sd(trials$cent.deg)),
             betwCent=c(centralization(g, FUN=betweenness), mean(trials$cent.bet), sd(trials$cent.bet)))
  rownames(temp)<-c("Observed","Simulated", "SD")
  temp<-data.frame(t(temp))
  temp$tvalue<-(temp$Observed-temp$Simulated)/temp$SD
  temp
}

cent.bet, cent.deg, gtrans

plot.sim.t<-function(g,trials){
  temp<-data.frame(net.stat=c("gtrans","cent.deg","cent.bet"), x=c(gtrans(g),centralization(g, FUN=degree, cmode="indegree"), centralization(g, FUN=betweenness)))
  trials[c(3:5)]%>%
    gather(key="net.stat",value = "estimate")%>%
    ggplot(aes(estimate)) +
    geom_histogram() +
    facet_wrap(net.stat ~ ., scales="free", ncol=3) +
    geom_vline(data=temp, aes(xintercept=x),
               linetype="dashed", size=1, colour="red")
}

#check for differences from null

sim.t(g=wars_1100s, trials)

               Observed   Simulated          SD    tvalue
density      0.02555842 0.010309278 0.000000000       Inf
transitivity 0.13888889 0.000000000 0.000000000       Inf
indegCent    0.13205295 0.195666233 0.058576476 -1.085987
betwCent     0.01398369 0.005423314 0.003073955  2.784808

plot.sim.t(wars_in_1100s_2.stat, trials)

Simulated Networks 1200s

# as.network(wars_in_1200s)

wars_in_1200s_2 <- wars_in_1200s[-c(23, 93, 96, 142, 163, 167),]

# as.network(wars_in_1200s_2)

wars_in_1200s_2_1 <- wars_in_1200s_2[-c(162, 189, 225, 236, 302, 304),]

# as.network(wars_in_1200s_2_1)

wars_in_1200s_2_2 <- wars_in_1200s_2_1[-c(299),]

wars_in_1200s_2.stat <- as.network(wars_in_1200s_2_2)

#simulate PA networks and add stats to trials dataframe: size
for ( i in 1:100 ){ 
  pa.ig<- igraph::sample_pa(n = network.size(wars_in_1200s_2.stat), directed=TRUE)
  pa.stat<-intergraph::asNetwork(pa.ig)
  trials$gdens<-gden(pa.stat)
  trials$gtrans[i] <- gtrans(pa.stat)
  trials$cent.deg[i] <- centralization(pa.stat, FUN=degree, cmode="indegree")
  trials$cent.bet[i] <-centralization(pa.stat, FUN=betweenness)
}

summary(trials)

       id             gdens              gtrans     cent.deg      
 Min.   :  1.00   Min.   :0.006211   Min.   :0   Min.   :0.06922  
 1st Qu.: 25.75   1st Qu.:0.006211   1st Qu.:0   1st Qu.:0.12582  
 Median : 50.50   Median :0.006211   Median :0   Median :0.14469  
 Mean   : 50.50   Mean   :0.006211   Mean   :0   Mean   :0.15997  
 3rd Qu.: 75.25   3rd Qu.:0.006211   3rd Qu.:0   3rd Qu.:0.18871  
 Max.   :100.00   Max.   :0.006211   Max.   :0   Max.   :0.35852  
    cent.bet        
 Min.   :0.0005646  
 1st Qu.:0.0018678  
 Median :0.0028960  
 Mean   :0.0033793  
 3rd Qu.:0.0043615  
 Max.   :0.0125295

sim.t<-function(g, trials){
  temp<-data.frame(density=c(gden(g),mean(trials$gdens),sd(trials$gdens)),
             transitivity=c(gtrans(g),mean(trials$gtrans),sd(trials$gtrans)),
             indegCent=c(centralization(g, FUN=degree, cmode="indegree"),mean(trials$cent.deg), sd(trials$cent.deg)),
             betwCent=c(centralization(g, FUN=betweenness), mean(trials$cent.bet), sd(trials$cent.bet)))
  rownames(temp)<-c("Observed","Simulated", "SD")
  temp<-data.frame(t(temp))
  temp$tvalue<-(temp$Observed-temp$Simulated)/temp$SD
  temp
}

plot.sim.t<-function(g,trials){
  temp<-data.frame(net.stat=c("gtrans","cent.deg","cent.bet"), x=c(gtrans(g),centralization(g, FUN=degree, cmode="indegree"), centralization(g, FUN=betweenness)))
  trials[c(3:5)]%>%
    gather(key="net.stat",value = "estimate")%>%
    ggplot(aes(estimate)) +
    geom_histogram() +
    facet_wrap(net.stat ~ ., scales="free", ncol=3) +
    geom_vline(data=temp, aes(xintercept=x),
               linetype="dashed", size=1, colour="red")
}

#check for differences from null

sim.t(g=wars_1100s, trials)

               Observed  Simulated          SD     tvalue
density      0.02555842 0.00621118 0.000000000        Inf
transitivity 0.13888889 0.00000000 0.000000000        Inf
indegCent    0.13205295 0.15996992 0.052300150 -0.5337838
betwCent     0.01398369 0.00337929 0.002025597  5.2351971

plot.sim.t(wars_in_1100s_2.stat, trials)

Comment on this article Share:

Networks Blog Post 9

Loading the Networks

Putting Network into Necessary Formats

Adding Attributes

Brokerage scores in the 1000s

Brokerage scores in the 1100s

Brokerage scores in the 1200s

Inital Graphical Representation

1000s GGplot

1100s GGplot

1200s GGplot

Community Grouping

Label Propagation 1000s:

Walktrap 1000s:

Machine Learning, Regression and Principle Components:

CUG 1000s

CUG 1100s

CUG 1200s

CUG Test Centralization 1000s

CUG Test Centralization 1200s

CUG Test Centralization 1200s

Conditioning on Different Network Properties 1000s

Conditioning on Different Network Properties 1100s

Conditioning on Different Network Properties 1200s

Compare to Simulated Networks

Simulated Networks 1000s

Simulated Networks 1100s

Simulated Networks 1200s

Reuse

Citation