#=============================================================#
#                                                             #
# Network permutation tests, correlation & multiple regression#
# (QAP & MRQAP)                                               #
# COMM 645 - Communication Networks                           #
# Katya Ognyanova, 10/10/2012                                 #
#                                                             #
#=============================================================#


# This lab will use R packages sna & network.
# Make sure you have those packages installed:

install.packages("sna")
install.packages("network")

# ... and loaded:

library(sna)
library(network)


# The emon dataset - interorganizational networks
#=============================================================#

# We'll use the emon dataset: interorganizational Search and Rescue
# networks (Drabek et al.), included in the "network" package.
# The dataset contains 7 networks; each node has 8 attributes.

?emon
data(emon)
emon   # a list of 7 networks

# Cheyenne network (the first one in the list)
ch.net <- emon$Cheyenne
plot(ch.net)

# Extract the node attributes of the Cheyenne network into a data frame
# (the Cheyenne network has 14 nodes and 8 vertex attributes)
ch.attr <- data.frame(matrix(0, 14, 8))
colnames(ch.attr) <- c("vertex.names", "Command.Rank.Score", "Decision.Rank.Score",
                       "Formalization", "Location", "Paid.Staff", "Sponsorship",
                       "Volunteer.Staff")

# Copy each of the 8 vertex attributes to a variable in the ch.attr data frame
for (i in 1:8) { ch.attr[,i] <- (ch.net %v% colnames(ch.attr)[i]) }
ch.attr


# Correlation & Linear Regression in R
#=============================================================#

# Check the correlation between command rank and decision rank scores:
# (remember that ch.attr[[2]] is the same as ch.attr$Command.Rank.Score, etc.)

cor(ch.attr[[2]], ch.attr[[3]])       # Calculate the correlation between command and decision rank
cor.test(ch.attr[[2]], ch.attr[[3]])  # Examine the significance

# Linear regression: lm(DV ~ IV1 + IV2 + IV3 + ...)
# DV = dependent variable, IV = independent variables
# As is often the case in R, you can store the regression results in an object.
# Let's call it ch.lm.1 (results from Cheyenne linear regression model #1)

ch.lm.1 <- lm(ch.attr[[2]] ~ ch.attr[[3]] + ch.attr[[6]] + ch.attr[[8]])
summary(ch.lm.1)

# Calculate centrality measures and store them in the ch.attr data frame:

ch.attr$IndegCent   <- degree(ch.net, gmode="digraph", cmode="indegree")   # indegree centralities
ch.attr$OutdegCent  <- degree(ch.net, gmode="digraph", cmode="outdegree")  # outdegree centralities
ch.attr$BetweenCent <- betweenness(ch.net, gmode="digraph")                # betweenness centralities

# We can use the centralities in a linear regression model:

ch.lm.2 <- lm(ch.attr[[2]] ~ ch.attr[[6]] + ch.attr[[11]])  # Model with betweenness centrality
summary(ch.lm.2)


# Conditional Uniform Graph (CUG) tests for Network Measures
#=============================================================#

# Let's examine a network measure computed for our observed data
# (e.g. reciprocity, transitivity, in-degree centralization, etc.).
# How can we tell whether the score we're looking at is relatively high or low?
# Well, we can compute the same measure for all graphs with certain properties
# (for instance all graphs that have the same size/density as our observed network).
# Then we can see how the measure computed for our observed network compares.

# Of course, in many cases it's not very practical to look at all possible graphs
# of a particular size and density. Instead, we randomly draw graphs from a uniform
# distribution (i.e. each graph has an equal probability of being selected) and compare
# the metrics computed for those graphs with the one we got from our network.
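# As a rough, hand-rolled sketch of that comparison (illustrative only - the packaged
# test we use below does this properly), we can draw a few hundred random directed
# graphs of the same size as ours, with tie probability equal to our observed density,
# and see where our transitivity falls in that distribution. Note that this conditions
# on the expected density rather than the exact edge count (unlike cmode="edges" below).

obs.tr <- gtrans(ch.net)                                   # observed transitivity of Cheyenne
sim.tr <- gtrans(rgraph(network.size(ch.net), 500,
                        tprob=gden(ch.net)))               # transitivity of 500 random graphs
mean(sim.tr >= obs.tr)                                     # share at least as transitive as ours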
# This is the basic idea behind the conditional uniform graph (CUG) test.
# The function we'll use is cug.test(OurNetwork, Some.network.measure.function, cmode="condition")


# Conditioning on size
#====================================#

# We're drawing from all possible graphs with the same number of nodes as ours.
# How does the density of our network compare?
# (remember gden is the graph density function)

ch.cug.den <- cug.test(ch.net, gden, cmode="size")
ch.cug.den

# Conditioning on number of edges
#====================================#

# Is there more reciprocity in our observed network than we would expect based on its density?

ch.cug.recip <- cug.test(ch.net, grecip, cmode="edges")
ch.cug.recip
plot(ch.cug.recip)

# Looks like the answer is yes - very few of those 1000 graphs with the same density as
# our network had reciprocity as high as ours.

# In order to do this for centralization, we also have to tell cug.test what the arguments for
# the "centralization" function are. Remember that it takes arguments indicating what type of
# centralization should be calculated - in-degree, out-degree, betweenness, etc.
# We supply those arguments using FUN.args = a list of arguments for the function:

ch.cug.cent <- cug.test(ch.net, centralization, cmode="edges",
                        FUN.args=list(FUN=degree, cmode="outdegree"))
ch.cug.cent
plot(ch.cug.cent)

# Conditioning on dyad census
#====================================#

# Here we're comparing our network to graphs that have the same number of dyads that are:
# (1) null - i.e. dyads (i,j) with no links from i to j or from j to i
# (2) asymmetric - dyads (i,j) with only one directed link (i -> j or j -> i)
# (3) mutual - dyads (i,j) with two directed links (i <-> j)
#
# Note that this fixes size, density, and reciprocity - all graphs will have the same number
# of reciprocated edges as our observed network.
# Given the density and reciprocity of our network, how extreme is its level of transitivity?

ch.cug.tr <- cug.test(ch.net, gtrans, cmode="dyad.census")  # Conditioning on the dyad census
ch.cug.tr
plot(ch.cug.tr)

# The answer is: very ;)


# Network correlations and the Quadratic Assignment Procedure (QAP)
#=============================================================#

# Permutation tests

# We can restrict the graphs we compare our network to by looking at ones that are
# somewhat similar in structure to our observed data. We generate those by taking the
# matrix for our network and reshuffling (permuting) its rows and columns.
# This is the idea behind the Quadratic Assignment Procedure (QAP) used to test
# the significance of correlations between networks.

# We'll use the Padgett Florentine marriage & business ties dataset from the ergm package:

install.packages("ergm")
library(ergm)
data(florentine)
detach("package:ergm")

# The data contains two network objects - one with marital and another one
# with business relations between Florentine families.

flobusiness; flomarriage

# Compute the network correlation:
gcor(flomarriage, flobusiness)

# Test the correlation using qaptest:
# qaptest(A.List.Of.Networks, Some.Network.Function, reps=100)
# where reps is the number of graphs to be generated.
# Out of 100 permutations, how many had a correlation >= or <= the observed value?
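# As a rough sketch of what qaptest does under the hood (illustrative only):
# rmperm() applies one random permutation simultaneously to the rows and columns of a
# matrix, so the structure is preserved but the node labels get reshuffled.

obs.cor  <- gcor(flomarriage, flobusiness)                     # observed correlation
fb       <- as.sociomatrix(flobusiness)                        # business ties as a matrix
perm.cor <- sapply(1:100, function(i) gcor(flomarriage, rmperm(fb)))
mean(perm.cor >= obs.cor)   # proportion of permuted correlations >= the observed one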
# In the call below, g1=1 and g2=2 tell qaptest to compare the first and second elements
# of the list of networks passed as its first argument.

flo.qap <- qaptest(list(flomarriage, flobusiness), gcor, g1=1, g2=2, reps=100)
flo.qap
plot(flo.qap)

# Instead of correlation, we could test another measure of similarity/distance
# between two networks.


# Multiple regression for network variables: MRQAP
#=============================================================#

# We can similarly use permutation tests for a linear regression
# on network variables - known as MRQAP (multiple regression QAP).

# Note that the matrices used in the regression can contain different relations, the same
# relation at different points in time, or even dyadic covariate (attribute) data. For instance,
# you can have a matrix of social ties as a DV and a same-gender attribute matrix as an IV
# (i.e. a matrix where (i,j)=1 if i and j are both female or both male, and 0 if they have
# different genders). We'll sketch how to build such a matrix at the end of this script.

# Let's create an array containing 3 random networks, each with 10 nodes:

x <- rgraph(10, 3)
x[1,,]  # matrix 1
x[2,,]  # matrix 2
x[3,,]  # matrix 3

# A matrix constructed as a linear combination of the first two networks in x:

y <- 3*x[1,,] + 5*x[2,,] + 7

# Now let's run a network regression of y on x and see what happens.
# netlm takes a single matrix or network object as its first argument (DV)
# and a stack of matrices (an array containing all the IVs) or a list of
# network objects as its second argument.

net.mod.1 <- netlm(y, x, reps=100)
summary(net.mod.1)

# Oh look - the first two parameters are significant and close to 3 and 5, and the
# intercept is close to 7 - just as you'd expect based on the way we constructed y.

#=============================================================#
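# Bonus: a dyadic covariate (attribute) matrix as an IV
#=============================================================#

# A quick sketch of the dyadic covariate idea mentioned above (illustrative only, and
# assuming ch.net and ch.attr from the beginning of this lab are still in the workspace).
# We build a same-sponsorship matrix with outer() and use it as the single IV in a
# network regression with the Cheyenne ties as the DV; the object names below are just
# for this sketch.

same.spon <- 1 * outer(ch.attr$Sponsorship, ch.attr$Sponsorship, "==")  # (i,j)=1 if i & j share a sponsorship type
diag(same.spon) <- 0                                                    # ignore self-ties

ch.qap.mod <- netlm(ch.net, same.spon, reps=100)  # MRQAP with one dyadic covariate
summary(ch.qap.mod)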