From 02e20ba8123bc9d4deb8ad316e536a2e2c2ad181 Mon Sep 17 00:00:00 2001
From: hyginn <boris.steipe@utoronto.ca>
Date: Wed, 23 Sep 2020 13:00:38 +1000
Subject: [PATCH] Maintenance

---
 FND-MAT-Graphs_and_networks.R | 173 +++++++++++++++++-----------------
 1 file changed, 85 insertions(+), 88 deletions(-)

diff --git a/FND-MAT-Graphs_and_networks.R b/FND-MAT-Graphs_and_networks.R
index d560fb9..760636c 100644
--- a/FND-MAT-Graphs_and_networks.R
+++ b/FND-MAT-Graphs_and_networks.R
@@ -1,11 +1,5 @@
 # tocID <- "FND-MAT-Graphs_and_networks.R"
 #
-# ---------------------------------------------------------------------------- #
-#  PATIENCE  ...                                                               #
-#    Do not yet work wih this code. Updates in progress. Thank you.            #
-#    boris.steipe@utoronto.ca                                                  #
-# ---------------------------------------------------------------------------- #
-#
 # Purpose:  A Bioinformatics Course:
 #              R code accompanying the FND-MAT-Graphs_and_networks unit.
 #
@@ -22,7 +16,7 @@
 #           0.1    First code copied from 2016 material.
 #
 #
-# TODO:
+# TODO:     Add some explicit tasks and sample solutions.
 #
 #
 # == DO NOT SIMPLY  source()  THIS FILE! =======================================
@@ -35,30 +29,29 @@
 
 
 #TOC> ==========================================================================
-#TOC>
-#TOC>   Section  Title                                        Line
-#TOC> ------------------------------------------------------------
-#TOC>   1        Review                                         50
-#TOC>   2        DEGREE DISTRIBUTIONS                          204
-#TOC>   2.1        Random graph                                210
-#TOC>   2.2        scale-free graph (Barabasi-Albert)          258
-#TOC>   2.3        Random geometric graph                      323
-#TOC>   3        A CLOSER LOOK AT THE igraph PACKAGE           445
-#TOC>   3.1        Basics                                      448
-#TOC>   3.2        Components                                  520
-#TOC>   4        RANDOM GRAPHS AND GRAPH METRICS               539
-#TOC>   4.1        Diameter                                    576
-#TOC>   5        GRAPH CLUSTERING                              645
-#TOC>
+#TOC> 
+#TOC>   Section  Title                                          Line
+#TOC> --------------------------------------------------------------
+#TOC>   1        REVIEW                                           50
+#TOC>   2        DEGREE DISTRIBUTIONS                            206
+#TOC>   2.1        Random graph                                  212
+#TOC>   2.2        scale-free graph (Barabasi-Albert)            260
+#TOC>   2.3        Random geometric graph                        326
+#TOC>   3        A CLOSER LOOK AT THE igraph:: PACKAGE           448
+#TOC>   3.1        Basics                                        451
+#TOC>   3.2        Components                                    523
+#TOC>   4        RANDOM GRAPHS AND GRAPH METRICS                 542
+#TOC>   4.1        Diameter                                      579
+#TOC>   5        GRAPH CLUSTERING                                648
+#TOC> 
 #TOC> ==========================================================================
 
 
-# =    1  Review  ==============================================================
+# =    1  REVIEW  ==============================================================
 
 # This tutorial covers basic concepts of graph theory and analysis in R. Make
 # sure you have pulled the latest version of the project from the GitHub
-# repository, and that you have typed init() to load some utility functions and
-# data.
+# repository.
 
 # Let's explore some of the basic ideas of graph theory by starting with a small
 # random graph.
@@ -82,7 +75,7 @@ makeRandomGenenames <- function(N) {
 N <- 20
 
 set.seed(112358)                       # set RNG seed for repeatable randomness
-(Nnames <- makeRandomGenenames(N))
+( Nnames <- makeRandomGenenames(N) )
 set.seed(NULL)                         # reset the RNG
 
 # One way to represent graphs in a computer is as an "adjacency matrix". In this
@@ -90,11 +83,11 @@ set.seed(NULL)                         # reset the RNG
 # intersection of a row and column contains a value/TRUE if there is an edge,
 # 0/FALSE otherwise.
 
-# Let's create an adjacency matrix for random graph: let's say a pair of nodes
-# has probability p <- 0.1 to have an edge, and our graph is symmetric , i.e. it
-# is an undirected graph, and it has neither self-edges, i.e. loops, nor
-# multiple edges between the same nodes, i.e. it is a "simple" graph. We use our
-# the Nnames vector as node labels.
+# Let's create an adjacency matrix for our random graph: let's say a pair
+# of nodes has probability p <- 0.09 to have an edge, and our graph is
+# symmetric , i.e. it is an undirected graph, and it has neither self-edges,
+# i.e. loops, nor multiple edges between the same nodes, i.e. it
+# is a "simple" graph. We use the Nnames vector as node labels.
 
 makeRandomAM <- function(nam, p = 0.1) {
   # Make a random adjacency matrix for a set of nodes with edge probability p
@@ -106,11 +99,11 @@ makeRandomAM <- function(nam, p = 0.1) {
   #
 
   N <- length(nam)
-  AM <- matrix(numeric(N * N), ncol = N)  # The adjacency matrix
+  AM <- matrix(numeric(N * N), ncol = N)  # Adjacency matrix, pre-filled with 0
   rownames(AM) <- nam
   colnames(AM) <- nam
   for (iRow in 1:(N-1)) { # Note how we make sure iRow != iCol - this prevents
-                          # loops
+                          # loops (self-edges)
     for (iCol in (iRow+1):N) {
       if (runif(1) < p) {     # runif() creates uniform random numbers
                               # between 0 and 1. The expression is TRUE with
@@ -123,13 +116,13 @@ makeRandomAM <- function(nam, p = 0.1) {
 }
 
 set.seed(112358)                       # set RNG seed for repeatable randomness
-(myRandAM <- makeRandomAM(Nnames, p = 0.09))
+( myRandAM <- makeRandomAM(Nnames, p = 0.09) )
 set.seed(NULL)                         # reset the RNG
 
 
 # Listing the matrix is not very informative - we should plot this graph. The
-# standard package for work with graphs in r is "igraph". We'll go into more
-# details of the igraph package a bit later, for now we just use it to plot:
+# standard package for work with graphs in r is "igraph::". We'll go into more
+# details of the igraph:: package a bit later, for now we just use it to plot:
 
 if (! requireNamespace("igraph", quietly = TRUE)) {
   install.packages("igraph")
@@ -149,7 +142,7 @@ myGxy <- igraph::layout_with_graphopt(myG,
 set.seed(NULL)                         # reset the RNG
 
 
-# The igraph package adds its own function to the collection of plot()
+# The igraph:: package adds its own function to the collection of plot()
 # functions; R makes the selection which plot function to use based on the class
 # of the object that we request to plot. This plot function has parameters
 #  layout - the x,y coordinates of the nodes;
@@ -160,14 +153,14 @@ set.seed(NULL)                         # reset the RNG
 # See ?igraph.plotting for the complete list of parameters
 
 
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myG,
      layout = myGxy,
      rescale = FALSE,
      xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
      ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
      vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 1600 + (300 * igraph::degree(myG)),
+     vertex.size = 1000 + (900 * igraph::degree(myG)),
      vertex.label = sprintf("%s(%i)", names(igraph::V(myG)), igraph::degree(myG)),
      vertex.label.family = "sans",
      vertex.label.cex = 0.7)
@@ -185,7 +178,7 @@ sum(myRandAM)
 # What about the degree distribution? We can get that simply by summing over the
 # rows and summing over the columns and adding the two vectors.
 
-rowSums(myRandAM) +  colSums(myRandAM) # check this against the plot!
+rowSums(myRandAM) + colSums(myRandAM) # check this against the plot!
 
 # The function degree() gives the same values
 igraph::degree(myG)
@@ -202,10 +195,13 @@ axis(side = 1, at = 0:7)
 # quite well, automatically. But for this purpose I want the columns of the
 # histogram to represent exactly one node-degree difference.
 
-# A degree distribution is actually quite an important descriptor of graphs,
-# since it is very sensitive to the generating mechanism. For biological
+# A degree distribution is actually a very informative descriptor of graphs,
+#  since it is very sensitive to the generating mechanism. For biological
 # networks, that is one of the key questions we are interested in: how was the
-# network formed?
+# network formed? Asking about the mechanism, the way the interactions
+# support function and shape the fitness landscape in which the cell evolves,
+# connects our mathematical abstraction with biological insight.
+
 
 # =    2  DEGREE DISTRIBUTIONS  ================================================
 
@@ -225,14 +221,14 @@ myG200 <- igraph::graph_from_adjacency_matrix(my200AM, mode = "undirected")
 myGxy <- igraph::layout_with_graphopt(myG200, charge=0.0001) # calculate layout
                                                              # coordinates
 
-oPar <- par(mar= rep(0,4))             # Turn margins off, save graphics state
+oPar <- par(mar= c(0, 0, 0, 0))             # Turn margins off, save graphics state
 plot(myG200,
      layout = myGxy,
      rescale = FALSE,
      xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
      ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
      vertex.color=heat.colors(max(igraph::degree(myG200)+1))[igraph::degree(myG200)+1],
-     vertex.size = 150 + (60 * igraph::degree(myG200)),
+     vertex.size = 150 + (70 * igraph::degree(myG200)),
      vertex.label = NA)
 par(oPar)                              # restore graphics state
 
@@ -251,7 +247,7 @@ axis(side = 1, at = 0:10)
 # Note the pronounced peak of this distribution: this is not "scale-free".
 # Here is the log-log plot of frequency vs. degree-rank ...
 
-(freqRank <- table(dg))
+( freqRank <- table(dg) )
 plot(log10(as.numeric(names(freqRank)) + 1),
      log10(as.numeric(freqRank)), type = "b",
      pch = 21, bg = "#A5F5CC",
@@ -264,9 +260,9 @@ plot(log10(as.numeric(names(freqRank)) + 1),
 # ==   2.2  scale-free graph (Barabasi-Albert)  ================================
 
 # What does one of those intriguing "scale-free" distributions look like? The
-# iGraph package has a function to make random graphs according to the
+# iGraph:: package has a function to make random graphs according to the
 # Barabasi-Albert model of scale-free graphs. It is: sample_pa(), where pa
-# stands for "preferential attachment". Preferential attachment is one type of
+# stands for "preferential attachment". Preferential attachment is _one_ type of
 # process that will yield scale-free distributions.
 
 N <- 200
@@ -277,7 +273,7 @@ set.seed(NULL)                         # reset the RNG
 
 GBAxy <- igraph::layout_with_graphopt(GBA, charge=0.0001)
 
-oPar <- par(mar= rep(0,4))             # Turn margins off, save graphics state
+oPar <- par(mar= c(0, 0, 0, 0))             # Turn margins off, save graphics state
 plot(GBA,
      layout = GBAxy,
      rescale = FALSE,
@@ -296,7 +292,7 @@ par(oPar)                              # restore graphics state
 # singletons.
 
 # What's the degree distribution of this graph?
-(dg <- igraph::degree(GBA))
+( dg <- igraph::degree(GBA) )
 brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
 hist(dg, breaks=brk, col="#DCF5B5",
      xlim = c(0,max(dg)+1), xaxt = "n",
@@ -314,9 +310,10 @@ plot(log10(as.numeric(names(freqRank)) + 1),
      main = "200 nodes in a preferential-attachment network")
 
 # Sort-of linear, but many of the higher ranked nodes have a frequency of only
-# one. That behaviour smooths out in larger graphs:
+# one. That behaviour smooths out in larger graphs - let's try 100000 nodes:
 #
-X <- igraph::sample_pa(1e5, power = 0.8, directed = FALSE)  # 100,000 nodes
+N <- 10000
+X <- igraph::sample_pa(N, power = 0.8, directed = FALSE)  # 100,000 nodes
 freqRank <- table(igraph::degree(X))
 plot(log10(as.numeric(names(freqRank)) + 1),
      log10(as.numeric(freqRank)), type = "b",
@@ -336,16 +333,16 @@ rm(X)
 # probability for two nodes to have an edge to be a function of their Euclidian
 # distance in the box.
 
-# Here is a function that makes an adjacency matrix for such graphs. iGraph has
-# a similar function, sample_grg(), which connects nodes that are closer than a
-# cutoff, the function I give you below is a bit more interesting since it
-# creates edges according to a probability that is determined by a generalized
-# logistic function of the distance. This sigmoidal function gives a smooth
-# cutoff and creates more "natural" graphs. Otherwise, the function is very
-# similar to the random graph function, except that we output the "coordinates"
-# of the nodes together with the adjacency matrix which we then use for the
-# layout. list() FTW.
-#
+# Here is a function that makes an adjacency matrix for such graphs. iGraph::
+# has a similar function, sample_grg(), which connects nodes that are closer
+# than a cutoff, the function I give you below is a bit more interesting since
+# it creates edges according to a probability that is determined by a
+# generalized logistic function of the distance. This sigmoidal function gives
+# a smooth cutoff and creates more "natural" graphs. Otherwise, the function is
+# very similar to the random graph function, except that we output the
+# "coordinates" of the nodes together with the adjacency matrix which we then
+# use for the layout. list() FTW.
+
 
 makeRandomGeometricAM <- function(nam, B = 25, Q = 0.001, t = 0.6) {
   # Make an adjacency matrix for an undirected random geometric graph from
@@ -400,7 +397,7 @@ makeRandomGeometricAM <- function(nam, B = 25, Q = 0.001, t = 0.6) {
 # }
 #
 # ... and this code plots p-values over the distances we could encouter between
-# our nodes: from 0 to sqrt(2) i.e. the diagonal of the unit sqaure in which we
+# our nodes: from 0 to sqrt(2) i.e. the diagonal of the unit square in which we
 # will place our nodes.
 # x <- seq(0, sqrt(2), length.out = 50)
 # plot(x, genLog(x), type="l", col="#AA0000", ylim = c(0, 1),
@@ -415,7 +412,7 @@ set.seed(NULL)                         # reset the RNG
 
 myGRG <- igraph::graph_from_adjacency_matrix(rGAM$mat, mode = "undirected")
 
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myGRG,
      layout = cbind(rGAM$x, rGAM$y), # use our node coordinates for layout,
      rescale = FALSE,
@@ -448,12 +445,12 @@ plot(log10(as.numeric(names(freqRank)) + 1),
 
 
 
-# =    3  A CLOSER LOOK AT THE igraph PACKAGE  =================================
+# =    3  A CLOSER LOOK AT THE igraph:: PACKAGE  ===============================
 
 
 # ==   3.1  Basics  ============================================================
 
-# The basic object of the igraph package is a graph object. Let's explore the
+# The basic object of the igraph:: package is a graph object. Let's explore the
 # first graph some more, the one we built with our random gene names:
 summary(myG)
 
@@ -464,7 +461,7 @@ summary(myG)
 mode(myG)
 class(myG)
 
-# This means an igraph graph object is a special list object; it is opaque in
+# This means an igraph:: graph object is a special list object; it is opaque in
 # the sense that a user is never expected to modify its components directly, but
 # through a variety of helper functions which the package provides. There are
 # many ways to construct graphs - from adjacency matrices, as we have just done,
@@ -494,12 +491,12 @@ plot(myG)  # this is the result of default plot parameters
 # See ?plot.igraph for details."
 
 # Plot with some customizing parameters
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myG,
      layout = igraph::layout_with_fr(myG),
      vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 9 + (2 * igraph::degree(myG)),
-     vertex.label.cex = 0.5 + (0.05 * igraph::degree(myG)),
+     vertex.size = 12 + (5 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
      edge.width = 2,
      vertex.label = igraph::V(myG)$name,
      vertex.label.family = "sans",
@@ -507,25 +504,25 @@ plot(myG,
 par(oPar)
 
 # ... or with a different layout:
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myG,
      layout = igraph::layout_in_circle(myG),
      vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 9 + (2 * igraph::degree(myG)),
-     vertex.label.cex = 0.5 + (0.05 * igraph::degree(myG)),
+     vertex.size = 12 + (5 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
      edge.width = 2,
      vertex.label = igraph::V(myG)$name,
      vertex.label.family = "sans",
      vertex.label.cex = 0.9)
 par(oPar)
 
-# igraph has a large number of graph-layout functions: see
+# igraph:: has a large number of graph-layout functions: see
 # ?layout_  and try them all.
 
 
 # ==   3.2  Components  ========================================================
 
-# The igraph function components() tells us whether there are components of the
+# The function igrap::components() tells us whether there are components of the
 # graph in which there is no path to other components.
 igraph::components(myG)
 
@@ -548,7 +545,7 @@ names(c1)
 # Let's explore some of the more interesting, topological graph measures. We
 # start by building a somewhat bigger graph. We aren't quite sure whether
 # biological graphs are small-world, or random-geometric, or
-# preferential-attachment ... but igraph has ways to simulate the basic ones
+# preferential-attachment ... but igraph:: has ways to simulate the basic ones
 # (and we could easily simulate our own). Look at the following help pages:
 
 ?igraph::sample_gnm             # see also sample_gnp for the Erdös-Rényi models
@@ -573,10 +570,10 @@ plot(myG,
      xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
      ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
      vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 20 + (10 * igraph::degree(myG)),
+     vertex.size = 40 + (15 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
      vertex.label = igraph::V(myG)$name,
-     vertex.label.family = "sans",
-     vertex.label.cex = 0.8)
+     vertex.label.family = "sans")
 par(oPar)                              # restore graphics state
 
 # ==   4.1  Diameter  ==========================================================
@@ -600,17 +597,17 @@ bC <- igraph::centr_betw(myG)  # calculate betweenness centrality
 nodeBetw <- bC$res
 nodeBetw <- round(log(nodeBetw +1)) + 1
 
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myG,
      layout = myGxy,
      rescale = FALSE,
      xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
      ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
-     vertex.color=heat.colors(max(nodeBetw))[nodeBetw],
-     vertex.size = 20 + (10 * igraph::degree(myG)),
+     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
+     vertex.size = 40 + (15 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
      vertex.label = igraph::V(myG)$name,
-     vertex.label.family = "sans",
-     vertex.label.cex = 0.7)
+     vertex.label.family = "sans")
 par(oPar)
 
 # Note that the betweenness - the number of shortest paths that pass through a
@@ -629,7 +626,7 @@ nodeBetw <- bCmyGRG$res
 nodeBetw <- round((log(nodeBetw +1))^2.5) + 1
 
 # colours and size proportional to betweenness
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myGRG,
      layout = cbind(rGAM$x, rGAM$y), # use our node coordinates for layout,
      rescale = FALSE,
@@ -677,18 +674,18 @@ commColors <- rep("#f1eef6", max(igraph::membership(myGRGclusters)))
 commColors[1:5] <- c("#980043", "#dd1c77", "#df65b0", "#c994c7", "#d4b9da")
 
 
-oPar <- par(mar= rep(0,4)) # Turn margins off
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
 plot(myGRG,
      layout = cbind(rGAM$x, rGAM$y),
      rescale = FALSE,
      xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
      ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
      vertex.color=commColors[igraph::membership(myGRGclusters)],
-     vertex.size = 0.1 + (0.1 * igraph::degree(myGRG)),
+     vertex.size = 0.3 + (0.4 * igraph::degree(myGRG)),
      vertex.label = NA)
 par(oPar)
 
-
+# That's all.
 
 
 # [END]