diff --git a/ABC-units.Rproj b/ABC-units.Rproj
index 9f01719..0fbb9ec 100644
--- a/ABC-units.Rproj
+++ b/ABC-units.Rproj
@@ -1,16 +1,16 @@
-Version: 1.0
-
-RestoreWorkspace: No
-SaveWorkspace: No
-AlwaysSaveHistory: No
-
-EnableCodeIndexing: Yes
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-Encoding: UTF-8
-
-RnwWeave: knitr
-LaTeX: XeLaTeX
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: No
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: knitr
+LaTeX: XeLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
diff --git a/FND-MAT-Graphs_and_networks.R b/FND-MAT-Graphs_and_networks.R
index 654f909..f0520d3 100644
--- a/FND-MAT-Graphs_and_networks.R
+++ b/FND-MAT-Graphs_and_networks.R
@@ -1,691 +1,691 @@
-# tocID <- "FND-MAT-Graphs_and_networks.R"
-#
-# Purpose:  A Bioinformatics Course:
-#              R code accompanying the FND-MAT-Graphs_and_networks unit.
-#
-# Version:  1.2
-#
-# Date:     2017  10  -  2019  01
-# Author:   Boris Steipe (boris.steipe@utoronto.ca)
-#
-# Versions:
-#           1.2    Change from require() to requireNamespace(),
-#                      use <package>::<function>() idiom throughout
-#           1.1    Update set.seed() usage
-#           1.0    First final version for learning units.
-#           0.1    First code copied from 2016 material.
-#
-#
-# TODO:     Add some explicit tasks and sample solutions.
-#
-#
-# == DO NOT SIMPLY  source()  THIS FILE! =======================================
-#
-# If there are portions you don't understand, use R's help system, Google for an
-# answer, or ask your instructor. Don't continue if you don't understand what's
-# going on. That's not how it works ...
-#
-# ==============================================================================
-
-
-#TOC> ==========================================================================
-#TOC> 
-#TOC>   Section  Title                                          Line
-#TOC> --------------------------------------------------------------
-#TOC>   1        REVIEW                                           50
-#TOC>   2        DEGREE DISTRIBUTIONS                            206
-#TOC>   2.1        Random graph                                  212
-#TOC>   2.2        scale-free graph (Barabasi-Albert)            260
-#TOC>   2.3        Random geometric graph                        326
-#TOC>   3        A CLOSER LOOK AT THE igraph:: PACKAGE           448
-#TOC>   3.1        Basics                                        451
-#TOC>   3.2        Components                                    523
-#TOC>   4        RANDOM GRAPHS AND GRAPH METRICS                 542
-#TOC>   4.1        Diameter                                      579
-#TOC>   5        GRAPH CLUSTERING                                648
-#TOC> 
-#TOC> ==========================================================================
-
-
-# =    1  REVIEW  ==============================================================
-
-# This tutorial covers basic concepts of graph theory and analysis in R. Make
-# sure you have pulled the latest version of the project from the GitHub
-# repository.
-
-# Let's explore some of the basic ideas of graph theory by starting with a small
-# random graph.
-
-
-# To begin let's write a little function that will create random "gene" names;
-# there's no particular purpose to this other than to make our graphs look a
-# little more "biological" ...
-makeRandomGenenames <- function(N) {
-  nam <- character()
-  while (length(nam) < N) {
-    a <- paste0(c(sample(LETTERS, 1), sample(letters, 2)),
-                collapse="") # one uppercase, two lowercase letters
-    n <- sample(1:9, 1)      # one number
-    nam[length(nam) + 1] <- paste(a, n, sep="") # store in vector
-    nam <- unique(nam)   # delete if this was a duplicate
-  }
-  return(nam)
-}
-
-N <- 20
-
-set.seed(112358)                       # set RNG seed for repeatable randomness
-( Nnames <- makeRandomGenenames(N) )
-set.seed(NULL)                         # reset the RNG
-
-# One way to represent graphs in a computer is as an "adjacency matrix". In this
-# matrix, each row and each column represents a node, and the cell at the
-# intersection of a row and column contains a value/TRUE if there is an edge,
-# 0/FALSE otherwise.
-
-# Let's create an adjacency matrix for our random graph: let's say a pair
-# of nodes has probability p <- 0.09 to have an edge, and our graph is
-# symmetric , i.e. it is an undirected graph, and it has neither self-edges,
-# i.e. loops, nor multiple edges between the same nodes, i.e. it
-# is a "simple" graph. We use the Nnames vector as node labels.
-
-makeRandomAM <- function(nam, p = 0.1) {
-  # Make a random adjacency matrix for a set of nodes with edge probability p
-  # Parameters:
-  #   nam: a character vector of unique node names.
-  #   p:   probability that a random pair of nodes will have an edge.
-  #
-  # Value: an adjacency matrix for a simple, undirected graph
-  #
-
-  N <- length(nam)
-  AM <- matrix(numeric(N * N), ncol = N)  # Adjacency matrix, pre-filled with 0
-  rownames(AM) <- nam
-  colnames(AM) <- nam
-  for (iRow in 1:(N-1)) { # Note how we make sure iRow != iCol - this prevents
-                          # loops (self-edges)
-    for (iCol in (iRow+1):N) {
-      if (runif(1) < p) {     # runif() creates uniform random numbers
-                              # between 0 and 1. The expression is TRUE with
-                              # probability p. if it is TRUE ...
-        AM[iRow, iCol] <- 1   # ... record an edge for the pair (iRow, iCol)
-      }
-    }
-  }
-  return(AM)
-}
-
-set.seed(112358)                       # set RNG seed for repeatable randomness
-( myRandAM <- makeRandomAM(Nnames, p = 0.09) )
-set.seed(NULL)                         # reset the RNG
-
-
-# Listing the matrix is not very informative - we should plot this graph. The
-# standard package for work with graphs in r is "igraph::". We'll go into more
-# details of the igraph:: package a bit later, for now we just use it to plot:
-
-if (! requireNamespace("igraph", quietly = TRUE)) {
-  install.packages("igraph")
-}
-# Package information:
-#  library(help = igraph)       # basic information
-#  browseVignettes("igraph")    # available vignettes
-#  data(package = "igraph")     # available datasets
-
-
-myG <- igraph::graph_from_adjacency_matrix(myRandAM, mode = "undirected")
-
-set.seed(112358)                       # set RNG seed for repeatable randomness
-                                       # calculate layout coordinates
-myGxy <- igraph::layout_with_graphopt(myG,
-                                      charge=0.0012)
-set.seed(NULL)                         # reset the RNG
-
-
-# The igraph:: package adds its own function to the collection of plot()
-# functions; R makes the selection which plot function to use based on the class
-# of the object that we request to plot. This plot function has parameters
-#  layout - the x,y coordinates of the nodes;
-#  vertex.color - which I define to color by node-degree
-#  vertex size - which I define to increase with node-degree
-#  vertex.label - which I set to combine the names of the vertices of the
-#                 graph - names(V(iG)) - with the node degree - degree(iG).
-# See ?igraph.plotting for the complete list of parameters
-
-
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myG,
-     layout = myGxy,
-     rescale = FALSE,
-     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
-     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
-     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 1000 + (900 * igraph::degree(myG)),
-     vertex.label = sprintf("%s(%i)", names(igraph::V(myG)), igraph::degree(myG)),
-     vertex.label.family = "sans",
-     vertex.label.cex = 0.7)
-par(oPar)  # reset plot window
-
-
-# The simplest descriptor of a graph are the number of nodes, edges, and the
-# degree-distribution. In our example, the number of nodes was given: N; the
-# number of edges can easily be calculated from the adjacency matrix. In our
-# matrix, we have entered 1 for every edge. Thus we simply sum over the matrix:
-sum(myRandAM)
-
-# Is that what you expect?
-
-# What about the degree distribution? We can get that simply by summing over the
-# rows and summing over the columns and adding the two vectors.
-
-rowSums(myRandAM) + colSums(myRandAM) # check this against the plot!
-
-# The function degree() gives the same values
-igraph::degree(myG)
-
-# Let's  plot the degree distribution in a histogram:
-degG <- igraph::degree(myG)
-brk <- seq(min(degG)-0.5, max(degG)+0.5, by=1)  # define histogram breaks
-hist(degG, breaks=brk, col="#A5CCF5",
-     xlim = c(-1,8), xaxt = "n",
-     main = "Node degrees", xlab = "Degree", ylab = "Number")  # plot histogram
-axis(side = 1, at = 0:7)
-
-# Note: I don't _have_ to define breaks, the hist() function usually does so
-# quite well, automatically. But for this purpose I want the columns of the
-# histogram to represent exactly one node-degree difference.
-
-# A degree distribution is actually a very informative descriptor of graphs,
-#  since it is very sensitive to the generating mechanism. For biological
-# networks, that is one of the key questions we are interested in: how was the
-# network formed? Asking about the mechanism, the way the interactions
-# support function and shape the fitness landscape in which the cell evolves,
-# connects our mathematical abstraction with biological insight.
-
-
-# =    2  DEGREE DISTRIBUTIONS  ================================================
-
-# Let's simulate a few graphs that are a bit bigger to get a better sense of
-# their degree distributions:
-#
-
-# ==   2.1  Random graph  ======================================================
-
-N <- 200
-
-set.seed(31415927)                     # set RNG seed for repeatable randomness
-my200AM <- makeRandomAM(as.character(1:N), p = 0.015)
-set.seed(NULL)                         # reset the RNG
-
-myG200 <- igraph::graph_from_adjacency_matrix(my200AM, mode = "undirected")
-myGxy <- igraph::layout_with_graphopt(myG200, charge=0.0001) # calculate layout
-                                                             # coordinates
-
-oPar <- par(mar= c(0, 0, 0, 0))             # Turn margins off, save graphics state
-plot(myG200,
-     layout = myGxy,
-     rescale = FALSE,
-     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
-     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
-     vertex.color=heat.colors(max(igraph::degree(myG200)+1))[igraph::degree(myG200)+1],
-     vertex.size = 150 + (70 * igraph::degree(myG200)),
-     vertex.label = NA)
-par(oPar)                              # restore graphics state
-
-# This graph has thirteen singletons and one large, connected component. Many
-# biological graphs look approximately like this.
-
-# Calculate degree distributions
-dg <- igraph::degree(myG200)
-brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
-hist(dg, breaks=brk, col="#A5F5CC",
-     xlim = c(-1,11), xaxt = "n",
-     main = "Node degrees", xlab = "Degree", ylab = "Number")  # plot histogram
-axis(side = 1, at = 0:10)
-
-
-# Note the pronounced peak of this distribution: this is not "scale-free".
-# Here is the log-log plot of frequency vs. degree-rank ...
-
-( freqRank <- table(dg) )
-plot(log10(as.numeric(names(freqRank)) + 1),
-     log10(as.numeric(freqRank)), type = "b",
-     pch = 21, bg = "#A5F5CC",
-     xlab = "log(Rank)", ylab = "log(frequency)",
-     main = "200 nodes in a random network")
-
-# ... which shows us that this does NOT correspond to the single-slope linear
-# relationship that we expect for a "scale-free" graph.
-
-# ==   2.2  scale-free graph (Barabasi-Albert)  ================================
-
-# What does one of those intriguing "scale-free" distributions look like? The
-# iGraph:: package has a function to make random graphs according to the
-# Barabasi-Albert model of scale-free graphs. It is: sample_pa(), where pa
-# stands for "preferential attachment". Preferential attachment is _one_ type of
-# process that will yield scale-free distributions.
-
-N <- 200
-
-set.seed(31415927)                     # set RNG seed for repeatable randomness
-GBA <- igraph::sample_pa(N, power = 0.8, directed = FALSE)
-set.seed(NULL)                         # reset the RNG
-
-GBAxy <- igraph::layout_with_graphopt(GBA, charge=0.0001)
-
-oPar <- par(mar= c(0, 0, 0, 0))             # Turn margins off, save graphics state
-plot(GBA,
-     layout = GBAxy,
-     rescale = FALSE,
-     xlim = c(min(GBAxy[,1]) * 0.99, max(GBAxy[,1]) * 1.01),
-     ylim = c(min(GBAxy[,2]) * 0.99, max(GBAxy[,2]) * 1.01),
-     vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1],
-     vertex.size = 200 + (30 * igraph::degree(GBA)),
-     vertex.label = NA)
-par(oPar)                              # restore graphics state
-
-# This is a very obviously different graph! Some biological networks have
-# features that look like that - but in my experience the hub nodes are usually
-# not that distinct. But then again, that really depends on the parameter
-# "power". Play with the "power" parameter and get a sense for what difference
-# this makes. Also: note that the graph has only a single component - no
-# singletons.
-
-# What's the degree distribution of this graph?
-( dg <- igraph::degree(GBA) )
-brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
-hist(dg, breaks=brk, col="#DCF5B5",
-     xlim = c(0,max(dg)+1), xaxt = "n",
-     main = "Node degrees 200 nodes PA graph",
-     xlab = "Degree", ylab = "Number")
-axis(side = 1, at = seq(0, max(dg)+1, by=5))
-
-# Most nodes have a degree of 1, but one node has a degree of 19.
-
-(freqRank <- table(dg))
-plot(log10(as.numeric(names(freqRank)) + 1),
-     log10(as.numeric(freqRank)), type = "b",
-     pch = 21, bg = "#DCF5B5",
-     xlab = "log(Rank)", ylab = "log(frequency)",
-     main = "200 nodes in a preferential-attachment network")
-
-# Sort-of linear, but many of the higher ranked nodes have a frequency of only
-# one. That behaviour smooths out in larger graphs - let's try 100000 nodes:
-#
-N <- 10000
-X <- igraph::sample_pa(N, power = 0.8, directed = FALSE)  # 100,000 nodes
-freqRank <- table(igraph::degree(X))
-plot(log10(as.numeric(names(freqRank)) + 1),
-     log10(as.numeric(freqRank)), type = "b",
-     xlab = "log(Rank)", ylab = "log(frequency)",
-     pch = 21, bg = "#A5F5CC",
-     main = "100,000 nodes in a random, scale-free network")
-rm(X)
-
-
-# ==   2.3  Random geometric graph  ============================================
-
-# Finally, let's simulate a random geometric graph and look at the degree
-# distribution. Remember: these graphs have a high probability to have edges
-# between nodes that are "close" together - an entirely biological notion.
-
-# We'll randomly place our nodes in a box. Then we'll define the
-# probability for two nodes to have an edge to be a function of their Euclidian
-# distance in the box.
-
-# Here is a function that makes an adjacency matrix for such graphs. iGraph::
-# has a similar function, sample_grg(), which connects nodes that are closer
-# than a cutoff, the function I give you below is a bit more interesting since
-# it creates edges according to a probability that is determined by a
-# generalized logistic function of the distance. This sigmoidal function gives
-# a smooth cutoff and creates more "natural" graphs. Otherwise, the function is
-# very similar to the random graph function, except that we output the
-# "coordinates" of the nodes together with the adjacency matrix which we then
-# use for the layout. list() FTW.
-
-
-makeRandomGeometricAM <- function(nam, B = 25, Q = 0.001, t = 0.6) {
-  # Make an adjacency matrix for an undirected random geometric graph from
-  #    edges connected with probabilities according to a generalized logistic
-  #    function.
-  # Parameters:
-  #    nam: a character vector of unique names
-  #    B, Q, t:   probability that a random pair (i, j) of nodes gets an
-  #                 edge determined by a generalized logistic function
-  #                 p <- 1 - 1/((1 + (Q * (exp(-B * (x-t)))))^(1 / 0.9)))
-  #
-  # Value: a list with the following components:
-  #        AM$mat : an adjacency matrix
-  #        AM$nam : labels for the nodes
-  #        AM$x   : x-coordinates for the nodes
-  #        AM$y   : y-coordinates for the nodes
-  #
-  nu <- 1  # probably not useful to change
-  AM <- list()
-  AM$nam <- nam
-  N <- length(AM$nam)
-  AM$mat <- matrix(numeric(N * N), ncol = N)  # The adjacency matrix
-  rownames(AM$mat) <- AM$nam
-  colnames(AM$mat) <- AM$nam
-  AM$x <- runif(N) # Randomly place nodes into the unit square
-  AM$y <- runif(N)
-  for (iRow in 1:(N-1)) { # Same principles as in makeRandomGraph()
-    for (iCol in (iRow+1):N) {
-      # geometric distance ...
-      d <- sqrt((AM$x[iRow] - AM$x[iCol])^2 +
-                (AM$y[iRow] - AM$y[iCol])^2)  # Pythagoras
-      # distance dependent probability
-      p <- 1 - 1/((1 + (Q * (exp(-B * (d-t)))))^(1 / nu))
-      if (runif(1) < p) {
-        AM$mat[iRow, iCol] <- 1
-      }
-    }
-  }
-  return(AM)
-}
-
-# Getting the parameters of a generalized logistic right takes a bit of
-# experimenting. If you are interested, you can try a few variations. Or you can
-# look up the function at
-# https://en.wikipedia.org/wiki/Generalised_logistic_function
-
-# This function computes generalized logistics ...
-# genLog <- function(x, B = 25, Q = 0.001, t = 0.5) {
-#     # generalized logistic (sigmoid)
-#     nu <- 1
-#     return(1 - 1/((1 + (Q * (exp(-B * (x-t)))))^(1 / nu)))
-# }
-#
-# ... and this code plots p-values over the distances we could encouter between
-# our nodes: from 0 to sqrt(2) i.e. the diagonal of the unit square in which we
-# will place our nodes.
-# x <- seq(0, sqrt(2), length.out = 50)
-# plot(x, genLog(x), type="l", col="#AA0000", ylim = c(0, 1),
-#      xlab = "d", ylab = "p(edge)")
-
-# 200 node random geomteric graph
-N <- 200
-set.seed(112358)                       # set RNG seed for repeatable randomness
-rGAM <- makeRandomGeometricAM(as.character(1:N), t = 0.4)
-set.seed(NULL)                         # reset the RNG
-
-
-myGRG <- igraph::graph_from_adjacency_matrix(rGAM$mat, mode = "undirected")
-
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myGRG,
-     layout = cbind(rGAM$x, rGAM$y), # use our node coordinates for layout,
-     rescale = FALSE,
-     xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
-     ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
-     vertex.color=heat.colors(max(igraph::degree(myGRG)+1))[igraph::degree(myGRG)+1],
-     vertex.size = 0.1 + (0.2 * igraph::degree(myGRG)),
-     vertex.label = NA)
-par(oPar)
-
-# degree distribution:
-(dg <- igraph::degree(myGRG))
-brk <- seq(min(dg) - 0.5, max(dg) + 0.5, by = 1)
-hist(dg, breaks = brk, col = "#FCC6D2",
-     xlim = c(0, 25), xaxt = "n",
-     main = "Node degrees: 200 nodes RG graph",
-     xlab = "Degree", ylab = "Number")
-axis(side = 1, at = c(0, min(dg):max(dg)))
-
-# You'll find that this is kind of in-between the random, and the scale-free
-# graph. We do have hubs, but they are not as extreme as in the scale-free case;
-# and we have have no singletons, in contrast to the random graph.
-
-(freqRank <- table(dg))
-plot(log10(as.numeric(names(freqRank)) + 1),
-     log10(as.numeric(freqRank)), type = "b",
-     pch = 21, bg = "#FCC6D2",
-     xlab = "log(Rank)", ylab = "log(frequency)",
-     main = "200 nodes in a random geometric network")
-
-
-
-# =    3  A CLOSER LOOK AT THE igraph:: PACKAGE  ===============================
-
-
-# ==   3.1  Basics  ============================================================
-
-# The basic object of the igraph:: package is a graph object. Let's explore the
-# first graph some more, the one we built with our random gene names:
-summary(myG)
-
-# This output means: this is an IGRAPH graph, with U = UN-directed edges
-#  and N = named nodes, that has 20 nodes and 20 edges. For details, see
-?igraph::print.igraph
-
-mode(myG)
-class(myG)
-
-# This means an igraph:: graph object is a special list object; it is opaque in
-# the sense that a user is never expected to modify its components directly, but
-# through a variety of helper functions which the package provides. There are
-# many ways to construct graphs - from adjacency matrices, as we have just done,
-# from edge lists, or by producing random graphs according to a variety of
-# recipes, called _games_ in this package.
-
-# Two basic functions retrieve nodes "Vertices", and "Edges":
-igraph::V(myG)
-igraph::E(myG)
-
-# additional properties can be retrieved from the Vertices ...
-igraph::V(myG)$name
-
-
-# As with many R objects, loading the package provides special functions that
-# can be accessed via the same name as the basic R functions, for example:
-
-print(myG)
-plot(myG)  # this is the result of default plot parameters
-
-# ... where plot() allows the usual flexibility of fine-tuning the plot. We
-# first layout the node coordinates with the Fruchtermann-Reingold algorithm - a
-# force-directed layout that applies an ettractive potential along edges (which
-# pulls nodes together) and a repulsive potential to nodes (so they don't
-# overlap). Note the use of the degree() function to color and scale nodes and
-# labels by degree and the use of the V() function to retrieve the vertex names.
-# See ?plot.igraph for details."
-
-# Plot with some customizing parameters
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myG,
-     layout = igraph::layout_with_fr(myG),
-     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 12 + (5 * igraph::degree(myG)),
-     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
-     edge.width = 2,
-     vertex.label = igraph::V(myG)$name,
-     vertex.label.family = "sans",
-     vertex.label.cex = 0.9)
-par(oPar)
-
-# ... or with a different layout:
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myG,
-     layout = igraph::layout_in_circle(myG),
-     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 12 + (5 * igraph::degree(myG)),
-     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
-     edge.width = 2,
-     vertex.label = igraph::V(myG)$name,
-     vertex.label.family = "sans",
-     vertex.label.cex = 0.9)
-par(oPar)
-
-# igraph:: has a large number of graph-layout functions: see
-# ?layout_  and try them all.
-
-
-# ==   3.2  Components  ========================================================
-
-# The function igrap::components() tells us whether there are components of the
-# graph in which there is no path to other components.
-igraph::components(myG)
-
-# In the _membership_ vector, nodes are annotated with the index of the
-# component they are part of. Sui7 is the only node of component 2, Cyj1 is in
-# the third component etc. This is perhaps more clear if we sort by component
-# index
-sort(igraph::components(myG)$membership, decreasing = TRUE)
-
-# Retrieving e.g. the members of the first component from the list can be done by subsetting:
-
-(sel <- igraph::components(myG)$membership == 1)  # boolean vector ..
-(c1 <- igraph::components(myG)$membership[sel])
-names(c1)
-
-
-# =    4  RANDOM GRAPHS AND GRAPH METRICS  =====================================
-
-
-# Let's explore some of the more interesting, topological graph measures. We
-# start by building a somewhat bigger graph. We aren't quite sure whether
-# biological graphs are small-world, or random-geometric, or
-# preferential-attachment ... but igraph:: has ways to simulate the basic ones
-# (and we could easily simulate our own). Look at the following help pages:
-
-?igraph::sample_gnm             # see also sample_gnp for the Erdös-Rényi models
-?igraph::sample_smallworld      # for the Watts & Strogatz model
-?igraph::sample_pa              # for the Barabasi-Albert model
-
-# But note that there are many more sample_ functions. Check out the docs!
-
-# Let's look at betweenness measures for our first graph. Here: the nodes again
-# colored by degree. Degree centrality states: nodes of higher degree are
-# considered to be more central. And that's also the way the force-directed
-# layout drawas them, obviously.
-
-set.seed(112358)                       # set RNG seed for repeatable randomness
-myGxy <- igraph::layout_with_fr(myG)   # calculate layout coordinates
-set.seed(NULL)                         # reset the RNG
-
-oPar <- par(mar = rep(0, 4))           # turn margins off, save graphics state
-plot(myG,
-     layout = myGxy,
-     rescale = FALSE,
-     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
-     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
-     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 40 + (15 * igraph::degree(myG)),
-     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
-     vertex.label = igraph::V(myG)$name,
-     vertex.label.family = "sans")
-par(oPar)                              # restore graphics state
-
-# ==   4.1  Diameter  ==========================================================
-
-igraph::diameter(myG)  # The diameter of a graph is its maximum length
-                       # shortest path.
-
-# let's plot this path: here are the nodes ...
-igraph::get_diameter(myG)
-
-# ... and we can get the x, y coordinates from iGxy by subsetting with the node
-# names. The we draw the diameter-path with a transparent, thick pink line:
-lines(myGxy[igraph::get_diameter(myG),], lwd=10, col="#ff63a788")
-
-# == Centralization scores
-
-?igraph::centralize
-# replot our graph, and color by log_betweenness:
-
-bC <- igraph::centr_betw(myG)  # calculate betweenness centrality
-nodeBetw <- bC$res
-nodeBetw <- round(log(nodeBetw +1)) + 1
-
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myG,
-     layout = myGxy,
-     rescale = FALSE,
-     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
-     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
-     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
-     vertex.size = 40 + (15 * igraph::degree(myG)),
-     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
-     vertex.label = igraph::V(myG)$name,
-     vertex.label.family = "sans")
-par(oPar)
-
-# Note that the betweenness - the number of shortest paths that pass through a
-# node, is in general higher for high-degree nodes - but not always: Eqr2 has
-# higher betweenness than Itv7: this measure really depends on the detailed
-# local topology of the graph."
-
-# Can you use centr_eigen() and centr_degree() to calculate the respective
-# values? That's something I would expect you to be able to do.
-#
-# Lets plot betweenness centrality for our random geometric graph:
-
-bCmyGRG <- igraph::centr_betw(myGRG)  # calculate betweenness centrality
-
-nodeBetw <- bCmyGRG$res
-nodeBetw <- round((log(nodeBetw +1))^2.5) + 1
-
-# colours and size proportional to betweenness
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myGRG,
-     layout = cbind(rGAM$x, rGAM$y), # use our node coordinates for layout,
-     rescale = FALSE,
-     xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
-     ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
-     vertex.color=heat.colors(max(nodeBetw))[nodeBetw],
-     vertex.size = 0.1 + (0.03 * nodeBetw),
-     vertex.label = NA)
-par(oPar)
-
-igraph::diameter(myGRG)
-lines(rGAM$x[igraph::get_diameter(myGRG)],
-      rGAM$y[igraph::get_diameter(myGRG)],
-      lwd = 10,
-      col = "#ff335533")
-
-
-
-# =    5  GRAPH CLUSTERING  ====================================================
-
-
-# Clustering finds "communities" in graphs - and depending what the edges
-# represent, these could be complexes, pathways, biological systems or similar.
-# There are many graph-clustering algorithms. One approach with many attractive
-# properties is the Map Equation, developed by Martin Rosvall. See:
-# http://www.ncbi.nlm.nih.gov/pubmed/18216267 and htttp://www.mapequation.org
-
-
-myGRGclusters <- igraph::cluster_infomap(myGRG)
-igraph::modularity(myGRGclusters)  # ... measures how separated the different
-                                   # membership types are from each other
-igraph::membership(myGRGclusters)         # which nodes are in what cluster?
-table(igraph::membership(myGRGclusters))  # how large are the clusters?
-
-# The largest cluster has 48 members, the second largest has 25, etc.
-
-
-# Lets plot our graph again, coloring the nodes of the first five communities by
-# their cluster membership:
-
-# first, make a vector with as many grey colors as we have communities ...
-commColors <- rep("#f1eef6", max(igraph::membership(myGRGclusters)))
-# ... then overwrite the first five with "real colors" - something like rust,
-# lilac, pink, and mauve or so.
-commColors[1:5] <- c("#980043", "#dd1c77", "#df65b0", "#c994c7", "#d4b9da")
-
-
-oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
-plot(myGRG,
-     layout = cbind(rGAM$x, rGAM$y),
-     rescale = FALSE,
-     xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
-     ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
-     vertex.color=commColors[igraph::membership(myGRGclusters)],
-     vertex.size = 0.3 + (0.4 * igraph::degree(myGRG)),
-     vertex.label = NA)
-par(oPar)
-
-# That's all.
-
-
-# [END]
+# tocID <- "FND-MAT-Graphs_and_networks.R"
+#
+# Purpose:  A Bioinformatics Course:
+#              R code accompanying the FND-MAT-Graphs_and_networks unit.
+#
+# Version:  1.2
+#
+# Date:     2017  10  -  2019  01
+# Author:   Boris Steipe (boris.steipe@utoronto.ca)
+#
+# Versions:
+#           1.2    Change from require() to requireNamespace(),
+#                      use <package>::<function>() idiom throughout
+#           1.1    Update set.seed() usage
+#           1.0    First final version for learning units.
+#           0.1    First code copied from 2016 material.
+#
+#
+# TODO:     Add some explicit tasks and sample solutions.
+#
+#
+# == DO NOT SIMPLY  source()  THIS FILE! =======================================
+#
+# If there are portions you don't understand, use R's help system, Google for an
+# answer, or ask your instructor. Don't continue if you don't understand what's
+# going on. That's not how it works ...
+#
+# ==============================================================================
+
+
+#TOC> ==========================================================================
+#TOC>
+#TOC>   Section  Title                                          Line
+#TOC> --------------------------------------------------------------
+#TOC>   1        REVIEW                                           50
+#TOC>   2        DEGREE DISTRIBUTIONS                            206
+#TOC>   2.1        Random graph                                  212
+#TOC>   2.2        scale-free graph (Barabasi-Albert)            260
+#TOC>   2.3        Random geometric graph                        326
+#TOC>   3        A CLOSER LOOK AT THE igraph:: PACKAGE           448
+#TOC>   3.1        Basics                                        451
+#TOC>   3.2        Components                                    523
+#TOC>   4        RANDOM GRAPHS AND GRAPH METRICS                 542
+#TOC>   4.1        Diameter                                      579
+#TOC>   5        GRAPH CLUSTERING                                648
+#TOC>
+#TOC> ==========================================================================
+
+
+# =    1  REVIEW  ==============================================================
+
+# This tutorial covers basic concepts of graph theory and analysis in R. Make
+# sure you have pulled the latest version of the project from the GitHub
+# repository.
+
+# Let's explore some of the basic ideas of graph theory by starting with a small
+# random graph.
+
+
+# To begin let's write a little function that will create random "gene" names;
+# there's no particular purpose to this other than to make our graphs look a
+# little more "biological" ...
+makeRandomGenenames <- function(N) {
+  nam <- character()
+  while (length(nam) < N) {
+    a <- paste0(c(sample(LETTERS, 1), sample(letters, 2)),
+                collapse="") # one uppercase, two lowercase letters
+    n <- sample(1:9, 1)      # one number
+    nam[length(nam) + 1] <- paste(a, n, sep="") # store in vector
+    nam <- unique(nam)   # delete if this was a duplicate
+  }
+  return(nam)
+}
+
+N <- 20
+
+set.seed(112358)                       # set RNG seed for repeatable randomness
+( Nnames <- makeRandomGenenames(N) )
+set.seed(NULL)                         # reset the RNG
+
+# One way to represent graphs in a computer is as an "adjacency matrix". In this
+# matrix, each row and each column represents a node, and the cell at the
+# intersection of a row and column contains a value/TRUE if there is an edge,
+# 0/FALSE otherwise.
+
+# Let's create an adjacency matrix for our random graph: let's say a pair
+# of nodes has probability p <- 0.09 to have an edge, and our graph is
+# symmetric , i.e. it is an undirected graph, and it has neither self-edges,
+# i.e. loops, nor multiple edges between the same nodes, i.e. it
+# is a "simple" graph. We use the Nnames vector as node labels.
+
+makeRandomAM <- function(nam, p = 0.1) {
+  # Make a random adjacency matrix for a set of nodes with edge probability p
+  # Parameters:
+  #   nam: a character vector of unique node names.
+  #   p:   probability that a random pair of nodes will have an edge.
+  #
+  # Value: an adjacency matrix for a simple, undirected graph
+  #
+
+  N <- length(nam)
+  AM <- matrix(numeric(N * N), ncol = N)  # Adjacency matrix, pre-filled with 0
+  rownames(AM) <- nam
+  colnames(AM) <- nam
+  for (iRow in 1:(N-1)) { # Note how we make sure iRow != iCol - this prevents
+                          # loops (self-edges)
+    for (iCol in (iRow+1):N) {
+      if (runif(1) < p) {     # runif() creates uniform random numbers
+                              # between 0 and 1. The expression is TRUE with
+                              # probability p. if it is TRUE ...
+        AM[iRow, iCol] <- 1   # ... record an edge for the pair (iRow, iCol)
+      }
+    }
+  }
+  return(AM)
+}
+
+set.seed(112358)                       # set RNG seed for repeatable randomness
+( myRandAM <- makeRandomAM(Nnames, p = 0.09) )
+set.seed(NULL)                         # reset the RNG
+
+
+# Listing the matrix is not very informative - we should plot this graph. The
+# standard package for work with graphs in r is "igraph::". We'll go into more
+# details of the igraph:: package a bit later, for now we just use it to plot:
+
+if (! requireNamespace("igraph", quietly = TRUE)) {
+  install.packages("igraph")
+}
+# Package information:
+#  library(help = igraph)       # basic information
+#  browseVignettes("igraph")    # available vignettes
+#  data(package = "igraph")     # available datasets
+
+
+myG <- igraph::graph_from_adjacency_matrix(myRandAM, mode = "undirected")
+
+set.seed(112358)                       # set RNG seed for repeatable randomness
+                                       # calculate layout coordinates
+myGxy <- igraph::layout_with_graphopt(myG,
+                                      charge=0.0012)
+set.seed(NULL)                         # reset the RNG
+
+
+# The igraph:: package adds its own function to the collection of plot()
+# functions; R makes the selection which plot function to use based on the class
+# of the object that we request to plot. This plot function has parameters
+#  layout - the x,y coordinates of the nodes;
+#  vertex.color - which I define to color by node-degree
+#  vertex size - which I define to increase with node-degree
+#  vertex.label - which I set to combine the names of the vertices of the
+#                 graph - names(V(iG)) - with the node degree - degree(iG).
+# See ?igraph.plotting for the complete list of parameters
+
+
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myG,
+     layout = myGxy,
+     rescale = FALSE,
+     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
+     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
+     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
+     vertex.size = 1000 + (900 * igraph::degree(myG)),
+     vertex.label = sprintf("%s(%i)", names(igraph::V(myG)), igraph::degree(myG)),
+     vertex.label.family = "sans",
+     vertex.label.cex = 0.7)
+par(oPar)  # reset plot window
+
+
+# The simplest descriptor of a graph are the number of nodes, edges, and the
+# degree-distribution. In our example, the number of nodes was given: N; the
+# number of edges can easily be calculated from the adjacency matrix. In our
+# matrix, we have entered 1 for every edge. Thus we simply sum over the matrix:
+sum(myRandAM)
+
+# Is that what you expect?
+
+# What about the degree distribution? We can get that simply by summing over the
+# rows and summing over the columns and adding the two vectors.
+
+rowSums(myRandAM) + colSums(myRandAM) # check this against the plot!
+
+# The function degree() gives the same values
+igraph::degree(myG)
+
+# Let's  plot the degree distribution in a histogram:
+degG <- igraph::degree(myG)
+brk <- seq(min(degG)-0.5, max(degG)+0.5, by=1)  # define histogram breaks
+hist(degG, breaks=brk, col="#A5CCF5",
+     xlim = c(-1,8), xaxt = "n",
+     main = "Node degrees", xlab = "Degree", ylab = "Number")  # plot histogram
+axis(side = 1, at = 0:7)
+
+# Note: I don't _have_ to define breaks, the hist() function usually does so
+# quite well, automatically. But for this purpose I want the columns of the
+# histogram to represent exactly one node-degree difference.
+
+# A degree distribution is actually a very informative descriptor of graphs,
+#  since it is very sensitive to the generating mechanism. For biological
+# networks, that is one of the key questions we are interested in: how was the
+# network formed? Asking about the mechanism, the way the interactions
+# support function and shape the fitness landscape in which the cell evolves,
+# connects our mathematical abstraction with biological insight.
+
+
+# =    2  DEGREE DISTRIBUTIONS  ================================================
+
+# Let's simulate a few graphs that are a bit bigger to get a better sense of
+# their degree distributions:
+#
+
+# ==   2.1  Random graph  ======================================================
+
+N <- 200
+
+set.seed(31415927)                     # set RNG seed for repeatable randomness
+my200AM <- makeRandomAM(as.character(1:N), p = 0.015)
+set.seed(NULL)                         # reset the RNG
+
+myG200 <- igraph::graph_from_adjacency_matrix(my200AM, mode = "undirected")
+myGxy <- igraph::layout_with_graphopt(myG200, charge=0.0001) # calculate layout
+                                                             # coordinates
+
+oPar <- par(mar= c(0, 0, 0, 0))             # Turn margins off, save graphics state
+plot(myG200,
+     layout = myGxy,
+     rescale = FALSE,
+     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
+     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
+     vertex.color=heat.colors(max(igraph::degree(myG200)+1))[igraph::degree(myG200)+1],
+     vertex.size = 150 + (70 * igraph::degree(myG200)),
+     vertex.label = NA)
+par(oPar)                              # restore graphics state
+
+# This graph has thirteen singletons and one large, connected component. Many
+# biological graphs look approximately like this.
+
+# Calculate degree distributions
+dg <- igraph::degree(myG200)
+brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
+hist(dg, breaks=brk, col="#A5F5CC",
+     xlim = c(-1,11), xaxt = "n",
+     main = "Node degrees", xlab = "Degree", ylab = "Number")  # plot histogram
+axis(side = 1, at = 0:10)
+
+
+# Note the pronounced peak of this distribution: this is not "scale-free".
+# Here is the log-log plot of frequency vs. degree-rank ...
+
+( freqRank <- table(dg) )
+plot(log10(as.numeric(names(freqRank)) + 1),
+     log10(as.numeric(freqRank)), type = "b",
+     pch = 21, bg = "#A5F5CC",
+     xlab = "log(Rank)", ylab = "log(frequency)",
+     main = "200 nodes in a random network")
+
+# ... which shows us that this does NOT correspond to the single-slope linear
+# relationship that we expect for a "scale-free" graph.
+
+# ==   2.2  scale-free graph (Barabasi-Albert)  ================================
+
+# What does one of those intriguing "scale-free" distributions look like? The
+# iGraph:: package has a function to make random graphs according to the
+# Barabasi-Albert model of scale-free graphs. It is: sample_pa(), where pa
+# stands for "preferential attachment". Preferential attachment is _one_ type of
+# process that will yield scale-free distributions.
+
+N <- 200
+
+set.seed(31415927)                     # set RNG seed for repeatable randomness
+GBA <- igraph::sample_pa(N, power = 0.8, directed = FALSE)
+set.seed(NULL)                         # reset the RNG
+
+GBAxy <- igraph::layout_with_graphopt(GBA, charge=0.0001)
+
+oPar <- par(mar= c(0, 0, 0, 0))             # Turn margins off, save graphics state
+plot(GBA,
+     layout = GBAxy,
+     rescale = FALSE,
+     xlim = c(min(GBAxy[,1]) * 0.99, max(GBAxy[,1]) * 1.01),
+     ylim = c(min(GBAxy[,2]) * 0.99, max(GBAxy[,2]) * 1.01),
+     vertex.color=heat.colors(max(igraph::degree(GBA)+1))[igraph::degree(GBA)+1],
+     vertex.size = 200 + (30 * igraph::degree(GBA)),
+     vertex.label = NA)
+par(oPar)                              # restore graphics state
+
+# This is a very obviously different graph! Some biological networks have
+# features that look like that - but in my experience the hub nodes are usually
+# not that distinct. But then again, that really depends on the parameter
+# "power". Play with the "power" parameter and get a sense for what difference
+# this makes. Also: note that the graph has only a single component - no
+# singletons.
+
+# What's the degree distribution of this graph?
+( dg <- igraph::degree(GBA) )
+brk <- seq(min(dg)-0.5, max(dg)+0.5, by=1)
+hist(dg, breaks=brk, col="#DCF5B5",
+     xlim = c(0,max(dg)+1), xaxt = "n",
+     main = "Node degrees 200 nodes PA graph",
+     xlab = "Degree", ylab = "Number")
+axis(side = 1, at = seq(0, max(dg)+1, by=5))
+
+# Most nodes have a degree of 1, but one node has a degree of 19.
+
+(freqRank <- table(dg))
+plot(log10(as.numeric(names(freqRank)) + 1),
+     log10(as.numeric(freqRank)), type = "b",
+     pch = 21, bg = "#DCF5B5",
+     xlab = "log(Rank)", ylab = "log(frequency)",
+     main = "200 nodes in a preferential-attachment network")
+
+# Sort-of linear, but many of the higher ranked nodes have a frequency of only
+# one. That behaviour smooths out in larger graphs - let's try 100000 nodes:
+#
+N <- 10000
+X <- igraph::sample_pa(N, power = 0.8, directed = FALSE)  # 100,000 nodes
+freqRank <- table(igraph::degree(X))
+plot(log10(as.numeric(names(freqRank)) + 1),
+     log10(as.numeric(freqRank)), type = "b",
+     xlab = "log(Rank)", ylab = "log(frequency)",
+     pch = 21, bg = "#A5F5CC",
+     main = "100,000 nodes in a random, scale-free network")
+rm(X)
+
+
+# ==   2.3  Random geometric graph  ============================================
+
+# Finally, let's simulate a random geometric graph and look at the degree
+# distribution. Remember: these graphs have a high probability to have edges
+# between nodes that are "close" together - an entirely biological notion.
+
+# We'll randomly place our nodes in a box. Then we'll define the
+# probability for two nodes to have an edge to be a function of their Euclidian
+# distance in the box.
+
+# Here is a function that makes an adjacency matrix for such graphs. iGraph::
+# has a similar function, sample_grg(), which connects nodes that are closer
+# than a cutoff, the function I give you below is a bit more interesting since
+# it creates edges according to a probability that is determined by a
+# generalized logistic function of the distance. This sigmoidal function gives
+# a smooth cutoff and creates more "natural" graphs. Otherwise, the function is
+# very similar to the random graph function, except that we output the
+# "coordinates" of the nodes together with the adjacency matrix which we then
+# use for the layout. list() FTW.
+
+
+makeRandomGeometricAM <- function(nam, B = 25, Q = 0.001, t = 0.6) {
+  # Make an adjacency matrix for an undirected random geometric graph from
+  #    edges connected with probabilities according to a generalized logistic
+  #    function.
+  # Parameters:
+  #    nam: a character vector of unique names
+  #    B, Q, t:   probability that a random pair (i, j) of nodes gets an
+  #                 edge determined by a generalized logistic function
+  #                 p <- 1 - 1/((1 + (Q * (exp(-B * (x-t)))))^(1 / 0.9)))
+  #
+  # Value: a list with the following components:
+  #        AM$mat : an adjacency matrix
+  #        AM$nam : labels for the nodes
+  #        AM$x   : x-coordinates for the nodes
+  #        AM$y   : y-coordinates for the nodes
+  #
+  nu <- 1  # probably not useful to change
+  AM <- list()
+  AM$nam <- nam
+  N <- length(AM$nam)
+  AM$mat <- matrix(numeric(N * N), ncol = N)  # The adjacency matrix
+  rownames(AM$mat) <- AM$nam
+  colnames(AM$mat) <- AM$nam
+  AM$x <- runif(N) # Randomly place nodes into the unit square
+  AM$y <- runif(N)
+  for (iRow in 1:(N-1)) { # Same principles as in makeRandomGraph()
+    for (iCol in (iRow+1):N) {
+      # geometric distance ...
+      d <- sqrt((AM$x[iRow] - AM$x[iCol])^2 +
+                (AM$y[iRow] - AM$y[iCol])^2)  # Pythagoras
+      # distance dependent probability
+      p <- 1 - 1/((1 + (Q * (exp(-B * (d-t)))))^(1 / nu))
+      if (runif(1) < p) {
+        AM$mat[iRow, iCol] <- 1
+      }
+    }
+  }
+  return(AM)
+}
+
+# Getting the parameters of a generalized logistic right takes a bit of
+# experimenting. If you are interested, you can try a few variations. Or you can
+# look up the function at
+# https://en.wikipedia.org/wiki/Generalised_logistic_function
+
+# This function computes generalized logistics ...
+# genLog <- function(x, B = 25, Q = 0.001, t = 0.5) {
+#     # generalized logistic (sigmoid)
+#     nu <- 1
+#     return(1 - 1/((1 + (Q * (exp(-B * (x-t)))))^(1 / nu)))
+# }
+#
+# ... and this code plots p-values over the distances we could encouter between
+# our nodes: from 0 to sqrt(2) i.e. the diagonal of the unit square in which we
+# will place our nodes.
+# x <- seq(0, sqrt(2), length.out = 50)
+# plot(x, genLog(x), type="l", col="#AA0000", ylim = c(0, 1),
+#      xlab = "d", ylab = "p(edge)")
+
+# 200 node random geomteric graph
+N <- 200
+set.seed(112358)                       # set RNG seed for repeatable randomness
+rGAM <- makeRandomGeometricAM(as.character(1:N), t = 0.4)
+set.seed(NULL)                         # reset the RNG
+
+
+myGRG <- igraph::graph_from_adjacency_matrix(rGAM$mat, mode = "undirected")
+
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myGRG,
+     layout = cbind(rGAM$x, rGAM$y), # use our node coordinates for layout,
+     rescale = FALSE,
+     xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
+     ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
+     vertex.color=heat.colors(max(igraph::degree(myGRG)+1))[igraph::degree(myGRG)+1],
+     vertex.size = 0.1 + (0.2 * igraph::degree(myGRG)),
+     vertex.label = NA)
+par(oPar)
+
+# degree distribution:
+(dg <- igraph::degree(myGRG))
+brk <- seq(min(dg) - 0.5, max(dg) + 0.5, by = 1)
+hist(dg, breaks = brk, col = "#FCC6D2",
+     xlim = c(0, 25), xaxt = "n",
+     main = "Node degrees: 200 nodes RG graph",
+     xlab = "Degree", ylab = "Number")
+axis(side = 1, at = c(0, min(dg):max(dg)))
+
+# You'll find that this is kind of in-between the random, and the scale-free
+# graph. We do have hubs, but they are not as extreme as in the scale-free case;
+# and we have have no singletons, in contrast to the random graph.
+
+(freqRank <- table(dg))
+plot(log10(as.numeric(names(freqRank)) + 1),
+     log10(as.numeric(freqRank)), type = "b",
+     pch = 21, bg = "#FCC6D2",
+     xlab = "log(Rank)", ylab = "log(frequency)",
+     main = "200 nodes in a random geometric network")
+
+
+
+# =    3  A CLOSER LOOK AT THE igraph:: PACKAGE  ===============================
+
+
+# ==   3.1  Basics  ============================================================
+
+# The basic object of the igraph:: package is a graph object. Let's explore the
+# first graph some more, the one we built with our random gene names:
+summary(myG)
+
+# This output means: this is an IGRAPH graph, with U = UN-directed edges
+#  and N = named nodes, that has 20 nodes and 20 edges. For details, see
+?igraph::print.igraph
+
+mode(myG)
+class(myG)
+
+# This means an igraph:: graph object is a special list object; it is opaque in
+# the sense that a user is never expected to modify its components directly, but
+# through a variety of helper functions which the package provides. There are
+# many ways to construct graphs - from adjacency matrices, as we have just done,
+# from edge lists, or by producing random graphs according to a variety of
+# recipes, called _games_ in this package.
+
+# Two basic functions retrieve nodes "Vertices", and "Edges":
+igraph::V(myG)
+igraph::E(myG)
+
+# additional properties can be retrieved from the Vertices ...
+igraph::V(myG)$name
+
+
+# As with many R objects, loading the package provides special functions that
+# can be accessed via the same name as the basic R functions, for example:
+
+print(myG)
+plot(myG)  # this is the result of default plot parameters
+
+# ... where plot() allows the usual flexibility of fine-tuning the plot. We
+# first layout the node coordinates with the Fruchtermann-Reingold algorithm - a
+# force-directed layout that applies an ettractive potential along edges (which
+# pulls nodes together) and a repulsive potential to nodes (so they don't
+# overlap). Note the use of the degree() function to color and scale nodes and
+# labels by degree and the use of the V() function to retrieve the vertex names.
+# See ?plot.igraph for details."
+
+# Plot with some customizing parameters
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myG,
+     layout = igraph::layout_with_fr(myG),
+     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
+     vertex.size = 12 + (5 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
+     edge.width = 2,
+     vertex.label = igraph::V(myG)$name,
+     vertex.label.family = "sans",
+     vertex.label.cex = 0.9)
+par(oPar)
+
+# ... or with a different layout:
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myG,
+     layout = igraph::layout_in_circle(myG),
+     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
+     vertex.size = 12 + (5 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
+     edge.width = 2,
+     vertex.label = igraph::V(myG)$name,
+     vertex.label.family = "sans",
+     vertex.label.cex = 0.9)
+par(oPar)
+
+# igraph:: has a large number of graph-layout functions: see
+# ?layout_  and try them all.
+
+
+# ==   3.2  Components  ========================================================
+
+# The function igrap::components() tells us whether there are components of the
+# graph in which there is no path to other components.
+igraph::components(myG)
+
+# In the _membership_ vector, nodes are annotated with the index of the
+# component they are part of. Sui7 is the only node of component 2, Cyj1 is in
+# the third component etc. This is perhaps more clear if we sort by component
+# index
+sort(igraph::components(myG)$membership, decreasing = TRUE)
+
+# Retrieving e.g. the members of the first component from the list can be done by subsetting:
+
+(sel <- igraph::components(myG)$membership == 1)  # boolean vector ..
+(c1 <- igraph::components(myG)$membership[sel])
+names(c1)
+
+
+# =    4  RANDOM GRAPHS AND GRAPH METRICS  =====================================
+
+
+# Let's explore some of the more interesting, topological graph measures. We
+# start by building a somewhat bigger graph. We aren't quite sure whether
+# biological graphs are small-world, or random-geometric, or
+# preferential-attachment ... but igraph:: has ways to simulate the basic ones
+# (and we could easily simulate our own). Look at the following help pages:
+
+?igraph::sample_gnm             # see also sample_gnp for the Erdös-Rényi models
+?igraph::sample_smallworld      # for the Watts & Strogatz model
+?igraph::sample_pa              # for the Barabasi-Albert model
+
+# But note that there are many more sample_ functions. Check out the docs!
+
+# Let's look at betweenness measures for our first graph. Here: the nodes again
+# colored by degree. Degree centrality states: nodes of higher degree are
+# considered to be more central. And that's also the way the force-directed
+# layout drawas them, obviously.
+
+set.seed(112358)                       # set RNG seed for repeatable randomness
+myGxy <- igraph::layout_with_fr(myG)   # calculate layout coordinates
+set.seed(NULL)                         # reset the RNG
+
+oPar <- par(mar = rep(0, 4))           # turn margins off, save graphics state
+plot(myG,
+     layout = myGxy,
+     rescale = FALSE,
+     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
+     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
+     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
+     vertex.size = 40 + (15 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
+     vertex.label = igraph::V(myG)$name,
+     vertex.label.family = "sans")
+par(oPar)                              # restore graphics state
+
+# ==   4.1  Diameter  ==========================================================
+
+igraph::diameter(myG)  # The diameter of a graph is its maximum length
+                       # shortest path.
+
+# let's plot this path: here are the nodes ...
+igraph::get_diameter(myG)
+
+# ... and we can get the x, y coordinates from iGxy by subsetting with the node
+# names. The we draw the diameter-path with a transparent, thick pink line:
+lines(myGxy[igraph::get_diameter(myG),], lwd=10, col="#ff63a788")
+
+# == Centralization scores
+
+?igraph::centralize
+# replot our graph, and color by log_betweenness:
+
+bC <- igraph::centr_betw(myG)  # calculate betweenness centrality
+nodeBetw <- bC$res
+nodeBetw <- round(log(nodeBetw +1)) + 1
+
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myG,
+     layout = myGxy,
+     rescale = FALSE,
+     xlim = c(min(myGxy[,1]) * 0.99, max(myGxy[,1]) * 1.01),
+     ylim = c(min(myGxy[,2]) * 0.99, max(myGxy[,2]) * 1.01),
+     vertex.color=heat.colors(max(igraph::degree(myG)+1))[igraph::degree(myG)+1],
+     vertex.size = 40 + (15 * igraph::degree(myG)),
+     vertex.label.cex = 0.4 + (0.15 * igraph::degree(myG)),
+     vertex.label = igraph::V(myG)$name,
+     vertex.label.family = "sans")
+par(oPar)
+
+# Note that the betweenness - the number of shortest paths that pass through a
+# node, is in general higher for high-degree nodes - but not always: Eqr2 has
+# higher betweenness than Itv7: this measure really depends on the detailed
+# local topology of the graph."
+
+# Can you use centr_eigen() and centr_degree() to calculate the respective
+# values? That's something I would expect you to be able to do.
+#
+# Lets plot betweenness centrality for our random geometric graph:
+
+bCmyGRG <- igraph::centr_betw(myGRG)  # calculate betweenness centrality
+
+nodeBetw <- bCmyGRG$res
+nodeBetw <- round((log(nodeBetw +1))^2.5) + 1
+
+# colours and size proportional to betweenness
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myGRG,
+     layout = cbind(rGAM$x, rGAM$y), # use our node coordinates for layout,
+     rescale = FALSE,
+     xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
+     ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
+     vertex.color=heat.colors(max(nodeBetw))[nodeBetw],
+     vertex.size = 0.1 + (0.03 * nodeBetw),
+     vertex.label = NA)
+par(oPar)
+
+igraph::diameter(myGRG)
+lines(rGAM$x[igraph::get_diameter(myGRG)],
+      rGAM$y[igraph::get_diameter(myGRG)],
+      lwd = 10,
+      col = "#ff335533")
+
+
+
+# =    5  GRAPH CLUSTERING  ====================================================
+
+
+# Clustering finds "communities" in graphs - and depending what the edges
+# represent, these could be complexes, pathways, biological systems or similar.
+# There are many graph-clustering algorithms. One approach with many attractive
+# properties is the Map Equation, developed by Martin Rosvall. See:
+# http://www.ncbi.nlm.nih.gov/pubmed/18216267 and htttp://www.mapequation.org
+
+
+myGRGclusters <- igraph::cluster_infomap(myGRG)
+igraph::modularity(myGRGclusters)  # ... measures how separated the different
+                                   # membership types are from each other
+igraph::membership(myGRGclusters)         # which nodes are in what cluster?
+table(igraph::membership(myGRGclusters))  # how large are the clusters?
+
+# The largest cluster has 48 members, the second largest has 25, etc.
+
+
+# Lets plot our graph again, coloring the nodes of the first five communities by
+# their cluster membership:
+
+# first, make a vector with as many grey colors as we have communities ...
+commColors <- rep("#f1eef6", max(igraph::membership(myGRGclusters)))
+# ... then overwrite the first five with "real colors" - something like rust,
+# lilac, pink, and mauve or so.
+commColors[1:5] <- c("#980043", "#dd1c77", "#df65b0", "#c994c7", "#d4b9da")
+
+
+oPar <- par(mar= c(0, 0, 0, 0)) # Turn margins off
+plot(myGRG,
+     layout = cbind(rGAM$x, rGAM$y),
+     rescale = FALSE,
+     xlim = c(min(rGAM$x) * 0.9, max(rGAM$x) * 1.1),
+     ylim = c(min(rGAM$y) * 0.9, max(rGAM$y) * 1.1),
+     vertex.color=commColors[igraph::membership(myGRGclusters)],
+     vertex.size = 0.3 + (0.4 * igraph::degree(myGRG)),
+     vertex.label = NA)
+par(oPar)
+
+# That's all.
+
+
+# [END]
diff --git a/myScripts/Graphs_and_networks.R b/myScripts/Graphs_and_networks.R
new file mode 100644
index 0000000..e8757d5
--- /dev/null
+++ b/myScripts/Graphs_and_networks.R
@@ -0,0 +1,71 @@
+generate_overview_graph <- function() {
+  sealKey()
+  scCCnet <- readRDS("./data/scCCnet.rds") # S. cerevisiae Cell Cycle network
+  scCCnet_ns <- scCCnet[-3] # Remove scores
+  graph <- igraph::graph_from_edgelist(as.matrix(scCCnet_ns), directed = FALSE)
+  degrees <- igraph::degree(graph, normalized = TRUE)
+  graphxy <- igraph::layout_with_graphopt(graph, charge=0.30, mass=30)
+
+  # Generate color palette:
+  colors = heat.colors(
+    max(igraph::degree(graph)+1)
+  )[igraph::degree(graph)+1]
+
+  plot(
+    graph,
+    layout=graphxy,
+    rescale=TRUE, # Default is true, explicit as reminder.
+    vertex.size = (125 * degrees),
+    vertex.color = colors,
+    vertex.label=NA
+  )
+
+  legend(
+    "topright",
+    legend=c(max(igraph::degree(graph)), 0),
+    fill=c(colors[which.max(igraph::degree(graph))], colors[1]),
+    title = "Degrees"
+  )
+  sealKey()
+}
+
+generate_log_freq_vs_log_rank <- function() {
+  sealKey()
+  scCCnet <- readRDS("./data/scCCnet.rds") # S. cerevisiae Cell Cycle network
+  scCCnet_ns <- scCCnet[-3] # Remove scores
+  graph <- igraph::graph_from_edgelist(as.matrix(scCCnet_ns), directed = FALSE)
+  degrees <- igraph::degree(graph, normalized = FALSE)
+  freqs <- table(degrees)
+  scatter.smooth(log10(as.numeric(names(freqs)) + 1),
+                 log10(as.numeric(freqs)), type = "b",
+                 bg = "#A5F5CC",
+                 xlab = "log(Rank)", ylab = "log(frequency)",
+                 main = "Log Frequency against Log-Rank")
+  sealKey()
+}
+
+
+get_top_degrees <- function(n = 5) {
+  sealKey()
+  scCCnet <- readRDS("./data/scCCnet.rds") # S. cerevisiae Cell Cycle network
+  scCCnet_ns <- scCCnet[-3] # Remove scores
+  graph <- igraph::graph_from_edgelist(as.matrix(scCCnet_ns), directed = FALSE)
+  degrees <- igraph::degree(graph, normalized = FALSE)
+
+  sorted_degrees <- degrees[order(degrees)]
+  sealKey()
+  tail(sorted_degrees, n)
+}
+
+get_bot_degrees <- function(n = 5) {
+  sealKey()
+  scCCnet <- readRDS("./data/scCCnet.rds") # S. cerevisiae Cell Cycle network
+  scCCnet_ns <- scCCnet[-3] # Remove scores
+  graph <- igraph::graph_from_edgelist(as.matrix(scCCnet_ns), directed = FALSE)
+  degrees <- igraph::degree(graph, normalized = FALSE)
+
+  sorted_degrees <- degrees[order(degrees)]
+  sealKey()
+  head(sorted_degrees, n)
+}
+
diff --git a/myScripts/genome_annotation.R b/myScripts/genome_annotation.R
new file mode 100644
index 0000000..d7d6273
--- /dev/null
+++ b/myScripts/genome_annotation.R
@@ -0,0 +1,2 @@
+# Integrator Unit: Genome Annotation
+bio3d::get.seq()
diff --git a/myScripts/graphs_and_networks_logs.png b/myScripts/graphs_and_networks_logs.png
new file mode 100644
index 0000000..07935a5
Binary files /dev/null and b/myScripts/graphs_and_networks_logs.png differ
diff --git a/myScripts/graphs_and_networks_overview.png b/myScripts/graphs_and_networks_overview.png
new file mode 100644
index 0000000..74231f8
Binary files /dev/null and b/myScripts/graphs_and_networks_overview.png differ