diff --git a/BIN-Data_integration.R b/BIN-Data_integration.R
new file mode 100644
index 0000000..af47d83
--- /dev/null
+++ b/BIN-Data_integration.R
@@ -0,0 +1,192 @@
+# BIN-Data_integration.R
+#
+# Purpose:  A Bioinformatics Course:
+#              R code accompanying the BIN-Data_integration unit.
+#
+# Version:  1.0
+#
+# Date:     2017  10  08
+# Author:   Boris Steipe (boris.steipe@utoronto.ca)
+#
+# Versions:
+#           1.0    First live version
+#
+#
+# TODO:
+#           Develop a fungi-specific BioMart example.
+#           (cf.
+# https://cran.r-project.org/web/packages/biomartr/vignettes/Functional_Annotation.html )
+#
+# == DO NOT SIMPLY  source()  THIS FILE! =======================================
+#
+# If there are portions you don't understand, use R's help system, Google for an
+# answer, or ask your instructor. Don't continue if you don't understand what's
+# going on. That's not how it works ...
+#
+# ==============================================================================
+ 
+#TOC> ==========================================================================
+#TOC> 
+#TOC>   Section  Title                       Line
+#TOC> -------------------------------------------
+#TOC>   1        Identifier mapping            41
+#TOC>   2        Cross-referencing tables     142
+#TOC> 
+#TOC> ==========================================================================
+ 
+
+
+
+
+# =    1  Identifier mapping  ==================================================
+
+# UniProt provides a well-designed ID mapping tool that can be accessed
+# online at     http://www.uniprot.org/mapping/
+#
+# Here we will use the UniProt Web API for this tool to map identifiers. The
+# UniProt ID mapping service supports a "RESTful API": responses can be obtained
+# simply via a Web- browsers request. Such requests are commonly sent via the
+# GET or POST verbs that a Webserver responds to, when a client asks for data.
+# GET requests are visible in the URL of the request; POST requests are not
+# directly visible, they are commonly used to send the contents of forms, or
+# when transmitting larger, complex data items. The UniProt ID mapping sevice
+# can accept long lists of IDs, thus using the POST mechanism makes sense. GET()
+# and  POST() functions are part of the httr package.
+
+# To begin, we load  httr, which supports sending and receiving data via the
+# http protocol, just like a Web browser.
+if (!require(httr, quietly=TRUE)) {
+  install.packages("httr")
+  library(httr)
+}
+
+# We will walk through the process with the refSeqID
+# of yeast Mbp1 and Swi4, and we will also enter a dummy ID to check what
+# happens if the ID can't be mapped:
+myQueryIDs <- "NP_010227 NP_00000 NP_011036"
+
+
+# The UniProt ID mapping service API is very straightforward to use: just define
+# the URL of the server and send a list of items labelled as "query" in the body
+# of the request.
+
+URL <- "http://www.uniprot.org/mapping/"
+response <- POST(URL,
+                 body = list(from = "P_REFSEQ_AC",   # Refseq Protein
+                             to = "ACC",             # UniProt ID
+                             format = "tab",
+                             query = myQueryIDs))
+
+response
+
+# We need to check the status code - if it is not 200, an error ocurred and we
+# can't process the result:
+status_code(response)
+
+# If the query is successful, tabbed text is returned. We can assign that to a
+# data frame. Note that we use textConnection() to read data directly from a char object, which can go in the spot where read.delim() expects a file-name argument.
+
+myMappedIDs <- read.delim(file = textConnection(content(response)),
+                          sep = "\t",
+                          stringsAsFactors = FALSE)
+myMappedIDs
+
+# If the query can't be fulfilled because of a problem with the server, a
+# WebPage is returned. But the server status is also returned and we can check
+# the status code. I have lately gotten many "503" status codes: Server Not
+# Available...
+
+# We wrap this into a function:
+
+myIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
+  # Use UniProt ID mapping service to map one or more IDs
+  # Parameters:
+  #    s  char  A string of separated IDs
+  #    mapFrom  char  the database in which the IDs in s are valid. Default
+  #                     is RefSeq protein
+  #    mapTo    char  the database in which the target IDs are valid. Default
+  #                     is UniProtKB
+  # Value
+  #    a data frame of mapped IDs, with column names From and To, or an
+  #    empty data frame if the mapping was unsuccessful. No rows are returned
+  #    for IDs that are not mapped.
+
+  URL <- "http://www.uniprot.org/mapping/"
+  response <- POST(URL,
+                   body = list(from = mapFrom,
+                               to = mapTo,
+                               format = "tab",
+                               query = s))
+
+  if (status_code(response) == 200) { # 200: oK
+    myMap <- read.delim(file = textConnection(content(response)),
+                        sep = "\t",
+                        stringsAsFactors = FALSE)
+  } else {
+    myMap <- data.frame()
+    warning(paste("No uniProt ID mapping returned:",
+                  "server sent status",
+                  status_code(response)))
+  }
+
+  return(myMap)
+}
+
+# Try it out ...
+myIDmap("NP_010227 NP_011036 NP_012881 NP_013729 NP_012165")
+
+# A function UniProtIDmap() is in the ABC-dbUtilities.R script and it is loaded
+# into your workspace on startup.
+
+
+# =    2  Cross-referencing tables  ============================================
+
+# Sometimes we get the IDs we need to map in a large table, e.g. from a list of
+# genes in a model organism database such as SGD, or from the Human Genen
+# Nomenclature commission. How do we map one set of identifiers to another one?
+
+# The function to use is match().
+# Here is a tiny set of identifiers taken from a much larger table to illustrate the principle:
+#
+
+myIDs <- data.frame(uID =   c("P38903", "P31383", "P47177", "P47096", "Q07747",
+                              "Q08641", "P47129", "P52910", "P00330", "P81450"),
+                    name =  c("2A5D", "2AAA", "2NDP", "3HAO", "AAD4",
+                              "AB140", "ACF4", "ACS2", "ADH1", "ATP18"),
+                    refID = c("NP_014657", "NP_009386",
+                              "NP_012683", "NP_012559",
+                              "NP_010038", "NP_014882",
+                              "NP_012616", "NP_013254",
+                              "NP_014555", "NP_013629"),
+                    stringsAsFactors = FALSE)
+
+myIDs
+
+# Say we want to map "NP_010038", "NP_012559", and "NP_013629", in that order to
+# their gene names.
+myQuery <- c("NP_010038", "NP_999999", "NP_013629")
+
+# %in% will only tell us if these IDs are present in the table:
+myQuery %in% myIDs$refID
+
+# ... but not where they are located. But match() does what we need here:
+match(myQuery, myIDs$refID)
+
+# ... and we can use the result to subset the column that we want to map to:
+myIDs$name[match(myQuery, myIDs$refID)]
+
+# Note that the output preserves the NA - i.e. the length of the mapped
+# values is exactly the same as the length of the query.
+
+# task: map the three genes to their UniProt Identifier.
+
+
+#
+# Note: if you want to do very many queries in large tables, use the
+# fmatch() function in the "fastmatch" package for a considerable
+# speedup.
+
+
+
+
+# [END]