Add code and utility functions for database export of protein annotations to JSON - towards sharing annotations on the Student Wiki Public page
This commit is contained in:
parent
3bee83495f
commit
4d071cf8d5
@ -3,12 +3,14 @@
|
||||
# Purpose: A Bioinformatics Course:
|
||||
# R code accompanying the BIN-FUNC-Domain_annotation unit.
|
||||
#
|
||||
# Version: 1.2
|
||||
# Version: 1.3
|
||||
#
|
||||
# Date: 2017-11 - 2020-10
|
||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||
#
|
||||
# Versions:
|
||||
# 1.3 Add code for database export to JSON and instructions
|
||||
# for uploading annotations to the Public Student Wiki page
|
||||
# 1.2 Consistently: data in ./myScripts/ ;
|
||||
# begin SHARING DATA section
|
||||
# 1.1 2020 Updates
|
||||
@ -31,12 +33,14 @@
|
||||
#TOC>
|
||||
#TOC> Section Title Line
|
||||
#TOC> ---------------------------------------------------------------------
|
||||
#TOC> 1 Update your database script 42
|
||||
#TOC> 1.1 Preparing an annotation file ... 49
|
||||
#TOC> 1.1.1 BEFORE "BIN-ALI-Optimal_sequence_alignment" 52
|
||||
#TOC> 1.1.2 AFTER "BIN-ALI-Optimal_sequence_alignment" 97
|
||||
#TOC> 1.2 Execute and Validate 124
|
||||
#TOC> 2 Plot Annotations 149
|
||||
#TOC> 1 Update your database script 48
|
||||
#TOC> 1.1 Preparing an annotation file ... 55
|
||||
#TOC> 1.1.1 BEFORE "BIN-ALI-Optimal_sequence_alignment" 58
|
||||
#TOC> 1.1.2 AFTER "BIN-ALI-Optimal_sequence_alignment" 106
|
||||
#TOC> 1.2 Execute and Validate 133
|
||||
#TOC> 2 Plot Annotations 158
|
||||
#TOC> 3 SHARING DATA 283
|
||||
#TOC> 3.1 Post MBP1_MYSPE as JSON data 298
|
||||
#TOC>
|
||||
#TOC> ==========================================================================
|
||||
|
||||
@ -276,16 +280,47 @@ par(oPar) # reset the plot parameters
|
||||
# It would be better to align the motif borders, at least approximately (not
|
||||
# all proteins have all motifs). How would you go about doing that?
|
||||
|
||||
# = 1 SHARING DATA ======
|
||||
# = 3 SHARING DATA ========================================================
|
||||
|
||||
# It's particularly interesting to compare such annotations across many
|
||||
# homologous proteins. I have created a file on the Student Wiki that you can
|
||||
# homologous proteins. I have created a page on the Student Wiki that you can
|
||||
# edit, and then download the data from the entire class directly to your
|
||||
# RStudio project.
|
||||
#
|
||||
|
||||
# I have provided a function that extracts all information that refers to a
|
||||
# single protein from the database, and prints it out as well-formatted JSON,
|
||||
# suitable to be pasted into our shareable Wiki-page. There is a fair amount of
|
||||
# bookkeeping involved, but the code is not otherwise very enlightening so I
|
||||
# will spare you the details - it's in "./scripts/ABC-dbUtilities.R" if you
|
||||
# want to have a look.
|
||||
|
||||
# == 3.1 Post MBP1_MYSPE as JSON data ======================================
|
||||
|
||||
# Task:
|
||||
# =====
|
||||
# TBC ...
|
||||
# 1: Run the following code:
|
||||
|
||||
# Print the Wiki-ready block for MBP1_MYSPE: a spacer template, BEGIN/END
# marker comments, and the JSON export wrapped in <pre> tags. Each element
# is written on its own line, and the output ends with a newline.
writeLines(c("{{Vspace}}",
             "<!-- ==== BEGIN PROTEIN ==== -->",
             "<pre>",
             dbProt2JSON(sprintf("MBP1_%s", biCode(MYSPE))),
             "</pre>",
             "<!-- ===== END PROTEIN ====== -->"))
|
||||
|
||||
# 2: Copy the entire output,
|
||||
# 3: Navigate to
|
||||
# http://steipe.biochemistry.utoronto.ca/abc/students/index.php/Public
|
||||
# ... edit the page, and paste your output at the top.
|
||||
# 4: Save your edits.
|
||||
|
||||
# Next, once we have collected a number of protein annotations, we can access
|
||||
# the page and import the data into our database.
|
||||
#
|
||||
# Code to come soon ...
|
||||
|
||||
|
||||
|
||||
# [END]
|
||||
|
@ -1,56 +1,87 @@
|
||||
# tocID <- "scripts/ABC-dbUtilities.R"
|
||||
#
|
||||
# database utilities for ABC learning units
|
||||
# Purpose: Database utilities for ABC learning units.
|
||||
#
|
||||
# Version 2.1
|
||||
#
|
||||
# Date: 2017-11 - 2020-10
|
||||
# Author: Boris Steipe (boris.steipe@utoronto.ca)
|
||||
#
|
||||
# Versions:
|
||||
# 2.1 Add JSON export functions
|
||||
# 2.0 Test all JSON import and prevent addition of duplicates. This
|
||||
# is necessary for import of data from the public page
|
||||
# 1.1 2020 Updates
|
||||
# 1.0 Live version 2017
|
||||
#
|
||||
# Notes:
|
||||
# There are no functions to modify or delete entries. To do either,
|
||||
# recreate the database with correct data in the creation script. This is the
|
||||
# preferred way that ensures the entire database can be reproduced by
|
||||
# source()'ing its generating script.
|
||||
#
|
||||
# Inserting data goes through only the most minimal validation steps. For
|
||||
# production applications, more validation would need to be added, as well
|
||||
# as an overall validation of database integrity
|
||||
#
|
||||
# ToDo:
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
|
||||
#TOC> ==========================================================================
|
||||
#TOC>
|
||||
#TOC> Section Title Line
|
||||
#TOC> -------------------------------------------------
|
||||
#TOC> 1 PACKAGES 32
|
||||
#TOC> 2 FUNCTIONS 50
|
||||
#TOC> 2.01 dbSanitizeSequence() 53
|
||||
#TOC> 2.02 dbConfirmUnique() 88
|
||||
#TOC> 2.03 dbInit() 106
|
||||
#TOC> 2.04 dbAutoincrement() 147
|
||||
#TOC> 2.05 dbAddProtein() 160
|
||||
#TOC> 2.06 dbAddFeature() 180
|
||||
#TOC> 2.07 dbAddTaxonomy() 199
|
||||
#TOC> 2.08 dbAddAnnotation() 215
|
||||
#TOC> 2.09 dbFetchUniProtSeq() 243
|
||||
#TOC> 2.10 dbFetchPrositeFeatures() 289
|
||||
#TOC> 2.11 node2text() 339
|
||||
#TOC> 2.12 dbFetchNCBItaxData() 351
|
||||
#TOC> 2.13 UniProtIDmap() 390
|
||||
#TOC> 3 TESTS 429
|
||||
#TOC> Section Title Line
|
||||
#TOC> -------------------------------------------------------
|
||||
#TOC> 1 INITIALISATIONS AND PARAMETERS 60
|
||||
#TOC> 2 PACKAGES 65
|
||||
#TOC> 3 FUNCTIONS 81
|
||||
#TOC> 3.01 dbSanitizeSequence() 84
|
||||
#TOC> 3.02 dbConfirmUnique() 119
|
||||
#TOC> 3.03 dbInit() 137
|
||||
#TOC> 3.04 dbAutoincrement() 177
|
||||
#TOC> 3.05 dbAddProtein() 190
|
||||
#TOC> 3.06 dbAddFeature() 222
|
||||
#TOC> 3.07 dbAddTaxonomy() 253
|
||||
#TOC> 3.08 dbAddAnnotation() 288
|
||||
#TOC> 3.09 dbFetchUniProtSeq() 335
|
||||
#TOC> 3.10 dbFetchPrositeFeatures() 381
|
||||
#TOC> 3.11 node2text() 431
|
||||
#TOC> 3.12 dbFetchNCBItaxData() 443
|
||||
#TOC> 3.13 UniProtIDmap() 482
|
||||
#TOC> 3.14 dbProt2JSON() 521
|
||||
#TOC> 3.15 dbSeq2JSON() 606
|
||||
#TOC> 3.16 dbRow2JSON() 636
|
||||
#TOC> 4 TESTS 656
|
||||
#TOC>
|
||||
#TOC> ==========================================================================
|
||||
|
||||
|
||||
# = 1 PACKAGES ============================================================
|
||||
# = 1 INITIALISATIONS AND PARAMETERS ======================================
|
||||
|
||||
doTESTS <- FALSE # run tests if TRUE
|
||||
|
||||
|
||||
# = 2 PACKAGES ============================================================
|
||||
|
||||
|
||||
# Make sure the packages used by the database utilities are available:
# any package whose namespace cannot be loaded is installed, in this order.
invisible(lapply(c("jsonlite", "httr", "xml2"), function(pkg) {
  if (! requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}))
|
||||
|
||||
|
||||
# = 2 FUNCTIONS ===========================================================
|
||||
# = 3 FUNCTIONS ===========================================================
|
||||
|
||||
|
||||
# == 2.01 dbSanitizeSequence() =============================================
|
||||
# == 3.01 dbSanitizeSequence() =============================================
|
||||
dbSanitizeSequence <- function(s, unambiguous = TRUE) {
|
||||
# Remove FASTA header lines, if any,
|
||||
# flatten any structure that s has,
|
||||
@ -85,7 +116,7 @@ dbSanitizeSequence <- function(s, unambiguous = TRUE) {
|
||||
}
|
||||
|
||||
|
||||
# == 2.02 dbConfirmUnique() ================================================
|
||||
# == 3.02 dbConfirmUnique() ================================================
|
||||
dbConfirmUnique <- function(x) {
|
||||
# x is a vector of logicals.
|
||||
# returns x if x has exactly one TRUE element.
|
||||
@ -103,10 +134,10 @@ dbConfirmUnique <- function(x) {
|
||||
}
|
||||
|
||||
|
||||
# == 2.03 dbInit() =========================================================
|
||||
# == 3.03 dbInit() =========================================================
|
||||
dbInit <- function() {
|
||||
# Return an empty instance of the protein database
|
||||
# Open the link and study the schema:
|
||||
# The schema is here:
|
||||
# https://docs.google.com/presentation/d/13vWaVcFpWEOGeSNhwmqugj2qTQuH1eZROgxWdHGEMr0
|
||||
|
||||
db <- list()
|
||||
@ -125,7 +156,6 @@ dbInit <- function() {
|
||||
ID = numeric(),
|
||||
species = character())
|
||||
|
||||
|
||||
db$annotation <- data.frame(
|
||||
ID = numeric(),
|
||||
proteinID = numeric(),
|
||||
@ -144,7 +174,7 @@ dbInit <- function() {
|
||||
}
|
||||
|
||||
|
||||
# == 2.04 dbAutoincrement() ================================================
|
||||
# == 3.04 dbAutoincrement() ================================================
|
||||
dbAutoincrement <- function(tb) {
|
||||
# Return a unique integer that can be used as a primary key
|
||||
# Value:
|
||||
@ -157,90 +187,152 @@ dbAutoincrement <- function(tb) {
|
||||
}
|
||||
|
||||
|
||||
# == 2.05 dbAddProtein() ===================================================
|
||||
# == 3.05 dbAddProtein() ===================================================
|
||||
dbAddProtein <- function(db, jsonDF) {
  # Add one or more protein entries to the database db if a protein with the
  # same name does not yet exist. This enforces that protein names are unique.
  # Parameters:
  #     db      list        a database created with dbInit()
  #     jsonDF  data frame  protein data imported into a data frame with
  #                         fromJSON()
  # Value:
  #     db      list        the database, with every accepted row appended
  #                         to db$protein. Rejected rows are reported with
  #                         a note on the console and skipped.

  for (i in seq_len(nrow(jsonDF))) {

    # Reset for every row: otherwise the flag is undefined for the first row,
    # and one rejected row would block all rows that follow it.
    isValid <- TRUE

    if (jsonDF$name[i] %in% db$protein$name) {
      cat(sprintf("Note: Protein No. %d in the input is \"%s\", but %s%s\n",
                  i, jsonDF$name[i],
                  "a protein with this name already exists in the database. ",
                  "Skipping this input."))
      isValid <- FALSE
    }

    if (isValid) {
      x <- data.frame(ID         = dbAutoincrement(db$protein),
                      name       = jsonDF$name[i],
                      RefSeqID   = jsonDF$RefSeqID[i],
                      UniProtID  = jsonDF$UniProtID[i],
                      taxonomyID = jsonDF$taxonomyID[i],
                      sequence   = dbSanitizeSequence(jsonDF$sequence[i]))
      db$protein <- rbind(db$protein, x)
    }
  }
  return(db)
}
|
||||
|
||||
|
||||
# == 2.06 dbAddFeature() ===================================================
|
||||
# == 3.06 dbAddFeature() ===================================================
|
||||
dbAddFeature <- function(db, jsonDF) {
  # Add one or more feature entries to the database db. Skip if a feature with
  # the same name already exists.
  # Parameters:
  #     db      list        a database created with dbInit()
  #     jsonDF  data frame  feature data imported into a data frame with
  #                         fromJSON()
  # Value:
  #     db      list        the database, with every accepted row appended
  #                         to db$feature. Rejected rows are reported with
  #                         a note on the console and skipped.

  for (i in seq_len(nrow(jsonDF))) {

    isValid <- TRUE   # reset for every input row

    if (jsonDF$name[i] %in% db$feature$name) {
      cat(sprintf("Note: Feature No. %d in the input is \"%s\", but %s%s\n",
                  i, jsonDF$name[i],
                  "a feature with this name already exists in the database. ",
                  "Skipping this input."))
      isValid <- FALSE
    }

    if (isValid) {
      x <- data.frame(ID          = dbAutoincrement(db$feature),
                      name        = jsonDF$name[i],
                      description = jsonDF$description[i],
                      sourceDB    = jsonDF$sourceDB[i],
                      accession   = jsonDF$accession[i])
      db$feature <- rbind(db$feature, x)
    }
  }
  return(db)
}
|
||||
|
||||
|
||||
# == 2.07 dbAddTaxonomy() ==================================================
|
||||
# == 3.07 dbAddTaxonomy() ==================================================
|
||||
dbAddTaxonomy <- function(db, jsonDF) {
  # Add one or more taxonomy entries to the database db. Skip if species name
  # or taxonomy ID already exist in the database.
  # Parameters:
  #     db      list        A database created with dbInit()
  #     jsonDF  data frame  Taxonomy data imported into a data frame with
  #                         fromJSON()
  # Value:
  #     db      list        the database, with every accepted row appended
  #                         to db$taxonomy. Rejected rows are reported with
  #                         a note on the console and skipped.

  for (i in seq_len(nrow(jsonDF))) {

    isValid <- TRUE   # reset for every input row

    # The ID is stored as integer, so compare and report it as integer too.
    thisID <- as.integer(jsonDF$ID[i])

    if (jsonDF$species[i] %in% db$taxonomy$species) {
      # Report the species name: taxonomy input has no "name" column.
      cat(sprintf("Note: Species No. %d in the input is \"%s\", but %s%s\n",
                  i, jsonDF$species[i],
                  "a species with this name already exists in the database. ",
                  "Skipping this input."))
      isValid <- FALSE
    } else if (thisID %in% db$taxonomy$ID) {
      cat(sprintf("Note: Taxonomy ID No. %d in the input is \"%d\", but %s%s\n",
                  i, thisID,
                  "this taxonomy ID already exists in the database. ",
                  "Skipping this input."))
      isValid <- FALSE
    }

    if (isValid) {
      x <- data.frame(ID      = thisID,
                      species = jsonDF$species[i])
      db$taxonomy <- rbind(db$taxonomy, x)
    }
  }
  return(db)
}
|
||||
|
||||
# == 2.08 dbAddAnnotation() ================================================
|
||||
|
||||
# == 3.08 dbAddAnnotation() ================================================
|
||||
dbAddAnnotation <- function(db, jsonDF) {
  # Add one or more annotation entries to the database db. Skip the entry if
  # it already exists in the database.
  # Parameters:
  #     db      list        a database created with dbInit()
  #     jsonDF  data frame  annotation data imported into a data frame with
  #                         fromJSON()
  # Value:
  #     db      list        the database, with every accepted row appended
  #                         to db$annotation. Rejected rows are reported with
  #                         a note on the console and skipped.

  for (i in seq_len(nrow(jsonDF))) {

    isValid <- TRUE   # reset for every input row

    # Translate the protein and feature names used in the JSON data into the
    # database-internal IDs. dbConfirmUnique() returns its argument if exactly
    # one element is TRUE.
    sel <- jsonDF$pName[i] == db$protein$name
    sel <- dbConfirmUnique(sel)   # Confirm that this protein ID exists
    pID <- db$protein$ID[sel]

    sel <- jsonDF$fName[i] == db$feature$name
    sel <- dbConfirmUnique(sel)   # Confirm that this feature ID exists
    fID <- db$feature$ID[sel]

    thisStart <- as.integer(jsonDF$start[i])
    thisEnd   <- as.integer(jsonDF$end[i])

    # An annotation is a duplicate if protein, feature, start, and end are all
    # the same as in an existing row.
    sel <- db$annotation$proteinID == pID &
           db$annotation$featureID == fID &
           db$annotation$start == thisStart &
           db$annotation$end == thisEnd

    if (any(sel)) {
      cat(sprintf("Note: annotation No. %d in the input has %s%s%s\n",
                  i,
                  "the same protein name, feature name, start, and end ",
                  "as one that already exists in the database. ",
                  "Skipping this input."))
      isValid <- FALSE
    }

    if (isValid) {
      x <- data.frame(ID        = dbAutoincrement(db$annotation),
                      proteinID = pID,
                      featureID = fID,
                      start     = thisStart,
                      end       = thisEnd)
      db$annotation <- rbind(db$annotation, x)
    }
  }
  return(db)
}
|
||||
|
||||
|
||||
# == 2.09 dbFetchUniProtSeq() ==============================================
|
||||
# == 3.09 dbFetchUniProtSeq() ==============================================
|
||||
dbFetchUniProtSeq <- function(IDs) {
|
||||
# Fetch a protein sequence from UniProt.
|
||||
# Parameters:
|
||||
@ -286,7 +378,7 @@ if (FALSE) {
|
||||
|
||||
|
||||
|
||||
# == 2.10 dbFetchPrositeFeatures() =========================================
|
||||
# == 3.10 dbFetchPrositeFeatures() =========================================
|
||||
dbFetchPrositeFeatures <- function(ID) {
|
||||
# Fetch feature annotations from ScanProsite.
|
||||
# Parameters:
|
||||
@ -336,7 +428,7 @@ if (FALSE) {
|
||||
|
||||
}
|
||||
|
||||
# == 2.11 node2text() ======================================================
|
||||
# == 3.11 node2text() ======================================================
|
||||
node2text <- function(doc, tag) {
|
||||
# an extractor function for the contents of elements
|
||||
# between given tags in an XML response.
|
||||
@ -348,7 +440,7 @@ node2text <- function(doc, tag) {
|
||||
}
|
||||
|
||||
|
||||
# == 2.12 dbFetchNCBItaxData() =============================================
|
||||
# == 3.12 dbFetchNCBItaxData() =============================================
|
||||
dbFetchNCBItaxData <- function(ID) {
|
||||
# Fetch feature taxID and Organism from the NCBI.
|
||||
# Parameters:
|
||||
@ -387,7 +479,7 @@ dbFetchNCBItaxData <- function(ID) {
|
||||
|
||||
|
||||
|
||||
# == 2.13 UniProtIDmap() ===================================================
|
||||
# == 3.13 UniProtIDmap() ===================================================
|
||||
UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
||||
# Use UniProt ID mapping service to map one or more IDs
|
||||
# Parameters:
|
||||
@ -426,9 +518,144 @@ UniProtIDmap <- function (s, mapFrom = "P_REFSEQ_AC", mapTo = "ACC") {
|
||||
}
|
||||
|
||||
|
||||
# = 3 TESTS ===============================================================
|
||||
# == 3.14 dbProt2JSON() ====================================================
|
||||
dbProt2JSON <- function(thisProt) {
  # Extract all protein related data from myDB and return in JSON format.
  # Parameters:
  #     thisProt  char  the name of a protein, as stored in myDB$protein$name
  # Value:
  #     char  a single string: a JSON object with the keys "protein",
  #           "taxonomy", "annotation", and "feature". Lines are separated
  #           by "\n".
  # Note:
  #     The database is read from the variable myDB, which must exist in the
  #     calling environment; it is not passed as a parameter.

  # Exactly one protein must match, otherwise the output would be meaningless.
  sel <- which(myDB$protein$name == thisProt)
  if (length(sel) != 1) {
    stop(sprintf("Expected exactly one protein named \"%s\" in myDB, found %d.",
                 thisProt, length(sel)),
         call. = FALSE)
  }

  thisData <- list()

  # add a protein table
  thisData$protein <- myDB$protein[sel, ]

  # add a taxonomy table
  sel <- which(myDB$taxonomy$ID == thisData$protein$taxonomyID)
  thisData$taxonomy <- myDB$taxonomy[sel, ]

  # add the entries for this protein from the annotation table
  sel <- which(myDB$annotation$proteinID == thisData$protein$ID)
  thisData$annotation <- myDB$annotation[sel, ]

  # our .json convention uses pName and fName as keys, not the db-internal IDs:
  # look up the protein and feature name for each annotation row
  thisData$annotation$pName <- as.character(
    myDB$protein$name[match(thisData$annotation$proteinID, myDB$protein$ID)])
  thisData$annotation$fName <- as.character(
    myDB$feature$name[match(thisData$annotation$featureID, myDB$feature$ID)])

  # add the corresponding feature table
  sel <- which(myDB$feature$ID %in% thisData$annotation$featureID)
  thisData$feature <- myDB$feature[sel, ]

  # remove columns that are not going into JSON output
  thisData$protein$ID           <- NULL
  thisData$annotation$ID        <- NULL
  thisData$annotation$proteinID <- NULL
  thisData$annotation$featureID <- NULL
  thisData$feature$ID           <- NULL

  # create JSON-formatted output
  # ( jsonlite::prettify() is too wordy for a compact Wikipage )

  out <- character()
  out <- c(out, '{')

  # protein: all scalar columns first, the sequence as an array of chunks last
  out <- c(out, ' "protein": {')
  sel <- colnames(thisData$protein) != "sequence"
  out <- c(out, sprintf(" %s,",
                        dbRow2JSON(thisData$protein[1, sel, drop = FALSE],
                                   coll = ",\n ")))
  out <- c(out, dbSeq2JSON(thisData$protein$sequence[1]))
  out <- c(out, ' },')

  out <- c(out, ' "taxonomy": {')
  out <- c(out, sprintf(" %s", dbRow2JSON(thisData$taxonomy)))
  out <- c(out, ' },')

  # annotation: one JSON object per row
  out <- c(out, ' "annotation": [')
  for (i in seq_len(nrow(thisData$annotation))) {
    out <- c(out, sprintf(" {%s},", dbRow2JSON(thisData$annotation[i, ])))
  }
  out[length(out)] <- gsub(",$", "", out[length(out)])   # remove last ","
  out <- c(out, ' ],')

  # feature: one JSON object per row, with the (long) description on a line
  # of its own
  out <- c(out, ' "feature": [')
  sel <- colnames(thisData$feature) != "description"
  for (i in seq_len(nrow(thisData$feature))) {
    out <- c(out, sprintf(" {%s,",
                          dbRow2JSON(thisData$feature[i, sel, drop = FALSE])))
    out <- c(out, sprintf(" %s},",
                          dbRow2JSON(thisData$feature[i, "description",
                                                      drop = FALSE])))
  }
  out[length(out)] <- gsub(",$", "", out[length(out)])   # remove last ","
  out <- c(out, ' ]')

  out <- c(out, '}')

  return(paste0(out, collapse = "\n"))
}
|
||||
|
||||
|
||||
# == 3.15 dbSeq2JSON() =====================================================
|
||||
|
||||
dbSeq2JSON <- function(s, nIndents = 4, width = 70) {
  # Turn a sequence into a JSON key-value pair, with the value being a JSON
  # array of strings not exceeding a width of "width" characters, indented
  # by "nIndents" spaces.
  # Parameters:
  #     s         char  one or more sequence strings
  #     nIndents  num   number of spaces to indent the key and closing bracket
  #     width     num   maximum number of characters per array element
  # Value:
  #     char  a single string of the form
  #               "sequence" : [
  #                "<chunk>",
  #                ...
  #               ]
  #           with lines separated by "\n". Missing and empty strings in s
  #           contribute no elements.

  ind <- paste0(rep(" ", nIndents), collapse = "")

  s <- as.character(unlist(s))
  s <- s[!is.na(s) & nzchar(s)]

  # Split every sequence into chunks of at most "width" characters. Short
  # sequences go through the same path, so that they too are quoted and
  # indented: the result must always be valid JSON.
  chunks <- as.character(unlist(lapply(s, function(x) {
    l <- nchar(x)
    starts <- seq(1, l, by = width)
    ends   <- pmin(starts + width - 1, l)
    substring(x, starts, ends)
  })))

  items <- sprintf("%s \"%s\"", ind, chunks)
  n <- length(items)
  if (n > 1) {
    items[-n] <- paste0(items[-n], ",")   # "," after all but the last element
  }

  out <- c(sprintf("%s\"sequence\" : [", ind),
           items,
           sprintf("%s]", ind))
  return(paste0(out, collapse = "\n"))
}
|
||||
if (FALSE) {
  # Usage example - not executed when this file is source()'d, since myDB
  # may not exist at that time.
  cat(dbSeq2JSON(myDB$protein$sequence[1]))
}
|
||||
|
||||
|
||||
# == 3.16 dbRow2JSON() =====================================================
|
||||
|
||||
dbRow2JSON <- function(df, coll = ", ") {
  # Turn a single dataframe row into JSON key value pairs, where the keys are
  # the column names. Respects character / numeric mode.
  # Parameters:
  #     df    data frame  only the first row is used
  #     coll  char        separator placed between the key-value pairs
  # Value:
  #     char  a single string of the form  "key": value, "key": value ...
  #           Integers are written with "%d", other numbers with "%f",
  #           everything else as a quoted string. Missing values are written
  #           as null. A data frame without columns gives an empty string.

  pairs <- vapply(seq_len(ncol(df)), function(i) {
    x <- df[[i]][1]
    if (is.na(x)) {
      val <- "null"                    # a bare NA is not valid JSON
    } else if (is.integer(x)) {
      val <- sprintf("%d", x)
    } else if (is.numeric(x)) {
      val <- sprintf("%f", x)
    } else {
      # Escape backslashes and double quotes, otherwise a value that contains
      # them would end the JSON string early.
      txt <- gsub("([\"\\\\])", "\\\\\\1", as.character(x))
      val <- sprintf("\"%s\"", txt)
    }
    sprintf("\"%s\": %s", colnames(df)[i], val)
  }, character(1))

  return(paste0(pairs, collapse = coll))
}
|
||||
|
||||
|
||||
# = 4 TESTS ===============================================================
|
||||
|
||||
if (doTESTS) {
|
||||
if (! requireNamespace("testthat", quietly = TRUE)) {
|
||||
install.packages("testthat")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user