Line termination change and old code.

2021-11-16 00:31:48 -05:00
parent b1e00f52f7
commit affe00f6fb
86 changed files with 37873 additions and 37876 deletions
--- a/myScripts/.myProfile.R
+++ b/myScripts/.myProfile.R
@@ -1,21 +1,21 @@
-# .myProfile.R
-# This contains information which the course framework needs from time to time
-# to personalize assignments, validate submissions etc. Make sure that
-# the information correctly matches our official records.
-# myEmail          char      A string with your eMail address. Use your official
-#                            UofT eMail address.
-# myStudentNumber  numeric   Your UofT student number. Take care to have this
-#                            correct.
-#
-# NOTE:
-# After you have updated this script, move the file to your "myScripts" folder.
-# Utility scripts will look for it on the path: "./myScripts/.myProfile.R"
-#
-# ==============================================================================
-# options(stringsAsFactors = FALSE)
-
-myEMail <- "yh.deng@mail.utoronto.ca"        # e.g. "u.franklin@utoronto.ca"
-myStudentNumber <- 1005845285  # e.g. 1003141592
-MYSPE <- "Cutaneotrichosporon oleaginosum" 
-
-# [END]
+# .myProfile.R
+# This contains information which the course framework needs from time to time
+# to personalize assignments, validate submissions etc. Make sure that
+# the information correctly matches our official records.
+# myEMail          char      A string with your eMail address. Use your official
+#                            UofT eMail address.
+# myStudentNumber  numeric   Your UofT student number. Take care to have this
+#                            correct.
+#
+# NOTE:
+# After you have updated this script, move the file to your "myScripts" folder.
+# Utility scripts will look for it on the path: "./myScripts/.myProfile.R"
+#
+# ==============================================================================
+# options(stringsAsFactors = FALSE)
+
+myEMail <- "yh.deng@mail.utoronto.ca"        # e.g. "u.franklin@utoronto.ca"
+myStudentNumber <- 1005845285  # e.g. 1003141592
+MYSPE <- "Cutaneotrichosporon oleaginosum" # species name; not listed in the header - TODO confirm which scripts read it
+
+# [END]
--- a/myScripts/ABC-INT-Mutation_impact-code.R
+++ b/myScripts/ABC-INT-Mutation_impact-code.R
@@ -1,54 +1,51 @@
-myFA <-             readFASTA("data/RAB39B_HSa_coding.fa")
-myFA <- rbind(myFA, readFASTA("data/PTPN5_HSa_coding.fa"))
-myFA <- rbind(myFA, readFASTA("data/PTPN11_HSa_coding.fa"))
-myFA <- rbind(myFA, readFASTA("data/KRAS_HSa_coding.fa"))
-rownames(myFA)<-c("RAB39B", "PTPN5", "PTPN11", "KRAS") # Assign row names
-
-gen_mutations <- function(seq, N) {
-  stats <- c()
-  stats <- cbind(stats, c(0, 0, 0))
-  rownames(stats) <- c("silent", "missense", "nonsense")
-  colnames(stats) <- c("occurrences")
-  # Actual function
-  for (i in 1:217) {
-    # select index for mutation
-    working_seq <- Biostrings::DNAString(seq)
-    aa_seq <- Biostrings::translate(working_seq, no.init.codon = TRUE)
-    mut_action <- sample(c("ins", "del", "sub"), 1, TRUE)
-    mut_seq <- Biostrings::DNAString(seq)
-    if (mut_action == "sub") {
-      mut_index <- sample(1:length(working_seq), 1, replace = TRUE)
-      possible_mutations <- Biostrings::DNA_BASES
-      possible_mutations <- possible_mutations[possible_mutations != as.character(unlist(working_seq[mut_index]))]
-      mut_change <- sample(possible_mutations, 1, replace = TRUE)
-      mut_seq <- Biostrings::replaceLetterAt(mut_seq, mut_index, mut_change)
-    } else if (mut_action == "ins") {
-      mut_index <- sample(1:length(working_seq) - 2, 1, replace = TRUE)
-      possible_mutations <- Biostrings::DNA_BASES
-      mut_seq <- Biostrings::DNAString(paste(substring(working_seq, 1, mut_index - 1), sample(possible_mutations, 1), substring(working_seq, mut_index), sep = ""))
-    } else {
-      mut_index <- sample(1:length(working_seq), 1, replace = TRUE)
-      mut_seq <- mut_seq[-mut_index]
-    }
-    mut_seq <- Biostrings::DNAString(substring(mut_seq, 1, length(mut_seq) - (length(mut_seq) %% 3)))
-    mut_aa <- Biostrings::translate(mut_seq, no.init.codon = TRUE)
-
-    # Note: we need silent, nonsense, and missense
-    mut_aa_stop <- match("*", Biostrings::as.matrix(mut_aa))
-    aa_seq_stop <- match("*", Biostrings::as.matrix(aa_seq))
-    if (!is.na(mut_aa_stop) & (is.na(aa_seq_stop) | mut_aa_stop < aa_seq_stop)) {
-      stats["nonsense", "occurrences"] <- 1 + stats["nonsense", "occurrences"]
-    } else if (mut_aa == aa_seq) {
-      stats["silent", "occurrences"] <- 1 + stats["silent", "occurrences"]
-    } else {
-      stats["missense", "occurrences"] <- 1 + stats["missense", "occurrences"]
-    }
-  }
-  return(stats)
-}
-N_test <- 1200
-gen_mutations("ATGATGATGATGATGATG", N_test)
-gen_mutations("CCCCCCCCCCCCCCCCCC", N_test)
-gen_mutations("TATTACTATTACTATTAC", N_test)
-gen_mutations("TGGTGGTGGTGGTGGTGGTGGTGG", N_test)
-gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGT", N_test)
+gen_mutations <- function(seq, N) {
+  # Simulate N independent single-base substitutions in the coding sequence
+  # `seq` (a string, translated from its first base) and tally each one as
+  # silent, missense or nonsense. Returns a 3 x 1 matrix, column "occurrences".
+  sealKey() # See: http://steipe.biochemistry.utoronto.ca/abc/index.php/BCH441_Code_submisson_instructions
+  stats <- matrix(0, nrow = 3, ncol = 1,
+                  dimnames = list(c("silent", "missense", "nonsense"),
+                                  "occurrences"))
+  # Loop invariants: reference DNA, its translation, its first stop (-1: none).
+  original_seq <- Biostrings::DNAString(seq)
+  aa_seq <- Biostrings::translate(original_seq, no.init.codon = TRUE)
+  term_aa <- as.integer(regexpr(pattern = "\\*", as.character(aa_seq)))
+  for (i in seq_len(N)) { # seq_len(): N = 0 means no trials (1:0 ran two)
+    # Swap one randomly chosen base for a different one.
+    mut_index <- sample.int(length(original_seq), 1, replace = TRUE)
+    current_base <- as.character(original_seq[mut_index])
+    new_base <- sample(setdiff(Biostrings::DNA_BASES, current_base), 1)
+    mut_seq <- Biostrings::replaceLetterAt(original_seq, mut_index, new_base)
+    mut_aa <- Biostrings::translate(mut_seq, no.init.codon = TRUE)
+    term_mut_aa <- as.integer(regexpr(pattern = "\\*", as.character(mut_aa)))
+    # Nonsense: a stop appears where there was none, or earlier than before.
+    if (term_mut_aa != -1 && (term_aa == -1 || term_mut_aa < term_aa)) {
+      stats["nonsense", "occurrences"] <- 1 + stats["nonsense", "occurrences"]
+    } else if (mut_aa == aa_seq) {
+      stats["silent", "occurrences"] <- 1 + stats["silent", "occurrences"]
+    } else {
+      stats["missense", "occurrences"] <- 1 + stats["missense", "occurrences"]
+    }
+  }
+  sealKey()
+  stats
+}
+
+# Trial runs on short synthetic sequences.
+gen_mutations(seq = "ATGATGATGATGATGATG", N = 1000)
+gen_mutations(seq = "CCCCCCCCCCCCCCCCCC", N = 500)
+gen_mutations(seq = "TATTACTATTACTATTAC", N = 500)
+gen_mutations(seq = "TGGTGGTGGTGGTGGTGGTGGTGG", N = 500)
+gen_mutations(seq = "TGTTGTTGTTGTTGTTGTTGTTGT", N = 500)
+gen_mutations(seq = "TGTTGTTGTTGTTGTTGTTGTTGA", N = 500)
+
+# Read the four coding-sequence files and rbind() them pairwise, left to right.
+fastaPaths <- c("data/RAB39B_HSa_coding.fa", "data/PTPN5_HSa_coding.fa",
+                "data/PTPN11_HSa_coding.fa", "data/KRAS_HSa_coding.fa")
+myFA <- Reduce(rbind, lapply(fastaPaths, readFASTA))
+rownames(myFA) <- c("RAB39B", "PTPN5", "PTPN11", "KRAS") # Assign row names
+# Column 2 of each gene's row is passed as the sequence argument.
+gen_mutations(seq = myFA["RAB39B", 2], N = 10000)
+gen_mutations(seq = myFA["PTPN5", 2], N = 10000)
+gen_mutations(seq = myFA["PTPN11", 2], N = 10000)
+gen_mutations(seq = myFA["KRAS", 2], N = 10000)
--- a/myScripts/BIN-Storing_data.R
+++ b/myScripts/BIN-Storing_data.R
@@ -1,41 +1,41 @@
-# ==   1.3  Task: submit for credit (part 1/2)  ================================
-# == Submission - Code to add another philosopher to the datamodel:
-
-pID <- autoincrement(philDB$person)
-immanuelKant <- data.frame(id = pID,
-                           name = "Immanuel Kant",
-                           born = "1724",
-                           died = "1804",
-                           school = "Enlightenment Philosophy")
-philDB$person <- rbind(philDB$person, immanuelKant)
-
-bID = autoincrement(philDB$books)
-immanuelKantWork <- data.frame(id = bID,
-                               title = "Critique of Pure Reason",
-                               published = "1781")
-philDB$books <- rbind(philDB$books, immanuelKantWork)
-philDB$works <- rbind(philDB$works, data.frame(id = autoincrement(philDB$works), personID = pID, bookID = bID))
-
-bID = autoincrement(philDB$books)
-immanuelKantWork <- data.frame(id = bID,
-                               title = "Critique of Judgement",
-                               published = "1790")
-philDB$books <- rbind(philDB$books, immanuelKantWork)
-philDB$works <- rbind(philDB$works, data.frame(id = autoincrement(philDB$works), personID = pID, bookID = bID))
-
-# == Submission: Code to list the philosophical schools in alphabetical order as well as their respective books in alphabetical order.
-
-schools <- unique(philDB$person$school)
-schools <- sort(schools)
-
-for (s in schools) {
-  cat(sprintf("%s\n", s))
-  authors = which(philDB$person$school == s)
-  for (author in authors) {
-    works = which(philDB$works$personID == author)
-    for (work in works) {
-      bookId = which(philDB$books$id == philDB$works$bookID[work])
-      cat(sprintf("\t%s - (%s)\n", philDB$books$title[bookId], philDB$books$published[bookId]))
-    }
-  }
+# ==   1.3  Task: submit for credit (part 1/2)  ================================
+# == Submission - Code to add another philosopher to the datamodel:
+
+# New person row; its id is reused below to link the author to each book.
+pID <- autoincrement(philDB$person)
+immanuelKant <- data.frame(id = pID, name = "Immanuel Kant",
+                           born = "1724", died = "1804",
+                           school = "Enlightenment Philosophy")
+philDB$person <- rbind(philDB$person, immanuelKant)
+
+# First book, followed by the works row that joins it to the author.
+bID <- autoincrement(philDB$books)
+immanuelKantWork <- data.frame(id = bID, title = "Critique of Pure Reason",
+                               published = "1781")
+philDB$books <- rbind(philDB$books, immanuelKantWork)
+philDB$works <- rbind(philDB$works, data.frame(id = autoincrement(philDB$works),
+                                               personID = pID, bookID = bID))
+# Second book, linked to the same person id in the same way.
+bID <- autoincrement(philDB$books)
+immanuelKantWork <- data.frame(id = bID, title = "Critique of Judgement",
+                               published = "1790")
+philDB$books <- rbind(philDB$books, immanuelKantWork)
+philDB$works <- rbind(philDB$works, data.frame(id = autoincrement(philDB$works),
+                                               personID = pID, bookID = bID))
+
+# == Submission: Code to list the philosophical schools in alphabetical order as well as their respective books in alphabetical order.
+
+schools <- sort(unique(philDB$person$school))
+
+for (s in schools) {
+  cat(sprintf("%s\n", s))
+  # Match on person ids, not row positions, so the lookup stays correct even
+  # when ids and row numbers diverge.
+  authorIDs <- philDB$person$id[philDB$person$school %in% s]
+  bookIDs <- philDB$works$bookID[philDB$works$personID %in% authorIDs]
+  books <- philDB$books[philDB$books$id %in% bookIDs, , drop = FALSE]
+  # The task asks for each school's books in alphabetical order.
+  books <- books[order(as.character(books$title)), , drop = FALSE]
+  # sprintf() is vectorised; sep = "" stops cat() inserting spaces between rows.
+  cat(sprintf("\t%s - (%s)\n", books$title, books$published), sep = "")
 }
--- a/myScripts/CUTOLTaxonomy.json
+++ b/myScripts/CUTOLTaxonomy.json
@@ -1,4 +1,4 @@
-[{
-	"ID": 879819,
-	"species": "Cutaneotrichosporon oleaginosum"}
-]
+[{
+	"ID": 879819,
+	"species": "Cutaneotrichosporon oleaginosum"}
+]
--- a/myScripts/MBP1_CUTOL.json
+++ b/myScripts/MBP1_CUTOL.json
@@ -1,19 +1,19 @@
-[
-  { "name" : "MBP1_CUTOL",
-    "RefSeqID" : "XP_018278493.1",
-    "UniProtID" : "A0A0J0XLN0",
-    "taxonomyID" : 879819,
-    "sequence" : [
-       "MGKKAAAAGDGGPNTIYKATYSGVPVFEFICRNVAVMRRRSDAYLNATQILKVAGFDKPQRTRVLEREVQ",
-       "KGEHEKVQGGYGKYQGTWVPIERGLALAKQYNVEDLLRPIIDFVPRESVSPPPAPKHAVAPPTKRNKEPK",
-       "PKEGLVPIKSAGVLSGTGRHQTPDSVGEDVESEVMDDMSESQTPSPLNGTSLLPAVDERSIDGMDIDGFS",
-       "MMNGGGHARKRSAAMMDDEDEYEQLKRARGNSAVHTPPPPGQSPRYGGMQHPLTQDEYNDIVLNYFVSEA",
-       "TQIPAVMTNPPYNWDPNGIIDDDHHTALHWAAAMGRTRVIKLLLSAGARIFDKNNLDQTPLMRSVMFTNN",
-       "YDLRKFPEVFELLHRSTLNIDKNNRTVFHHIANLALYKGKTHAARYYMEVILSRLADYPQELADVINFAD",
-       "EDGETALTLAARARSKRIVKALLDHGADPKLRNRDHKSAEDYILEDERFRSSPDVMLNRTQPSAAPRNPT",
-       "SLGAAVFSQGLPPQLYNSEAARLASGPHSSDILQQMQALARSFEAEKLNKERDVLEAKAMLTSIHTEVND",
-       "AGRTLHNLGEQMKPLEAKQGELDGLVERLQSKLQKDLARGARKWKAADEGRENRWKNGDDPSQAGEDYSD",
-       "LPELTAIPDNAEAEEERLRGEIEKMRARRGELVTRLVKAQTQTGTTDKMAQYRRLITAGCGGDINPGEID",
-       "DIVGQLLDMLENEAQSGRPAPPPQAAPSWVTS"]
-  }
-]
+[
+  { "name" : "MBP1_CUTOL",
+    "RefSeqID" : "XP_018278493.1",
+    "UniProtID" : "A0A0J0XLN0",
+    "taxonomyID" : 879819,
+    "sequence" : [
+       "MGKKAAAAGDGGPNTIYKATYSGVPVFEFICRNVAVMRRRSDAYLNATQILKVAGFDKPQRTRVLEREVQ",
+       "KGEHEKVQGGYGKYQGTWVPIERGLALAKQYNVEDLLRPIIDFVPRESVSPPPAPKHAVAPPTKRNKEPK",
+       "PKEGLVPIKSAGVLSGTGRHQTPDSVGEDVESEVMDDMSESQTPSPLNGTSLLPAVDERSIDGMDIDGFS",
+       "MMNGGGHARKRSAAMMDDEDEYEQLKRARGNSAVHTPPPPGQSPRYGGMQHPLTQDEYNDIVLNYFVSEA",
+       "TQIPAVMTNPPYNWDPNGIIDDDHHTALHWAAAMGRTRVIKLLLSAGARIFDKNNLDQTPLMRSVMFTNN",
+       "YDLRKFPEVFELLHRSTLNIDKNNRTVFHHIANLALYKGKTHAARYYMEVILSRLADYPQELADVINFAD",
+       "EDGETALTLAARARSKRIVKALLDHGADPKLRNRDHKSAEDYILEDERFRSSPDVMLNRTQPSAAPRNPT",
+       "SLGAAVFSQGLPPQLYNSEAARLASGPHSSDILQQMQALARSFEAEKLNKERDVLEAKAMLTSIHTEVND",
+       "AGRTLHNLGEQMKPLEAKQGELDGLVERLQSKLQKDLARGARKWKAADEGRENRWKNGDDPSQAGEDYSD",
+       "LPELTAIPDNAEAEEERLRGEIEKMRARRGELVTRLVKAQTQTGTTDKMAQYRRLITAGCGGDINPGEID",
+       "DIVGQLLDMLENEAQSGRPAPPPQAAPSWVTS"]
+  }
+]
--- a/myScripts/README-myScripts.txt
+++ b/myScripts/README-myScripts.txt
@@ -1,8 +1,8 @@
-README - myScripts folder:
-==========================
-
-The "myScripts" folder is a place to keep your personal files
-safe. No files will be submitted into this folder on the GitHub, master
-copy. Thefore everything you put into this folder is safe from being
-inadvertently overwritten by a file with the same name that would be
-downloaded in a GitHub "pull" request.
+README - myScripts folder:
+==========================
+
+The "myScripts" folder is a place to keep your personal files
+safe. No files will be submitted into this folder on the GitHub master
+copy. Therefore everything you put into this folder is safe from being
+inadvertently overwritten by a file with the same name that would be
+downloaded in a GitHub "pull" request.
--- a/myScripts/makeProteinDB.R
+++ b/myScripts/makeProteinDB.R
@@ -1,4 +1,4 @@
-source("./scripts/ABC-createRefDB.R")
-
-myDB <- dbAddProtein(myDB, jsonlite::fromJSON("./myScripts/MBP1_CUTOL.json"))
-myDB <- dbAddTaxonomy(myDB, jsonlite::fromJSON("./myScripts/CUTOLtaxonomy.json"))
+source("./scripts/ABC-createRefDB.R") # presumably creates myDB - confirm
+# File names must match case exactly: the taxonomy file is "CUTOLTaxonomy.json".
+myDB <- dbAddProtein(myDB, jsonlite::fromJSON("./myScripts/MBP1_CUTOL.json"))
+myDB <- dbAddTaxonomy(myDB, jsonlite::fromJSON("./myScripts/CUTOLTaxonomy.json"))
--- a/myScripts/myScript.R
+++ b/myScripts/myScript.R
@@ -1,38 +1,38 @@
-# myScript.R
-#
-# --- As you work with this file, you can delete the instructions below --------
-# Write your notes and code experiments into this document. Save it
-# from time to time - however I recommend that you do not _commit_
-# your saved version.
-#
-# As long as you do not _commit_ this script to version control,
-# you can _pull_ updated versions of the entire project from GitHub
-# by using the RStudio version control interface. However, once
-# you _commit_ any file in your local version, RStudio will require
-# you to resolve conflicts before you can _pull_ updates.
-# --- As you work with this file, you can delete the instructions above --------
-#
-## Purpose: <...>
-#
-# Version: <...>
-#
-# Date:    <...>
-# Author:  <Name> (<namee@mail.utoronto.ca>)
-#
-# Versions:
-#
-#   <number>    <Features>
-#
-# TODO:
-#   <...>
-#
-# ====================================================================
-
-
-
-
-
-
-
-# [END]
-
+# myScript.R
+#
+# --- As you work with this file, you can delete the instructions below --------
+# Write your notes and code experiments into this document. Save it
+# from time to time - however I recommend that you do not _commit_
+# your saved version.
+#
+# As long as you do not _commit_ this script to version control,
+# you can _pull_ updated versions of the entire project from GitHub
+# by using the RStudio version control interface. However, once
+# you _commit_ any file in your local version, RStudio will require
+# you to resolve conflicts before you can _pull_ updates.
+# --- As you work with this file, you can delete the instructions above --------
+#
+## Purpose: <...>
+#
+# Version: <...>
+#
+# Date:    <...>
+# Author:  <Name> (<name@mail.utoronto.ca>)
+#
+# Versions:
+#
+#   <number>    <Features>
+#
+# TODO:
+#   <...>
+#
+# ====================================================================
+
+
+
+
+
+
+
+# [END]
+