Line termination change and old code.

This commit is contained in:
2021-11-16 00:31:48 -05:00
parent b1e00f52f7
commit affe00f6fb
86 changed files with 37873 additions and 37876 deletions

View File

@@ -1,21 +1,21 @@
# .myProfile.R
# This contains information which the course framework needs from time to time
# to personalize assignments, validate submissions etc. Make sure that
# the information correctly matches our official records.
# myEMail char A string with your eMail address. Use your official
# UofT eMail address.
# myStudentNumber numeric Your UofT student number. Take care to have this
# correct.
# MYSPE char A species name (binomial). Presumably the organism
# assigned to you for the course - confirm against the course notes.
#
# NOTE:
# After you have updated this script, move the file to your "myScripts" folder.
# Utility scripts will look for it on the path: "./myScripts/.myProfile.R"
#
# ==============================================================================
# options(stringsAsFactors = FALSE)
myEMail <- "yh.deng@mail.utoronto.ca" # e.g. "u.franklin@utoronto.ca"
myStudentNumber <- 1005845285 # e.g. 1003141592
MYSPE <- "Cutaneotrichosporon oleaginosum"
# [END]
# .myProfile.R
# This contains information which the course framework needs from time to time
# to personalize assignments, validate submissions etc. Make sure that
# the information correctly matches our official records.
# myEMail char A string with your eMail address. Use your official
# UofT eMail address.
# myStudentNumber numeric Your UofT student number. Take care to have this
# correct.
# MYSPE char A species name (binomial). Presumably the organism
# assigned to you for the course - confirm against the course notes.
#
# NOTE:
# After you have updated this script, move the file to your "myScripts" folder.
# Utility scripts will look for it on the path: "./myScripts/.myProfile.R"
#
# ==============================================================================
# options(stringsAsFactors = FALSE)
myEMail <- "yh.deng@mail.utoronto.ca" # e.g. "u.franklin@utoronto.ca"
myStudentNumber <- 1005845285 # e.g. 1003141592
MYSPE <- "Cutaneotrichosporon oleaginosum"
# [END]

View File

@@ -1,54 +1,51 @@
# Read the four human coding-sequence FASTA files and stack the results
# row-wise, in the same order as the row names assigned below.
myFA <- do.call(
  rbind,
  lapply(
    c(
      "data/RAB39B_HSa_coding.fa",
      "data/PTPN5_HSa_coding.fa",
      "data/PTPN11_HSa_coding.fa",
      "data/KRAS_HSa_coding.fa"
    ),
    readFASTA
  )
)
# Label each row with its gene symbol so it can be selected by name.
rownames(myFA) <- c("RAB39B", "PTPN5", "PTPN11", "KRAS")
# gen_mutations: simulate single random mutations and classify their effect.
#
# Each of the N trials starts from the unmodified input sequence, applies one
# randomly chosen mutation (substitution, one-base insertion or one-base
# deletion), translates the mutated sequence and counts the outcome:
#   nonsense - a stop codon ("*") appears where the original had none, or
#              earlier than the original's first stop;
#   silent   - the translation is unchanged;
#   missense - anything else (including frame shifts without an early stop).
#
# Parameters:
#   seq  character string with the DNA coding sequence.
#   N    number of mutation trials to run.
# Returns:
#   A 3 x 1 numeric matrix with rows "silent", "missense", "nonsense" and
#   the single column "occurrences".
gen_mutations <- function(seq, N) {
  stats <- matrix(0, nrow = 3, ncol = 1,
                  dimnames = list(c("silent", "missense", "nonsense"),
                                  "occurrences"))

  # Position of the first stop ("*") in a translated sequence, NA if none.
  first_stop <- function(aa) {
    pos <- as.integer(regexpr("*", as.character(aa), fixed = TRUE))
    if (pos == -1L) NA_integer_ else pos
  }

  # The reference sequence and its translation are the same for every trial,
  # so they are computed once instead of inside the loop.
  working_seq <- Biostrings::DNAString(seq)
  seq_chr <- as.character(working_seq)
  seq_length <- length(working_seq)
  aa_seq <- Biostrings::translate(working_seq, no.init.codon = TRUE)
  aa_seq_stop <- first_stop(aa_seq)

  # Fixed: the loop used to run a hard-coded 217 times and ignored N.
  for (i in seq_len(N)) {
    mut_action <- sample(c("ins", "del", "sub"), 1, TRUE)
    mut_seq <- working_seq

    if (mut_action == "sub") {
      mut_index <- sample(seq_len(seq_length), 1, replace = TRUE)
      # Substitute with any base other than the one currently at mut_index.
      possible_mutations <- setdiff(Biostrings::DNA_BASES,
                                    substr(seq_chr, mut_index, mut_index))
      mut_change <- sample(possible_mutations, 1, replace = TRUE)
      mut_seq <- Biostrings::replaceLetterAt(mut_seq, mut_index, mut_change)
    } else if (mut_action == "ins") {
      # Fixed: `1:length(working_seq) - 2` parsed as `(1:len) - 2`, which
      # produced the indices -1 and 0 and over-weighted insertion at the front.
      # NOTE(review): the upper bound (length - 2) is kept from the original
      # expression; confirm that excluding the last two positions is intended.
      mut_index <- sample(seq_len(max(1L, seq_length - 2L)), 1, replace = TRUE)
      inserted <- sample(Biostrings::DNA_BASES, 1)
      mut_seq <- Biostrings::DNAString(paste0(
        substr(seq_chr, 1, mut_index - 1),
        inserted,
        substr(seq_chr, mut_index, seq_length)
      ))
    } else {
      mut_index <- sample(seq_len(seq_length), 1, replace = TRUE)
      mut_seq <- mut_seq[-mut_index]
    }

    # Insertions/deletions change the length; drop trailing bases that no
    # longer form a complete codon before translating.
    usable <- length(mut_seq) - (length(mut_seq) %% 3)
    mut_seq <- Biostrings::DNAString(substr(as.character(mut_seq), 1, usable))
    mut_aa <- Biostrings::translate(mut_seq, no.init.codon = TRUE)
    mut_aa_stop <- first_stop(mut_aa)

    # Scalar conditions: use the short-circuiting && / || operators.
    if (!is.na(mut_aa_stop) &&
        (is.na(aa_seq_stop) || mut_aa_stop < aa_seq_stop)) {
      outcome <- "nonsense"
    } else if (mut_aa == aa_seq) {
      outcome <- "silent"
    } else {
      outcome <- "missense"
    }
    stats[outcome, "occurrences"] <- stats[outcome, "occurrences"] + 1
  }

  return(stats)
}
# Value passed as the N (number of trials) argument to every call below.
N_test <- 1200
# Quick checks on short synthetic sequences made of repeated codons.
# Each call is left at top level so that its result matrix is auto-printed.
# ATG repeats (Met).
gen_mutations("ATGATGATGATGATGATG", N_test)
# CCC repeats (Pro).
gen_mutations("CCCCCCCCCCCCCCCCCC", N_test)
# Alternating TAT / TAC (both Tyr).
gen_mutations("TATTACTATTACTATTAC", N_test)
# TGG repeats (Trp).
gen_mutations("TGGTGGTGGTGGTGGTGGTGGTGG", N_test)
# TGT repeats (Cys).
gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGT", N_test)
# gen_mutations: simulate random point substitutions and classify them.
#
# Each of the N trials replaces one randomly chosen base of the unmodified
# input sequence with a different base, translates the result and counts the
# outcome:
#   nonsense - a stop codon ("*") appears where the original had none, or
#              earlier than the original's first stop;
#   silent   - the translation is unchanged;
#   missense - anything else.
#
# Parameters:
#   seq  character string with the DNA coding sequence.
#   N    number of mutation trials to run.
# Returns:
#   A 3 x 1 numeric matrix with rows "silent", "missense", "nonsense" and
#   the single column "occurrences".
gen_mutations <- function(seq, N) {
  sealKey() # See: http://steipe.biochemistry.utoronto.ca/abc/index.php/BCH441_Code_submisson_instructions

  stats <- matrix(0, nrow = 3, ncol = 1,
                  dimnames = list(c("silent", "missense", "nonsense"),
                                  "occurrences"))

  # The reference sequence, its translation and the position of its first
  # stop codon do not change between trials, so compute them only once.
  original_seq <- Biostrings::DNAString(seq)
  seq_length <- length(original_seq)
  aa_seq <- Biostrings::translate(original_seq, no.init.codon = TRUE)
  term_aa <- as.integer(regexpr(pattern = "\\*", as.character(aa_seq)))

  # seq_len() instead of 1:N, so that N == 0 runs no trials at all
  # (1:0 would iterate over c(1, 0), i.e. twice).
  for (i in seq_len(N)) {
    mut_index <- sample(seq_len(seq_length), 1, replace = TRUE)

    # Substitute with any base other than the one currently at mut_index.
    possible_mutations <- setdiff(Biostrings::DNA_BASES,
                                  as.character(original_seq[mut_index]))
    mut_seq <- Biostrings::replaceLetterAt(
      original_seq, mut_index,
      sample(possible_mutations, 1, replace = TRUE)
    )

    mut_aa <- Biostrings::translate(mut_seq, no.init.codon = TRUE)
    # regexpr() returns -1 when no stop codon is present.
    term_mut_aa <- as.integer(regexpr(pattern = "\\*", as.character(mut_aa)))

    # Nonsense: the mutant has a stop, and the original either has none or
    # has its first stop further downstream.
    if (term_mut_aa != -1 && (term_aa == -1 || term_mut_aa < term_aa)) {
      outcome <- "nonsense"
    } else if (mut_aa == aa_seq) {
      outcome <- "silent"
    } else {
      outcome <- "missense"
    }
    stats[outcome, "occurrences"] <- stats[outcome, "occurrences"] + 1
  }

  sealKey()
  return(stats)
}
# --- Synthetic checks: short sequences built from repeated codons -----------
# Calls stay at top level so each result matrix is auto-printed.
gen_mutations("ATGATGATGATGATGATG", 1000)
gen_mutations("CCCCCCCCCCCCCCCCCC", 500)
gen_mutations("TATTACTATTACTATTAC", 500)
gen_mutations("TGGTGGTGGTGGTGGTGGTGGTGG", 500)
gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGT", 500)
gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGA", 500)

# --- Real coding sequences ---------------------------------------------------
# Read the four FASTA files and stack the results row-wise, in the same order
# as the gene symbols assigned as row names below.
myFA <- do.call(
  rbind,
  lapply(
    c(
      "data/RAB39B_HSa_coding.fa",
      "data/PTPN5_HSa_coding.fa",
      "data/PTPN11_HSa_coding.fa",
      "data/KRAS_HSa_coding.fa"
    ),
    readFASTA
  )
)
rownames(myFA) <- c("RAB39B", "PTPN5", "PTPN11", "KRAS")

# Column 2 of each row is passed as the sequence argument.
gen_mutations(myFA["RAB39B", 2], 10000)
gen_mutations(myFA["PTPN5", 2], 10000)
gen_mutations(myFA["PTPN11", 2], 10000)
gen_mutations(myFA["KRAS", 2], 10000)

View File

@@ -1,41 +1,41 @@
# == 1.3 Task: submit for credit (part 1/2) ================================
# == Submission - Code to add another philosopher to the datamodel:

# New row for the person table, keyed with the value autoincrement() returns
# for that table.
pID <- autoincrement(philDB$person)
immanuelKant <- data.frame(
  id = pID,
  name = "Immanuel Kant",
  born = "1724",
  died = "1804",
  school = "Enlightenment Philosophy"
)
philDB$person <- rbind(philDB$person, immanuelKant)

# First book: add it to the books table, then link it to the author through
# a new row in the works table.
bID <- autoincrement(philDB$books)
immanuelKantWork <- data.frame(
  id = bID,
  title = "Critique of Pure Reason",
  published = "1781"
)
philDB$books <- rbind(philDB$books, immanuelKantWork)
philDB$works <- rbind(
  philDB$works,
  data.frame(id = autoincrement(philDB$works), personID = pID, bookID = bID)
)

# Second book: same two steps.
bID <- autoincrement(philDB$books)
immanuelKantWork <- data.frame(
  id = bID,
  title = "Critique of Judgement",
  published = "1790"
)
philDB$books <- rbind(philDB$books, immanuelKantWork)
philDB$works <- rbind(
  philDB$works,
  data.frame(id = autoincrement(philDB$works), personID = pID, bookID = bID)
)
# == Submission: Code to list the philosophical schools in alphabetical order as well as their respective books in alphabetical order.
schools <- sort(unique(philDB$person$school))
for (s in schools) {
  cat(sprintf("%s\n", s))

  # Select authors by their person id. The previous code used the row
  # positions returned by which() and compared those against works$personID,
  # which is only correct while every id happens to equal its row number.
  authorIDs <- philDB$person$id[which(philDB$person$school == s)]

  # All books linked to any of these authors; a book shared by several
  # authors of the same school is listed once.
  bookIDs <- unique(philDB$works$bookID[philDB$works$personID %in% authorIDs])
  books <- philDB$books[philDB$books$id %in% bookIDs, , drop = FALSE]

  # The task asks for the books in alphabetical order as well.
  books <- books[order(books$title), , drop = FALSE]

  for (i in seq_len(nrow(books))) {
    cat(sprintf("\t%s - (%s)\n", books$title[i], books$published[i]))
  }
}
# == 1.3 Task: submit for credit (part 1/2) ================================
# == Submission - Code to add another philosopher to the datamodel:

# New row for the person table, keyed with the value autoincrement() returns
# for that table.
pID <- autoincrement(philDB$person)
immanuelKant <- data.frame(
  id = pID,
  name = "Immanuel Kant",
  born = "1724",
  died = "1804",
  school = "Enlightenment Philosophy"
)
philDB$person <- rbind(philDB$person, immanuelKant)

# First book: add it to the books table, then link it to the author through
# a new row in the works table.
bID <- autoincrement(philDB$books)
immanuelKantWork <- data.frame(
  id = bID,
  title = "Critique of Pure Reason",
  published = "1781"
)
philDB$books <- rbind(philDB$books, immanuelKantWork)
philDB$works <- rbind(
  philDB$works,
  data.frame(id = autoincrement(philDB$works), personID = pID, bookID = bID)
)

# Second book: same two steps.
bID <- autoincrement(philDB$books)
immanuelKantWork <- data.frame(
  id = bID,
  title = "Critique of Judgement",
  published = "1790"
)
philDB$books <- rbind(philDB$books, immanuelKantWork)
philDB$works <- rbind(
  philDB$works,
  data.frame(id = autoincrement(philDB$works), personID = pID, bookID = bID)
)
# == Submission: Code to list the philosophical schools in alphabetical order as well as their respective books in alphabetical order.
schools <- sort(unique(philDB$person$school))
for (s in schools) {
  cat(sprintf("%s\n", s))

  # Select authors by their person id. The previous code used the row
  # positions returned by which() and compared those against works$personID,
  # which is only correct while every id happens to equal its row number.
  authorIDs <- philDB$person$id[which(philDB$person$school == s)]

  # All books linked to any of these authors; a book shared by several
  # authors of the same school is listed once.
  bookIDs <- unique(philDB$works$bookID[philDB$works$personID %in% authorIDs])
  books <- philDB$books[philDB$books$id %in% bookIDs, , drop = FALSE]

  # The task asks for the books in alphabetical order as well.
  books <- books[order(books$title), , drop = FALSE]

  for (i in seq_len(nrow(books))) {
    cat(sprintf("\t%s - (%s)\n", books$title[i], books$published[i]))
  }
}

View File

@@ -1,4 +1,4 @@
[{
"ID": 879819,
"species": "Cutaneotrichosporon oleaginosum"}
]
[{
"ID": 879819,
"species": "Cutaneotrichosporon oleaginosum"}
]

View File

@@ -1,19 +1,19 @@
[
{ "name" : "MBP1_CUTOL",
"RefSeqID" : "XP_018278493.1",
"UniProtID" : "A0A0J0XLN0",
"taxonomyID" : 879819,
"sequence" : [
"MGKKAAAAGDGGPNTIYKATYSGVPVFEFICRNVAVMRRRSDAYLNATQILKVAGFDKPQRTRVLEREVQ",
"KGEHEKVQGGYGKYQGTWVPIERGLALAKQYNVEDLLRPIIDFVPRESVSPPPAPKHAVAPPTKRNKEPK",
"PKEGLVPIKSAGVLSGTGRHQTPDSVGEDVESEVMDDMSESQTPSPLNGTSLLPAVDERSIDGMDIDGFS",
"MMNGGGHARKRSAAMMDDEDEYEQLKRARGNSAVHTPPPPGQSPRYGGMQHPLTQDEYNDIVLNYFVSEA",
"TQIPAVMTNPPYNWDPNGIIDDDHHTALHWAAAMGRTRVIKLLLSAGARIFDKNNLDQTPLMRSVMFTNN",
"YDLRKFPEVFELLHRSTLNIDKNNRTVFHHIANLALYKGKTHAARYYMEVILSRLADYPQELADVINFAD",
"EDGETALTLAARARSKRIVKALLDHGADPKLRNRDHKSAEDYILEDERFRSSPDVMLNRTQPSAAPRNPT",
"SLGAAVFSQGLPPQLYNSEAARLASGPHSSDILQQMQALARSFEAEKLNKERDVLEAKAMLTSIHTEVND",
"AGRTLHNLGEQMKPLEAKQGELDGLVERLQSKLQKDLARGARKWKAADEGRENRWKNGDDPSQAGEDYSD",
"LPELTAIPDNAEAEEERLRGEIEKMRARRGELVTRLVKAQTQTGTTDKMAQYRRLITAGCGGDINPGEID",
"DIVGQLLDMLENEAQSGRPAPPPQAAPSWVTS"]
}
]
[
{ "name" : "MBP1_CUTOL",
"RefSeqID" : "XP_018278493.1",
"UniProtID" : "A0A0J0XLN0",
"taxonomyID" : 879819,
"sequence" : [
"MGKKAAAAGDGGPNTIYKATYSGVPVFEFICRNVAVMRRRSDAYLNATQILKVAGFDKPQRTRVLEREVQ",
"KGEHEKVQGGYGKYQGTWVPIERGLALAKQYNVEDLLRPIIDFVPRESVSPPPAPKHAVAPPTKRNKEPK",
"PKEGLVPIKSAGVLSGTGRHQTPDSVGEDVESEVMDDMSESQTPSPLNGTSLLPAVDERSIDGMDIDGFS",
"MMNGGGHARKRSAAMMDDEDEYEQLKRARGNSAVHTPPPPGQSPRYGGMQHPLTQDEYNDIVLNYFVSEA",
"TQIPAVMTNPPYNWDPNGIIDDDHHTALHWAAAMGRTRVIKLLLSAGARIFDKNNLDQTPLMRSVMFTNN",
"YDLRKFPEVFELLHRSTLNIDKNNRTVFHHIANLALYKGKTHAARYYMEVILSRLADYPQELADVINFAD",
"EDGETALTLAARARSKRIVKALLDHGADPKLRNRDHKSAEDYILEDERFRSSPDVMLNRTQPSAAPRNPT",
"SLGAAVFSQGLPPQLYNSEAARLASGPHSSDILQQMQALARSFEAEKLNKERDVLEAKAMLTSIHTEVND",
"AGRTLHNLGEQMKPLEAKQGELDGLVERLQSKLQKDLARGARKWKAADEGRENRWKNGDDPSQAGEDYSD",
"LPELTAIPDNAEAEEERLRGEIEKMRARRGELVTRLVKAQTQTGTTDKMAQYRRLITAGCGGDINPGEID",
"DIVGQLLDMLENEAQSGRPAPPPQAAPSWVTS"]
}
]

View File

@@ -1,8 +1,8 @@
README - myScripts folder:
==========================
The "myScripts" folder is a place to keep your personal files
safe. No files will be submitted into this folder on the GitHub master
copy. Therefore everything you put into this folder is safe from being
inadvertently overwritten by a file with the same name that would be
downloaded in a GitHub "pull" request.
README - myScripts folder:
==========================
The "myScripts" folder is a place to keep your personal files
safe. No files will be submitted into this folder on the GitHub master
copy. Therefore everything you put into this folder is safe from being
inadvertently overwritten by a file with the same name that would be
downloaded in a GitHub "pull" request.

View File

@@ -1,4 +1,4 @@
# Run the course script ABC-createRefDB.R first.
# NOTE(review): presumably this is what creates `myDB`, which the two calls
# below take as input - confirm in that script.
source("./scripts/ABC-createRefDB.R")
# Parse the MBP1_CUTOL protein record from JSON and pass it, together with
# the current myDB, to dbAddProtein(); the result replaces myDB.
myDB <- dbAddProtein(myDB, jsonlite::fromJSON("./myScripts/MBP1_CUTOL.json"))
# Same pattern for the taxonomy record, via dbAddTaxonomy().
myDB <- dbAddTaxonomy(myDB, jsonlite::fromJSON("./myScripts/CUTOLtaxonomy.json"))
# Run the course script ABC-createRefDB.R first.
# NOTE(review): presumably this is what creates `myDB`, which the two calls
# below take as input - confirm in that script.
source("./scripts/ABC-createRefDB.R")
# Parse the MBP1_CUTOL protein record from JSON and pass it, together with
# the current myDB, to dbAddProtein(); the result replaces myDB.
myDB <- dbAddProtein(myDB, jsonlite::fromJSON("./myScripts/MBP1_CUTOL.json"))
# Same pattern for the taxonomy record, via dbAddTaxonomy().
myDB <- dbAddTaxonomy(myDB, jsonlite::fromJSON("./myScripts/CUTOLtaxonomy.json"))

View File

@@ -1,38 +1,38 @@
# myScript.R
#
# --- As you work with this file, you can delete the instructions below --------
# Write your notes and code experiments into this document. Save it
# from time to time - however I recommend that you do not _commit_
# your saved version.
#
# As long as you do not _commit_ this script to version control,
# you can _pull_ updated versions of the entire project from GitHub
# by using the RStudio version control interface. However, once
# you _commit_ any file in your local version, RStudio will require
# you to resolve conflicts before you can _pull_ updates.
# --- As you work with this file, you can delete the instructions above --------
#
## Purpose: <...>
#
# Version: <...>
#
# Date: <...>
# Author: <Name> (<namee@mail.utoronto.ca>)
#
# Versions:
#
# <number> <Features>
#
# TODO:
# <...>
#
# ====================================================================
# [END]
# myScript.R
#
# --- As you work with this file, you can delete the instructions below --------
# Write your notes and code experiments into this document. Save it
# from time to time - however I recommend that you do not _commit_
# your saved version.
#
# As long as you do not _commit_ this script to version control,
# you can _pull_ updated versions of the entire project from GitHub
# by using the RStudio version control interface. However, once
# you _commit_ any file in your local version, RStudio will require
# you to resolve conflicts before you can _pull_ updates.
# --- As you work with this file, you can delete the instructions above --------
#
## Purpose: <...>
#
# Version: <...>
#
# Date: <...>
# Author: <Name> (<namee@mail.utoronto.ca>)
#
# Versions:
#
# <number> <Features>
#
# TODO:
# <...>
#
# ====================================================================
# [END]