Line termination change and old code.

2021-11-16 00:31:48 -05:00 · 2021-11-16 00:31:48 -05:00 · affe00f6fb
commit affe00f6fb
parent b1e00f52f7
86 changed files with 37873 additions and 37876 deletions
--- a/RPR-SX-PDB.R
+++ b/RPR-SX-PDB.R
@@ -104,7 +104,7 @@ str(apses)
 # bio3d's pdb objects are simple lists. Great! You know lists!
 # You see that there is a list element called $atom which is a data frame in
-# which the columns arevectors of the same length - namely the number of atoms
+# which the columns are vectors of the same length - namely the number of atoms
 # in the structure file. And there is a matrix of (x, y, z) triplets called xyz.
 # And there is a vector that holds sequence, and two tables called helix and
 # sheet. Let's pull out a few values to confirm how selection and subsetting
--- a/myScripts/ABC-INT-Mutation_impact-code.R
+++ b/myScripts/ABC-INT-Mutation_impact-code.R
@@ -1,42 +1,25 @@
 myFA <-             readFASTA("data/RAB39B_HSa_coding.fa")
 myFA <- rbind(myFA, readFASTA("data/PTPN5_HSa_coding.fa"))
 myFA <- rbind(myFA, readFASTA("data/PTPN11_HSa_coding.fa"))
 myFA <- rbind(myFA, readFASTA("data/KRAS_HSa_coding.fa"))
 rownames(myFA)<-c("RAB39B", "PTPN5", "PTPN11", "KRAS") # Assign row names
 gen_mutations <- function(seq, N) {
  sealKey() # See: http://steipe.biochemistry.utoronto.ca/abc/index.php/BCH441_Code_submisson_instructions
  stats <- c()
  stats <- cbind(stats, c(0, 0, 0))
  rownames(stats) <- c("silent", "missense", "nonsense")
  colnames(stats) <- c("occurrences")
  # Actual function
-  for (i in 1:217) {
+  for (i in 1:N) {
-    # select index for mutation
+    original_seq <- Biostrings::DNAString(seq)
-    working_seq <- Biostrings::DNAString(seq)
+    aa_seq <- Biostrings::translate(original_seq, no.init.codon = TRUE)
-    aa_seq <- Biostrings::translate(working_seq, no.init.codon = TRUE)
+
    mut_action <- sample(c("ins", "del", "sub"), 1, TRUE)
    mut_seq <- Biostrings::DNAString(seq)
-    if (mut_action == "sub") {
+    mut_index <- sample(1:length(original_seq), 1, replace = TRUE)
      mut_index <- sample(1:length(working_seq), 1, replace = TRUE)
    possible_mutations <- Biostrings::DNA_BASES
-      possible_mutations <- possible_mutations[possible_mutations != as.character(unlist(working_seq[mut_index]))]
+    possible_mutations <- possible_mutations[possible_mutations != as.character(unlist(original_seq[mut_index]))]
-      mut_change <- sample(possible_mutations, 1, replace = TRUE)
+    mut_seq <- Biostrings::replaceLetterAt(mut_seq, mut_index, sample(possible_mutations, 1, replace = TRUE))
      mut_seq <- Biostrings::replaceLetterAt(mut_seq, mut_index, mut_change)
    } else if (mut_action == "ins") {
      mut_index <- sample(1:length(working_seq) - 2, 1, replace = TRUE)
      possible_mutations <- Biostrings::DNA_BASES
      mut_seq <- Biostrings::DNAString(paste(substring(working_seq, 1, mut_index - 1), sample(possible_mutations, 1), substring(working_seq, mut_index), sep = ""))
    } else {
      mut_index <- sample(1:length(working_seq), 1, replace = TRUE)
      mut_seq <- mut_seq[-mut_index]
    }
    mut_seq <- Biostrings::DNAString(substring(mut_seq, 1, length(mut_seq) - (length(mut_seq) %% 3)))
    mut_aa <- Biostrings::translate(mut_seq, no.init.codon = TRUE)
-    # Note: we need silent, nonsense, and missense
+
-    mut_aa_stop <- match("*", Biostrings::as.matrix(mut_aa))
+    term_aa <- regexpr(pattern = "\\*", aa_seq)
-    aa_seq_stop <- match("*", Biostrings::as.matrix(aa_seq))
+    term_mut_aa <- as.integer(regexpr(pattern = "\\*", mut_aa))
-    if (!is.na(mut_aa_stop) & (is.na(aa_seq_stop) | mut_aa_stop < aa_seq_stop)) {
+    if ((term_aa == -1 && term_mut_aa != -1) || (term_mut_aa != -1 && term_mut_aa < term_aa)) {
      stats["nonsense", "occurrences"] <- 1 + stats["nonsense", "occurrences"]
    } else if (mut_aa == aa_seq) {
      stats["silent", "occurrences"] <- 1 + stats["silent", "occurrences"]
@@ -44,11 +27,25 @@ gen_mutations <- function(seq, N) {
      stats["missense", "occurrences"] <- 1 + stats["missense", "occurrences"]
    }
  }
  sealKey()
  return(stats)
 }
-N_test <- 1200
+
-gen_mutations("ATGATGATGATGATGATG", N_test)
+gen_mutations("ATGATGATGATGATGATG", 1000)
-gen_mutations("CCCCCCCCCCCCCCCCCC", N_test)
+gen_mutations("CCCCCCCCCCCCCCCCCC", 500)
-gen_mutations("TATTACTATTACTATTAC", N_test)
+gen_mutations("TATTACTATTACTATTAC", 500)
-gen_mutations("TGGTGGTGGTGGTGGTGGTGGTGG", N_test)
+gen_mutations("TGGTGGTGGTGGTGGTGGTGGTGG", 500)
-gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGT", N_test)
+gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGT", 500)
 gen_mutations("TGTTGTTGTTGTTGTTGTTGTTGA", 500)
 myFA <-             readFASTA("data/RAB39B_HSa_coding.fa")
 myFA <- rbind(myFA, readFASTA("data/PTPN5_HSa_coding.fa"))
 myFA <- rbind(myFA, readFASTA("data/PTPN11_HSa_coding.fa"))
 myFA <- rbind(myFA, readFASTA("data/KRAS_HSa_coding.fa"))
 rownames(myFA)<-c("RAB39B", "PTPN5", "PTPN11", "KRAS") # Assign row names
 gen_mutations(myFA["RAB39B", 2], 10000)
 gen_mutations(myFA["PTPN5", 2], 10000)
 gen_mutations(myFA["PTPN11", 2], 10000)
 gen_mutations(myFA["KRAS", 2], 10000)