Obsolete
This commit is contained in:
parent
e1679d8d07
commit
abe1c4974c
@ -1,410 +0,0 @@
|
||||
# create_refDB.R
|
||||
# Create a reference protein database for Mbp1-like proteins
|
||||
#
|
||||
# Boris Steipe for BCH441
|
||||
#
|
||||
# For the species, see:
|
||||
#   http://steipe.biochemistry.utoronto.ca/abc/index.php/Reference_species_for_fungi
|
||||
#
|
||||
# For the schema, see dbInit() in .utilities.R
|
||||
#
|
||||
# ==============================================================================
|
||||
|
||||
# Create the database object with dbInit() (defined in .utilities.R, which
# also documents the schema). Every section below appends rows to one of its
# tables with rbind().
refDB <- dbInit()
|
||||
|
||||
|
||||
# === protein table ===
|
||||
|
||||
# Reference protein MBP1_ASPNI (Aspergillus nidulans, taxonomy ID 162425).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_ASPNI",
      RefSeqID = "XP_660758",
      UniProtID = "Q5B8H6",
      taxonomy.ID = 162425L,
      sequence = dbSanitizeSequence("
MAAVDFSNVYSATYSSVPVYEFKIGTDSVMRRRSDDWINATHILKVAGFDKPARTRILEREVQKGVHEKVQGGYGKYQGT
WIPLQEGRQLAERNNILDKLLPIFDYVAGDRSPPPAPKHTSAASKPRAPKINKRVVKEDVFSAVNHHRSMGPPSFHHEHY
DVNTGLDEDESIEQATLESSSMIADEDMISMSQNGPYSSRKRKRGINEVAAMSLSEQEHILYGDQLLDYFMTVGDAPEAT
RIPPPQPPANFQVDRPIDDSGNTALHWACAMGDLEIVKDLLRRGADMKALSIHEETPLVRAVLFTNNYEKRTFPALLDLL
LDTISFRDWFGATLFHHIAQTTKSKGKWKSSRYYCEVALEKLRTTFSPEEVDLLLSCQDSVGDTAVLVAARNGVFRLVDL
LLSRCPRAGDLVNKRGETASSIMQRAHLAERDIPPPPSSITMGNDHIDGEVGAPTSLEPQSVTLHHESSPATAQLLSQIG
AIMAEASRKLTSSYGAAKPSQKDSDDVANPEALYEQLEQDRQKIRRQYDALAAKEAAEESSDAQLGRYEQMRDNYESLLE
QIQRARLKERLASTPVPTQTAVIGSSSPEQDRLLTTFQLSRALCSEQKIRRAAVKELAQQRADAGVSTKFDVHRKLVALA
TGLKEEELDPMAAELAETLEFDRMNGKGVGPESPEADHKDSASLPFPGPVVSVDA"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_BIPOR (Bipolaris oryzae, taxonomy ID 101162).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_BIPOR",
      RefSeqID = "XP_007682304",
      UniProtID = "W6ZM86",
      taxonomy.ID = 101162L,
      sequence = dbSanitizeSequence("
MPPAPDGKIYSATYSNVPVYECNVNGHHVMRRRADDWINATHILKVADYDKPARTRILEREVQKGVHEKVQGGYGKYQGT
WIPLEEGRGLAERNGVLDKMRAIFDYVPGDRSPPPAPKHATAASNRMKPPRQTAAAVAAAAVAAAAAAAAVANHNALMSN
SRSQASEDPYENSQRSQIYREDTPDNETVISESMLGDADLMDMSQYSADGNRKRKRGMDQMSLLDQQHQIWADQLLDYFM
LLDHEAAVSWPEPPPSINLDRPIDEKGHAAMHWAAAMGDVGVVKELIHRGARLDCLSNNLETPLMRAVMFTNNFDKETMP
SMVKIFQQTVHRTDWFGSTVFHHIAATTSSSNKYVCARWYLDCIINKLSETWIPEEVTRLLNAADQNGDTAIMIAARNGA
RKCVRSLLGRNVAVDIPNKKGETADDLIRELNQRRRMHGRTRQASSSPFAPAPEHRLNGHVPHFDGGPLMSVPVPSMAVR
ESVQYRSQTASHLMTKVAPTLLEKCEELATAYEAELQEKEAEFFDAERVVKRRQAELEAVRKQVAELQSMSKGLHIDLND
EEAERQQEDELRLLVEEAESLLEIEQKAELRRLCSSMPQQNSDSSPVDITEKMRLALLLHRAQLERRELVREVVGNLSVA
GMSEKQGTYKKLIAKALGEREEDVESMLPEILQELEEAETQERAEGLDGSPV"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_NEUCR (Neurospora crassa, taxonomy ID 5141).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_NEUCR",
      RefSeqID = "XP_955821",
      UniProtID = "Q7RW59",
      taxonomy.ID = 5141L,
      sequence = dbSanitizeSequence("
MVKENVGGNPEPGIYSATYSGIPVWEYQFGVDLKEHVMRRRHDDWVNATHILKAAGFDKPARTRILEREVQKDTHEKIQG
GYGRYQGTWIPLEQAEALARRNNIYERLKPIFEFQPGNESPPPAPRHASKPKAPKVKPAVPTWGSKSAKNANPPQPGTFL
PPGRKGLPAQAPDYNDADTHMHDDDTPDNLTVASASYMAEDDRYDHSHFSTGHRKRKRDELIEDMTEQQHAVYGDELLDY
FLLSRNEQPAVRPDPPPNFKPDWPIDNERHTCLHWASAMGDVDVMRQLKKFGASLDAQNVRGETPFMRAVNFTNCFEKQT
FPQVMKELFSTIDCRDLSGCTVIHHAAVMKIGRVNSQSCSRYYLDIILNRLQETHHPEFVQQLLDAQDNDGNTAVHLAAM
RDARKCIRALLGRGASTDIPNKQGIRAEELIKELNASISKSRSNLPQRSSSPFAPDTQRHDAFHEAISESMVTSRKNSQP
NYSSDAANTVQNRITPLVLQKLKDLTATYDSEFKEKDDAEKEARRILNKTQSELKALTASIDDYNSRLDTDDVAAKTAAE
MATARHKVLAFVTHQNRISVQEAVKQELAALDRANAVTNGTSTKSKSSSPSKKPKLSPIPDQKDKPPKDENETESEAEHP
DPPAAQAHQQQPGPSSQDTEVEDQDREEEEDDYTHRLSLAAELRSILQEQRSAENDYVEARGMLGTGERIDKYKHLLMSC
LPPDEQENLEENLEEMIKLMEQEDESVTDLPAGAVGGGGGGNAADGSGGGGQPSNGRRESVLPALRGGNGDGEMSRRGSR
TAAAAAAQVDGEREINGRAGAERTERIQEIAAV"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_SACCE (Saccharomyces cerevisiae, taxonomy ID 4932).
# This is the fourth protein row added; the component table in this script
# refers to it as "ref_pro_4", so keep the insertion order unchanged.
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_SACCE",
      RefSeqID = "NP_010227",
      UniProtID = "P39678",
      taxonomy.ID = 4932L,
      sequence = dbSanitizeSequence("
MSNQIYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHILKAANFAKAKRTRILEKEVLKETHEKVQGGF
GKYQGTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASPPPAPKHHHASKVDRKKAIRSASTSAIMET
KRNNKKAEENQFQSSKILGNPTAAPRKRGRPVGSTRGSRRKLGVNLQRSQSDMGFPRPAIPNSSISTTQL
PSIRSTMGPQSPTLGILEEERHDSRQQQPQQNNSAQFKEIDLEDGLSSDVEPSQQLQQVFNQNTGFVPQQ
QSSLIQTQQTESMATSVSSSPSLPTSPGDFADSNPFEERFPGGGTSPIISMIPRYPVTSRPQTSDINDKV
NKYLSKLVDYFISNEMKSNKSLPQVLLHPPPHSAPYIDAPIDPELHTAFHWACSMGNLPIAEALYEAGTS
IRSTNSQGQTPLMRSSLFHNSYTRRTFPRIFQLLHETVFDIDSQSQTVIHHIVKRKSTTPSAVYYLDVVL
SKIKDFSPQYRIELLLNTQDKNGDTALHIASKNGDVVFFNTLVKMGALTTISNKEGLTANEIMNQQYEQM
MIQNGTNQHVNSSNTDLNIHVNTNNIETKNDVNSMVIMSPVSPSDYITYPSQIATNISRNIPNVVNSMKQ
MASIYNDLHEQHDNEIKSLQKTLKSISKTKIQVSLKTLEVLKESSKDENGEAQTNDDFEILSRLQEQNTK
KLRKRLIRYKRLIKQKLEYRQTVLLNKLIEDETQATTNNTVEKDNNTLERLELAQELTMLQLQRKNKLSS
LVKKFEDNAKIHKYRRIIREGTEMNIEEVDSSLDVILQTLIANNNKNKGAEQIITISNANSHA"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# Reference protein MBP1_SCHPO (Schizosaccharomyces pombe, taxonomy ID 4896).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_SCHPO",  # actually the Res2 protein
      RefSeqID = "NP_593032",
      UniProtID = "P41412",
      taxonomy.ID = 4896L,
      sequence = dbSanitizeSequence("
MAPRSSAVHVAVYSGVEVYECFIKGVSVMRRRRDSWLNATQILKVADFDKPQRTRVLERQVQIGAHEKVQGGYGKYQGTW
VPFQRGVDLATKYKVDGIMSPILSLDIDEGKAIAPKKKQTKQKKPSVRGRRGRKPSSLSSSTLHSVNEKQPNSSISPTIE
SSMNKVNLPGAEEQVSATPLPASPNALLSPNDNTIKPVEELGMLEAPLDKYEESLLDFFLHPEEGRIPSFLYSPPPDFQV
NSVIDDDGHTSLHWACSMGHIEMIKLLLRANADIGVCNRLSQTPLMRSVIFTNNYDCQTFGQVLELLQSTIYAVDTNGQS
IFHHIVQSTSTPSKVAAAKYYLDCILEKLISIQPFENVVRLVNLQDSNGDTSLLIAARNGAMDCVNSLLSYNANPSIPNR
QRRTASEYLLEADKKPHSLLQSNSNASHSAFSFSGISPAIISPSCSSHAFVKAIPSISSKFSQLAEEYESQLREKEEDLI
RANRLKQDTLNEISRTYQELTFLQKNNPTYSQSMENLIREAQETYQQLSKRLLIWLEARQIFDLERSLKPHTSLSISFPS
DFLKKEDGLSLNNDFKKPACNNVTNSDEYEQLINKLTSLQASRKKDTLYIRKLYEELGIDDTVNSYRRLIAMSCGINPED
LSLEILDAVEEALTREK"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_COPCI (Coprinopsis cinerea, taxonomy ID 5346).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_COPCI",
      RefSeqID = "XP_001837394",
      UniProtID = "A8NYC6",
      taxonomy.ID = 5346L,
      sequence = dbSanitizeSequence("
MPEAQIFKATYSGIPVYEMMCKGVAVMRRRSDSWLNATQILKVAGFDKPQRTRVLEREVQKGEHEKVQGGYGKYQGTWIP
LERGMQLAKQYNCEHLLRPIIEFTPAAKSPPLAPKHLVATAGNRPVRKPLTTDLSAAVINTRSTRKQVADGVGEESDHDT
HSLRGSEDGSMTPSPSEASSSSRTPSPIHSPGTYHSNGLDGPSSGGRNRYRQSNDRYDEDDDASRHNGMGDPRSYGDQIL
EYFISDTNQIPPILITPPPDFDPNMAIDDDGHTSLHWACAMGRIRIVKLLLSAGADIFKVNKAGQTALMRSVMFANNYDV
RKFPELYELLHRSTLNIDNSNRTVFHHVVDVAMSKGKTHAARYYMETILTRLADYPKELADVINFQDEDGETALTMAARC
RSKRLVKLLIDHGADPKINNHDGKNAEDYILEDERFRSSPAPSSRVAAMSYRNAQVAYPPPGAPSTYSFAPANHDRPPLH
YSAAAQKASTRCVNDMASMLDSLAASFDQELRDKERDMAQAQALLTNIQAEILESQRTVLQLRQQAEGLSQAKQRLADLE
NALQDKMGRRYRLGFEKWIKDEETREKVIRDAANGDLVLTPATTSYTVDEDGDSDSGSNGDKNKGKRKAQVQQEEVSDLV
ELYSNIPTDPEELRKQCEALREEVSQSRKRRKAMFDELVTFQAEAGTSGRMSDYRRLIAAGCGGLEPLEIDSVLGMLLET
LEAEDPSSTSATWSGSKGQQTG"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_CRYNE (Cryptococcus neoformans, taxonomy ID 5207).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_CRYNE",
      RefSeqID = "XP_569090",
      UniProtID = "Q5KMQ9",
      taxonomy.ID = 5207L,
      sequence = dbSanitizeSequence("
MGKKVIASGGDNGPNTIYKATYSGVPVYEMVCRDVAVMRRRSDAYLNATQILKVAGFDKPQRTRVLEREVQKGEHEKVQG
GYGKYQGTWIPIERGLALAKQYGVEDILRPIIDYVPTSVSPPPAPKHSVAPPSKARRDKEKETGRTKATPSRTGPTSAAA
LQAQAQLNRAKMHDSTPDADASFRSFEERVSLTPEDDSSSDTPSPVASVMTDQDMEVDKMGMHMSMPNVTLSQNMEELGA
GSRKRSAAMMMEDEDQFGQLRSIRGNSAVHTPHGTPRHLGIGMPPEPIGPEQYTDIILNYFVSETSQIPSILVSPPHDFD
PNAPIDDDGHTALHWACAMGRVRVVKLLLTAGASIFAGNNAEQTPLMRSVMFSNNYDMRKFPELYELLHRSTLNIDKQNR
TVFHHIANLALTKGKTHAAKYYMETILARLADYPQELADVINFQDEEGETALTIAARARSRRLVKALLDHGANPKIKNRD
SRSAEDYILEDERFRSSPVPAPNGGIGKASTSAAAEKPLFAPQLYFSEAARLCGGQALTDITSHMQSLARSFDAELQGKE
RDILQAKALLTNIHTEVTENGRSITAITNQAAPLEEKRRELEALQASLKTRVKDALKKGYIGWLEGELVREQRWENGELE
GNEEEKAAVQALRDVPTGGQEVVQAEEEKLRWEIEEKRKRRAMFVEKFVRAQTEAGTSEQIAKYRKLVSAGLGGVSTNEV
DELMNQLLEGLEEENDNQVYNTTAGESGPSSWVQ"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_PUCGR (taxonomy ID 5297).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_PUCGR",
      RefSeqID = "XP_003327086",
      UniProtID = "E3KED4",
      taxonomy.ID = 5297L,
      sequence = dbSanitizeSequence("
MAYGGSIQPLRPPSRESATLHLHQPDLTVTSPPLSLTHCPPCVYSHFTHTPTSLIVIQVSLHSLLDQETYHLLPSRSPPT
VSVRMGTTTIYKATYSGVPVLEMPCEGIAVMRRRSDSWLNATQILKVAGFDKPQRTRVLEREIQKGTHEKIQGGYGKYQG
TWVPLDRGIDLAKQYGVDHLLSALFNFQPSSNESPPLAPKHVTALSTRVKVSKVSAASAARAARAVVPSLPSTSGLGGRN
TNNSWSNFDSDNEPGLPPAASSRESNGNWATQSKLARSSNLARARANINNSHPEDLPVPAPDQLQASPLPSMQTADPEND
NSLTPSELSLPSRTPSPIEDLPLTVNTASSQSTRNKGKSRDLPDDEDLSRGQKRKYDTSLVEDTSYSDGADDQYINGNPS
NAASAKYAKLILDYFVSESSQIPNFLNDPPSDFDPNVVIDDDGHTALHWACAMGRIKIIKLLLTCGADIFRANNAGQTAL
MRAVMFTNNHDLRTFPELFESFSGSVINIDRTDRTVFHYVIDIALTKGKVPAARYYLETILSQLSEYPKELIDILNFQDE
DGETALTLAARCRSKKLVKILLDHGANPKTANRDGKSAEDYILEDDKFRALSPTPCSSGPIRQLDQNSPGGTSNRSDFVD
LVDPVPIDSNLIPQRSPNASPPHYSETGQRVTKQLLPEVTSMIELLATTFDTELQDKERDLDHAVGLLSNIEKEYLEGQR
KILNYERMLSDFGEKKLALGDLEKELNDKLGKRYRFGWEKYVRDEEERARRITEQRSKYLQELSIEDRKLLDSSNLRFAD
PSKQEVLMKLQADERENSDLLNLIRTNSTDVESECDLLRESVQKLSEERERLFKEFINLSSENTGGENEEDDGANHTSAN
TSRLNNYRKLISLGCGGIGLDEVDEVIESLNEGIDVNELNDNGFLTEQDEELGNHQNYHNIHTQGR"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_USTMA (Ustilago maydis, taxonomy ID 5270).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_USTMA",
      RefSeqID = "XP_011392621",
      UniProtID = "A0A0D1DP35",
      taxonomy.ID = 5270L,
      sequence = dbSanitizeSequence("
MSGDKTIFKATYSGVPVYECIINNVAVMRRRSDDWLNATQILKVVGLDKPQRTRVLEREIQKGIHEKVQGGYGKYQGTWI
PLDVAIELAERYNIQGLLQPITSYVPSAADSPPPAPKHTISTSNRSKKIIPADPGALGRSRRATSIETESEVIGAAPNNV
SEGSMSPSPSDISSSSRTPSPLPADRAHPLHANHALAGYNGRDANNHARYADIILDYFVTENTTVPSLLINPPPDFNPDM
SIDDDEHTALHWACAMGRIRVVKLLLSAGADIFRVNSNQQTALMRATMFSNNYDLRKFPELFELLHRSILNIDRNDRTVF
HHVVDLALSRGKPHAARYYMETMINRLADYGDQLADILNFQDDEGETPLTMAARARSKRLVRLLLEHGADPKIRNKEGKN
AEDYIIEDERFRSSPSRTGPAGIELGADGLPVLPTSSLHTSEAGQRTAGRAVTLMSNLLHSLADSYDSEINTAEKKLTQA
HGLLKQIQTEIEDSAKVAEALHHEAQGVDEERKRVDSLQLALKHAINKRARDDLERRWSEGKQAIKRARLQAGLEPGALS
TSNATNAPATGDQKSKDDAKSLIEALPAGTNVKTAIAELRKQLSQVQANKTELVDKFVARAREQGTGRTMAAYRRLIAAG
CGGIAPDEVDAVVGVLCELLQESHTGARAGAGGERDDRARDVAMMLKGAGAAALAANAGAP"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
|
||||
# Reference protein MBP1_WALME (Wallemia mellicola, taxonomy ID 1708541).
# dbAutoincrement() is handed the IDs already in the table to produce the new
# row ID; the sequence literal is passed through dbSanitizeSequence().
refDB$protein <-
  rbind(
    refDB$protein,
    data.frame(
      ID = dbAutoincrement(refDB$protein$ID, ns = "ref"),
      name = "MBP1_WALME",
      RefSeqID = "XP_006957051",
      UniProtID = "I4YGC0",
      taxonomy.ID = 1708541L,
      sequence = dbSanitizeSequence("
MSAPPIYKACYSGVPVYEFNCKNVAVMKRRSDSWMNATQILKVANFDKPQRTRILEREVQKGTHEKVQGGYGKYQGTWIP
MERSVELARQYRIELLLDPIINYLPGPQSPPLAPKHATNVGSRARKSTAPAAQTLPSTSKVFHPLSSTKHPAKLAAATNA
KAEISDGEDASIPSSPSFKSNSSRTPSPIRINARKRKLEDEATIPSSAIDGSISYEDIILDYFISESTQIPALLIHPPSD
FNPNMSIDDEGHTAMHWACAMGKVRVVKLLLSAGADIFRVNHSEQTALMRSVMFSNNYDIRKFPQLYELLHRSTLNLDKH
DRTVLHHIVDLALTKSKTHAARYYMECVLSKLANYPDELADVINFQDDEGESALTLAARARSKRLVKLLLEHGADSKLPN
KDGKTAEDYILEDERFRQSPLLNSNHLRLHPPDTSIYAPPAHLFNSETSQNIANTSMSSVANLLESLAQSYDKEITQKER
DYQQAQVILRNIKTDIVEAKSNIEKMTIDSSEFEHLKHKLRELEMKLEEHSNDVYNKGWEEYSRNVDDPAIDAPSDNVQE
ECASLRNKIKDLQEKRISSMQELIKRQKEVGTGKKMSEYRKLISVGCGIPTTEIDAVLEMLLESLESENANKKAALASGI
SGALSSTSSAPSQATTSAPTGVATPGAPVPASSEKAGLLPPAPVMQ"),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
# === taxonomy table ===
|
||||
|
||||
# Taxonomy rows for the ten reference species. ID holds the same integer
# values that the protein rows store in taxonomy.ID; species is the binomial
# name. The two vectors are matched by position, so keep them in the same
# order when editing.
# The specific epithet is written in lower case ("Puccinia graminis").
refDB$taxonomy <-
  rbind(
    refDB$taxonomy,
    data.frame(
      ID = c(
        162425L,
        101162L,
        5141L,
        4932L,
        4896L,
        5346L,
        5207L,
        5297L,
        5270L,
        1708541L
      ),
      species = c(
        "Aspergillus nidulans",
        "Bipolaris oryzae",
        "Neurospora crassa",
        "Saccharomyces cerevisiae",
        "Schizosaccharomyces pombe",
        "Coprinopsis cinerea",
        "Cryptococcus neoformans",
        "Puccinia graminis",
        "Ustilago maydis",
        "Wallemia mellicola"
      ),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# === feature table ===
|
||||
|
||||
# Eight feature definitions, one per position across the parallel vectors
# below (ID, name, description, sourceDB, accession). Features without an
# accession carry NA.
# NOTE(review): every row uses type.ID "ref_typ_1", but the type table built
# in this script only defines "ref_typ_0" - confirm where "ref_typ_1" comes
# from.
refDB$feature <-
  rbind(
    refDB$feature,
    data.frame(
      ID = c("ref_ftr_1", "ref_ftr_2", "ref_ftr_3", "ref_ftr_4",
             "ref_ftr_5", "ref_ftr_6", "ref_ftr_7", "ref_ftr_8"),
      name = c("APSES fold", "KilA-N", "AT hook", "low complexity",
               "Ankyrin", "Swi6 fold", "coiled coil", "McInerny 2011"),
      type.ID = rep("ref_typ_1", times = 8),
      description = c(
        "DNA binding domain by similarity to structure",
        "DNA binding domain by Pfam annotation",
        "DNA interaction motif by SMART annotation",
        "SEG annotation by SMART",
        "Ankyrin domain by SMART annotation",
        "Swi6 fold by similarity to structure",
        "Coiled coil by SMART annotation",
        "Yeast cell cycle review"
      ),
      sourceDB = c("PDB", "Pfam", "SMART", "SMART",
                   "SMART", "PDB", "SMART", "PubMed"),
      accession = c("1BM8_A_1_99", "PF04383", NA, NA,
                    "SM00248", "1SW6_B", NA, NA),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
# === protein annotation table ===
|
||||
|
||||
# There are many protein annotations, so they are not coded explicitly here;
# they are read from a prepared tab-separated text file instead. Lines
# starting with "#" in that file are treated as comments.
tmp <- read.table("referenceDomainAnnotations.txt",
                  header = TRUE,
                  sep = "\t",
                  comment.char = "#",
                  strip.white = TRUE,
                  stringsAsFactors = FALSE)

# Remove the last column (the notes): it is in the text file only for our
# reference and is not part of the data model. drop = FALSE keeps tmp a data
# frame even if only a single column remains.
tmp <- tmp[ , -ncol(tmp), drop = FALSE]

# Add table IDs one row at a time: dbAutoincrement() is handed the IDs
# assigned so far, so the rows must be processed in order. seq_len() makes the
# loop a no-op for an empty table, whereas 1:nrow(tmp) would run over c(1, 0).
for (i in seq_len(nrow(tmp))) {
  tmp[i, "ID"] <- dbAutoincrement(tmp$ID, ns = "ref", code = "fan")
}

# Add the table to the DB.
refDB$proteinAnnotation <- rbind(refDB$proteinAnnotation, tmp)
|
||||
|
||||
|
||||
# === system table ===
|
||||
|
||||
# Single system row: the G1/S transition system in yeast. Other tables in this
# script refer to it by its ID "ref_sys_1".
refDB$system <-
  rbind(
    refDB$system,
    data.frame(
      ID = "ref_sys_1",
      name = "G1/S SACCE",
      notes = paste("Regulates transition from G1 to S phase",
                    "in the yeast cell cycle."),
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# === component table ===
|
||||
|
||||
# Single component row linking a protein to a system by their IDs.
# "ref_pro_4" is hard-coded and relies on MBP1_SACCE being the fourth protein
# row added earlier in this script.
refDB$component <-
  rbind(
    refDB$component,
    data.frame(
      ID = "ref_cmp_1",
      protein.ID = "ref_pro_4",  # MBP1_SACCE
      system.ID = "ref_sys_1",   # G1/S SACCE
      status = "include",
      notes = "Part of MBF complex.",
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# === system annotation table ===
|
||||
|
||||
# Single system annotation row: attaches feature "ref_ftr_8" (the PubMed
# entry in the feature table) to system "ref_sys_1".
refDB$systemAnnotation <-
  rbind(
    refDB$systemAnnotation,
    data.frame(
      ID = "ref_san_1",
      system.ID = "ref_sys_1",   # G1/S SACCE
      feature.ID = "ref_ftr_8",  # PubMed
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# === component annotation table ===
|
||||
|
||||
# Single component annotation row: attaches feature "ref_ftr_8" (the PubMed
# entry in the feature table) to component "ref_cmp_1".
refDB$componentAnnotation <-
  rbind(
    refDB$componentAnnotation,
    data.frame(
      ID = "ref_can_1",
      component.ID = "ref_cmp_1",  # Mbp1 in G1/S SACCE
      feature.ID = "ref_ftr_8",    # PubMed
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# === type table ===
|
||||
|
||||
# Single type row: the "UNDEF" placeholder type with ID "ref_typ_0".
refDB$type <-
  rbind(
    refDB$type,
    data.frame(
      ID = "ref_typ_0",
      name = "UNDEF",
      description = "Undefined type",
      stringsAsFactors = FALSE
    )
  )
|
||||
|
||||
|
||||
# === save
|
||||
|
||||
# Write the finished refDB object to disk. The path is relative to the
# current working directory.
save(list = "refDB", file = "data/refDB.RData")
|
||||
|
||||
# [END]
|
Loading…
x
Reference in New Issue
Block a user