From 131ec842e495113934bbaa94e1cc0c8cbfcf67f1 Mon Sep 17 00:00:00 2001 From: hyginn Date: Mon, 25 Sep 2017 01:32:41 -0400 Subject: [PATCH] JSON data sources --- data/MBP1_SACCE.json | 20 ++++ data/refAnnotations.json | 115 +++++++++++++++++++++ data/refFeatures.json | 47 +++++++++ data/refProteins.json | 155 ++++++++++++++++++++++++++++ data/refTaxonomy.json | 22 ++++ data/referenceDomainAnnotations.txt | 115 +++++++++++++++++++++ 6 files changed, 474 insertions(+) create mode 100644 data/MBP1_SACCE.json create mode 100644 data/refAnnotations.json create mode 100644 data/refFeatures.json create mode 100644 data/refProteins.json create mode 100644 data/refTaxonomy.json create mode 100644 data/referenceDomainAnnotations.txt diff --git a/data/MBP1_SACCE.json b/data/MBP1_SACCE.json new file mode 100644 index 0000000..56e91a3 --- /dev/null +++ b/data/MBP1_SACCE.json @@ -0,0 +1,20 @@ +[ + { "name" : "MBP1_SACCE", + "RefSeqID" : "NP_010227", + "UniProtID" : "P39678", + "taxonomyID" : 559292, + "sequence" : [ + "MSNQIYSARYSGVDVYEFIHSTGSIMKRKKDDWVNATHILKAANFAKAKRTRILEKEVLKETHEKVQGGF", + "GKYQGTWVPLNIAKQLAEKFSVYDQLKPLFDFTQTDGSASPPPAPKHHHASKVDRKKAIRSASTSAIMET", + "KRNNKKAEENQFQSSKILGNPTAAPRKRGRPVGSTRGSRRKLGVNLQRSQSDMGFPRPAIPNSSISTTQL", + "PSIRSTMGPQSPTLGILEEERHDSRQQQPQQNNSAQFKEIDLEDGLSSDVEPSQQLQQVFNQNTGFVPQQ", + "QSSLIQTQQTESMATSVSSSPSLPTSPGDFADSNPFEERFPGGGTSPIISMIPRYPVTSRPQTSDINDKV", + "NKYLSKLVDYFISNEMKSNKSLPQVLLHPPPHSAPYIDAPIDPELHTAFHWACSMGNLPIAEALYEAGTS", + "IRSTNSQGQTPLMRSSLFHNSYTRRTFPRIFQLLHETVFDIDSQSQTVIHHIVKRKSTTPSAVYYLDVVL", + "SKIKDFSPQYRIELLLNTQDKNGDTALHIASKNGDVVFFNTLVKMGALTTISNKEGLTANEIMNQQYEQM", + "MIQNGTNQHVNSSNTDLNIHVNTNNIETKNDVNSMVIMSPVSPSDYITYPSQIATNISRNIPNVVNSMKQ", + "MASIYNDLHEQHDNEIKSLQKTLKSISKTKIQVSLKTLEVLKESSKDENGEAQTNDDFEILSRLQEQNTK", + "KLRKRLIRYKRLIKQKLEYRQTVLLNKLIEDETQATTNNTVEKDNNTLERLELAQELTMLQLQRKNKLSS", + "LVKKFEDNAKIHKYRRIIREGTEMNIEEVDSSLDVILQTLIANNNKNKGAEQIITISNANSHA"] + } +] diff --git a/data/refAnnotations.json b/data/refAnnotations.json new file mode 100644 index 0000000..66d6ab3 --- /dev/null +++ b/data/refAnnotations.json @@ -0,0 +1,115 @@ +[ + {"pName" : "MBP1_SACCE", "fName" : "APSES fold", "start" : "4", "end" : "102"}, + {"pName" : "MBP1_SACCE", "fName" : "KilA-N", "start" : "22", "end" : "105"}, + {"pName" : "MBP1_SACCE", "fName" : "low complexity", "start" : "108", "end" : "122"}, + {"pName" : "MBP1_SACCE", "fName" : "low complexity", "start" : "236", "end" : "241"}, + {"pName" : "MBP1_SACCE", "fName" : "low complexity", "start" : "279", "end" : "307"}, + {"pName" : "MBP1_SACCE", "fName" : "low complexity", "start" : "700", "end" : "717"}, + {"pName" : "MBP1_SACCE", "fName" : "low complexity", "start" : "700", "end" : "717"}, + {"pName" : "MBP1_SACCE", "fName" : "Ankyrin fold", "start" : "394", "end" : "423"}, + {"pName" : "MBP1_SACCE", "fName" : "Ankyrin fold", "start" : "427", "end" : "463"}, + {"pName" : "MBP1_SACCE", "fName" : "Ankyrin fold", "start" : "512", "end" : "541"}, + {"pName" : "MBP1_SACCE", "fName" : "Swi6 fold", "start" : "381", "end" : "547"}, + {"pName" : "MBP1_SACCE", "fName" : "coiled coil", "start" : "633", "end" : "655"}, + + {"pName" : "MBP1_ASPNI", "fName" : "APSES fold", "start" : "9", "end" : "106"}, + {"pName" : "MBP1_ASPNI", "fName" : "KilA-N", "start" : "26", "end" : "109"}, + {"pName" : "MBP1_ASPNI", "fName" : "low complexity", "start" : "529", "end" : "534"}, + {"pName" : "MBP1_ASPNI", "fName" : "Ankyrin fold", "start" : "260", "end" : "289"}, + {"pName" : "MBP1_ASPNI", "fName" : "Ankyrin fold", "start" : "381", "end" : "413"}, + {"pName" : "MBP1_ASPNI", "fName" : "Swi6 fold", "start" : "193", "end" : "402"}, + {"pName" : "MBP1_ASPNI", "fName" : "coiled coil", "start" : "509", "end" : "572"}, + + {"pName" : "MBP1_BIPOR", "fName" : "APSES fold", "start" : "8", "end" : "106"}, + {"pName" : "MBP1_BIPOR", "fName" : "KilA-N", "start" : "26", "end" : "109"}, + {"pName" : "MBP1_BIPOR", "fName" : "low complexity", "start" : "134", "end" : "152"}, + {"pName" : "MBP1_BIPOR", "fName" : "low complexity", "start" : "267", "end" : "278"}, + {"pName" : "MBP1_BIPOR", "fName" : "low complexity", "start" : "670", "end" : "685"}, + {"pName" : "MBP1_BIPOR", "fName" : "Ankyrin fold", "start" : "266", "end" : "295"}, + {"pName" : "MBP1_BIPOR", "fName" : "Ankyrin fold", "start" : "387", "end" : "416"}, + {"pName" : "MBP1_BIPOR", "fName" : "Swi6 fold", "start" : "253", "end" : "421"}, + {"pName" : "MBP1_BIPOR", "fName" : "coiled coil", "start" : "659", "end" : "681"}, + {"pName" : "MBP1_BIPOR", "fName" : "coiled coil", "start" : "500", "end" : "590"}, + + {"pName" : "MBP1_NEUCR", "fName" : "APSES fold", "start" : "14", "end" : "114"}, + {"pName" : "MBP1_NEUCR", "fName" : "KilA-N", "start" : "34", "end" : "117"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "130", "end" : "141"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "253", "end" : "266"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "514", "end" : "525"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "554", "end" : "564"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "601", "end" : "618"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "620", "end" : "629"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "636", "end" : "652"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "658", "end" : "672"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "725", "end" : "735"}, + {"pName" : "MBP1_NEUCR", "fName" : "low complexity", "start" : "752", "end" : "771"}, + {"pName" : "MBP1_NEUCR", "fName" : "Ankyrin fold", "start" : "268", "end" : "297"}, + {"pName" : "MBP1_NEUCR", "fName" : "Ankyrin fold", "start" : "390", "end" : "419"}, + {"pName" : "MBP1_NEUCR", "fName" : "Swi6 fold", "start" : "270", "end" : "426"}, + {"pName" : "MBP1_NEUCR", "fName" : "coiled coil", "start" : "500", "end" : "550"}, + + {"pName" : "MBP1_SCHPO", "fName" : "APSES fold", "start" : "8", "end" : "104"}, + {"pName" : "MBP1_SCHPO", "fName" : "KilA-N", "start" : "25", "end" : "113"}, + {"pName" : "MBP1_SCHPO", "fName" : "low complexity", "start" : "111", "end" : "125"}, + {"pName" : "MBP1_SCHPO", "fName" : "low complexity", "start" : "136", "end" : "145"}, + {"pName" : "MBP1_SCHPO", "fName" : "low complexity", "start" : "176", "end" : "191"}, + {"pName" : "MBP1_SCHPO", "fName" : "low complexity", "start" : "422", "end" : "447"}, + {"pName" : "MBP1_SCHPO", "fName" : "Ankyrin fold", "start" : "247", "end" : "276"}, + {"pName" : "MBP1_SCHPO", "fName" : "Ankyrin fold", "start" : "368", "end" : "397"}, + {"pName" : "MBP1_SCHPO", "fName" : "Swi6 fold", "start" : "234", "end" : "400"}, + {"pName" : "MBP1_SCHPO", "fName" : "coiled coil", "start" : "457", "end" : "538"}, + + {"pName" : "MBP1_COPCI", "fName" : "APSES fold", "start" : "5", "end" : "103"}, + {"pName" : "MBP1_COPCI", "fName" : "KilA-N", "start" : "23", "end" : "106"}, + {"pName" : "MBP1_COPCI", "fName" : "low complexity", "start" : "170", "end" : "191"}, + {"pName" : "MBP1_COPCI", "fName" : "low complexity", "start" : "435", "end" : "450"}, + {"pName" : "MBP1_COPCI", "fName" : "low complexity", "start" : "611", "end" : "626"}, + {"pName" : "MBP1_COPCI", "fName" : "Ankyrin fold", "start" : "270", "end" : "299"}, + {"pName" : "MBP1_COPCI", "fName" : "Ankyrin fold", "start" : "389", "end" : "418"}, + {"pName" : "MBP1_COPCI", "fName" : "Ankyrin fold", "start" : "474", "end" : "509"}, + {"pName" : "MBP1_COPCI", "fName" : "Swi6 fold", "start" : "257", "end" : "429"}, + {"pName" : "MBP1_COPCI", "fName" : "coiled coil", "start" : "500", "end" : "570"}, + {"pName" : "MBP1_COPCI", "fName" : "coiled coil", "start" : "651", "end" : "678"}, + + {"pName" : "MBP1_CRYNE", "fName" : "APSES fold", "start" : "113", "end" : "211"}, + {"pName" : "MBP1_CRYNE", "fName" : "KilA-N", "start" : "131", "end" : "215"}, + {"pName" : "MBP1_CRYNE", "fName" : "low complexity", "start" : "66", "end" : "85"}, + {"pName" : "MBP1_CRYNE", "fName" : "low complexity", "start" : "413", "end" : "423"}, + {"pName" : "MBP1_CRYNE", "fName" : "low complexity", "start" : "633", "end" : "644"}, + {"pName" : "MBP1_CRYNE", "fName" : "low complexity", "start" : "697", "end" : "709"}, + {"pName" : "MBP1_CRYNE", "fName" : "Ankyrin fold", "start" : "477", "end" : "506"}, + {"pName" : "MBP1_CRYNE", "fName" : "Ankyrin fold", "start" : "618", "end" : "647"}, + {"pName" : "MBP1_CRYNE", "fName" : "Swi6 fold", "start" : "452", "end" : "663"}, + + {"pName" : "MBP1_PUCGR", "fName" : "APSES fold", "start" : "90", "end" : "187"}, + {"pName" : "MBP1_PUCGR", "fName" : "KilA-N", "start" : "107", "end" : "190"}, + {"pName" : "MBP1_PUCGR", "fName" : "low complexity", "start" : "208", "end" : "227"}, + {"pName" : "MBP1_PUCGR", "fName" : "low complexity", "start" : "273", "end" : "291"}, + {"pName" : "MBP1_PUCGR", "fName" : "Ankyrin fold", "start" : "442", "end" : "271"}, + {"pName" : "MBP1_PUCGR", "fName" : "Ankyrin fold", "start" : "475", "end" : "509"}, + {"pName" : "MBP1_PUCGR", "fName" : "Ankyrin fold", "start" : "561", "end" : "590"}, + {"pName" : "MBP1_PUCGR", "fName" : "Swi6 fold", "start" : "429", "end" : "601"}, + {"pName" : "MBP1_PUCGR", "fName" : "coiled coil", "start" : "827", "end" : "863"}, + + {"pName" : "MBP1_USTMA", "fName" : "APSES fold", "start" : "7", "end" : "104"}, + {"pName" : "MBP1_USTMA", "fName" : "KilA-N", "start" : "24", "end" : "107"}, + {"pName" : "MBP1_USTMA", "fName" : "low complexity", "start" : "106", "end" : "116"}, + {"pName" : "MBP1_USTMA", "fName" : "low complexity", "start" : "161", "end" : "183"}, + {"pName" : "MBP1_USTMA", "fName" : "low complexity", "start" : "657", "end" : "672"}, + {"pName" : "MBP1_USTMA", "fName" : "low complexity", "start" : "776", "end" : "796"}, + {"pName" : "MBP1_USTMA", "fName" : "Ankyrin fold", "start" : "245", "end" : "274"}, + {"pName" : "MBP1_USTMA", "fName" : "Ankyrin fold", "start" : "355", "end" : "384"}, + {"pName" : "MBP1_USTMA", "fName" : "Swi6 fold", "start" : "232", "end" : "395"}, + {"pName" : "MBP1_USTMA", "fName" : "coiled coil", "start" : "581", "end" : "609"}, + + {"pName" : "MBP1_WALME", "fName" : "APSES fold", "start" : "6", "end" : "103"}, + {"pName" : "MBP1_WALME", "fName" : "KilA-N", "start" : "23", "end" : "106"}, + {"pName" : "MBP1_WALME", "fName" : "low complexity", "start" : "149", "end" : "162"}, + {"pName" : "MBP1_WALME", "fName" : "low complexity", "start" : "171", "end" : "188"}, + {"pName" : "MBP1_WALME", "fName" : "low complexity", "start" : "618", "end" : "628"}, + {"pName" : "MBP1_WALME", "fName" : "low complexity", "start" : "634", "end" : "660"}, + {"pName" : "MBP1_WALME", "fName" : "Ankyrin fold", "start" : "250", "end" : "279"}, + {"pName" : "MBP1_WALME", "fName" : "Ankyrin fold", "start" : "369", "end" : "398"}, + {"pName" : "MBP1_WALME", "fName" : "Swi6 fold", "start" : "237", "end" : "409"}, + {"pName" : "MBP1_WALME", "fName" : "coiled coil", "start" : "461", "end" : "585"} +] diff --git a/data/refFeatures.json b/data/refFeatures.json new file mode 100644 index 0000000..8df1da7 --- /dev/null +++ b/data/refFeatures.json @@ -0,0 +1,47 @@ +[ + { "name" : "APSES fold", + "description " : "DNA binding domain by similarity to structure", + "sourceDB" : "PDB", + "accession" : "1BM8_A_1_99"}, + + { "name" : "KilA-N", + "description " : "DNA binding domain by Pfam annotation", + "sourceDB" : "Pfam", + "accession" : "PF04383"}, + + { "name" : "AT hook", + "description " : "DNA interaction motif by SMART annotation", + "sourceDB" : "SMART", + "accession" : null}, + + { "name" : "low complexity", + "description " : "SEG annotation by SMART", + "sourceDB" : "SMART", + "accession" : null}, + + { "name" : "Ankyrin fold", + "description " : "Ankyrin domain by SMART annotation", + "sourceDB" : "SMART", + "accession" : "SM00248"}, + + { "name" : "Swi6 fold", + "description " : "Swi6 fold by similarity to structure", + "sourceDB" : "PDB", + "accession" : "1SW6_B"}, + + { "name" : "coiled coil", + "description " : "Coiled coil by SMART annotation", + "sourceDB" : "SMART", + "accession" : null}, + + { "name" : "McInerny 2011", + "description " : "Yeast cell cycle review", + "sourceDB" : "PubMed", + "accession" : "21310294"} +] + + + + + + diff --git a/data/refProteins.json b/data/refProteins.json new file mode 100644 index 0000000..43832d2 --- /dev/null +++ b/data/refProteins.json @@ -0,0 +1,155 @@ +[ + { "name" : "MBP1_SCHPO", + "RefSeqID" : "NP_593032", + "UniProtID" : "P41412", + "taxonomyID" : 284812, + "sequence" : [ + "MAPRSSAVHVAVYSGVEVYECFIKGVSVMRRRRDSWLNATQILKVADFDKPQRTRVLERQVQIGAHEKVQ", + "GGYGKYQGTWVPFQRGVDLATKYKVDGIMSPILSLDIDEGKAIAPKKKQTKQKKPSVRGRRGRKPSSLSS", + "STLHSVNEKQPNSSISPTIESSMNKVNLPGAEEQVSATPLPASPNALLSPNDNTIKPVEELGMLEAPLDK", + "YEESLLDFFLHPEEGRIPSFLYSPPPDFQVNSVIDDDGHTSLHWACSMGHIEMIKLLLRANADIGVCNRL", + "SQTPLMRSVIFTNNYDCQTFGQVLELLQSTIYAVDTNGQSIFHHIVQSTSTPSKVAAAKYYLDCILEKLI", + "SIQPFENVVRLVNLQDSNGDTSLLIAARNGAMDCVNSLLSYNANPSIPNRQRRTASEYLLEADKKPHSLL", + "QSNSNASHSAFSFSGISPAIISPSCSSHAFVKAIPSISSKFSQLAEEYESQLREKEEDLIRANRLKQDTL", + "NEISRTYQELTFLQKNNPTYSQSMENLIREAQETYQQLSKRLLIWLEARQIFDLERSLKPHTSLSISFPS", + "DFLKKEDGLSLNNDFKKPACNNVTNSDEYEQLINKLTSLQASRKKDTLYIRKLYEELGIDDTVNSYRRLI", + "AMSCGINPEDLSLEILDAVEEALTREK"] + }, + { "name" : "MBP1_ASPNI", + "RefSeqID" : "XP_660758", + "UniProtID" : "Q5B8H6", + "taxonomyID" : 227321, + "sequence" : [ + "MAAVDFSNVYSATYSSVPVYEFKIGTDSVMRRRSDDWINATHILKVAGFDKPARTRILEREVQKGVHEKV", + "QGGYGKYQGTWIPLQEGRQLAERNNILDKLLPIFDYVAGDRSPPPAPKHTSAASKPRAPKINKRVVKEDV", + "FSAVNHHRSMGPPSFHHEHYDVNTGLDEDESIEQATLESSSMIADEDMISMSQNGPYSSRKRKRGINEVA", + "AMSLSEQEHILYGDQLLDYFMTVGDAPEATRIPPPQPPANFQVDRPIDDSGNTALHWACAMGDLEIVKDL", + "LRRGADMKALSIHEETPLVRAVLFTNNYEKRTFPALLDLLLDTISFRDWFGATLFHHIAQTTKSKGKWKS", + "SRYYCEVALEKLRTTFSPEEVDLLLSCQDSVGDTAVLVAARNGVFRLVDLLLSRCPRAGDLVNKRGETAS", + "SIMQRAHLAERDIPPPPSSITMGNDHIDGEVGAPTSLEPQSVTLHHESSPATAQLLSQIGAIMAEASRKL", + "TSSYGAAKPSQKDSDDVANPEALYEQLEQDRQKIRRQYDALAAKEAAEESSDAQLGRYEQMRDNYESLLE", + "QIQRARLKERLASTPVPTQTAVIGSSSPEQDRLLTTFQLSRALCSEQKIRRAAVKELAQQRADAGVSTKF", + "DVHRKLVALATGLKEEELDPMAAELAETLEFDRMNGKGVGPESPEADHKDSASLPFPGPVVSVDA"] + }, + { "name" : "MBP1_BIPOR", + "RefSeqID" : "XP_007682304", + "UniProtID" : "W6ZM86", + "taxonomyID" : 930090, + "sequence" : [ + "MPPAPDGKIYSATYSNVPVYECNVNGHHVMRRRADDWINATHILKVADYDKPARTRILEREVQKGVHEKV", + "QGGYGKYQGTWIPLEEGRGLAERNGVLDKMRAIFDYVPGDRSPPPAPKHATAASNRMKPPRQTAAAVAAA", + "AVAAAAAAAAVANHNALMSNSRSQASEDPYENSQRSQIYREDTPDNETVISESMLGDADLMDMSQYSADG", + "NRKRKRGMDQMSLLDQQHQIWADQLLDYFMLLDHEAAVSWPEPPPSINLDRPIDEKGHAAMHWAAAMGDV", + "GVVKELIHRGARLDCLSNNLETPLMRAVMFTNNFDKETMPSMVKIFQQTVHRTDWFGSTVFHHIAATTSS", + "SNKYVCARWYLDCIINKLSETWIPEEVTRLLNAADQNGDTAIMIAARNGARKCVRSLLGRNVAVDIPNKK", + "GETADDLIRELNQRRRMHGRTRQASSSPFAPAPEHRLNGHVPHFDGGPLMSVPVPSMAVRESVQYRSQTA", + "SHLMTKVAPTLLEKCEELATAYEAELQEKEAEFFDAERVVKRRQAELEAVRKQVAELQSMSKGLHIDLND", + "EEAERQQEDELRLLVEEAESLLEIEQKAELRRLCSSMPQQNSDSSPVDITEKMRLALLLHRAQLERRELV", + "REVVGNLSVAGMSEKQGTYKKLIAKALGEREEDVESMLPEILQELEEAETQERAEGLDGSPV"] + }, + { "name" : "MBP1_NEUCR", + "RefSeqID" : "XP_955821", + "UniProtID" : "Q7RW59", + "taxonomyID" : 367110, + "sequence" : [ + "MVKENVGGNPEPGIYSATYSGIPVWEYQFGVDLKEHVMRRRHDDWVNATHILKAAGFDKPARTRILEREV", + "QKDTHEKIQGGYGRYQGTWIPLEQAEALARRNNIYERLKPIFEFQPGNESPPPAPRHASKPKAPKVKPAV", + "PTWGSKSAKNANPPQPGTFLPPGRKGLPAQAPDYNDADTHMHDDDTPDNLTVASASYMAEDDRYDHSHFS", + "TGHRKRKRDELIEDMTEQQHAVYGDELLDYFLLSRNEQPAVRPDPPPNFKPDWPIDNERHTCLHWASAMG", + "DVDVMRQLKKFGASLDAQNVRGETPFMRAVNFTNCFEKQTFPQVMKELFSTIDCRDLSGCTVIHHAAVMK", + "IGRVNSQSCSRYYLDIILNRLQETHHPEFVQQLLDAQDNDGNTAVHLAAMRDARKCIRALLGRGASTDIP", + "NKQGIRAEELIKELNASISKSRSNLPQRSSSPFAPDTQRHDAFHEAISESMVTSRKNSQPNYSSDAANTV", + "QNRITPLVLQKLKDLTATYDSEFKEKDDAEKEARRILNKTQSELKALTASIDDYNSRLDTDDVAAKTAAE", + "MATARHKVLAFVTHQNRISVQEAVKQELAALDRANAVTNGTSTKSKSSSPSKKPKLSPIPDQKDKPPKDE", + "NETESEAEHPDPPAAQAHQQQPGPSSQDTEVEDQDREEEEDDYTHRLSLAAELRSILQEQRSAENDYVEA", + "RGMLGTGERIDKYKHLLMSCLPPDEQENLEENLEEMIKLMEQEDESVTDLPAGAVGGGGGGNAADGSGGG", + "GQPSNGRRESVLPALRGGNGDGEMSRRGSRTAAAAAAQVDGEREINGRAGAERTERIQEIAAV"] + }, + { "name" : "MBP1_COPCI", + "RefSeqID" : "XP_001837394", + "UniProtID" : "A8NYC6", + "taxonomyID" : 240176, + "sequence" : [ + "MPEAQIFKATYSGIPVYEMMCKGVAVMRRRSDSWLNATQILKVAGFDKPQRTRVLEREVQKGEHEKVQGG", + "YGKYQGTWIPLERGMQLAKQYNCEHLLRPIIEFTPAAKSPPLAPKHLVATAGNRPVRKPLTTDLSAAVIN", + "TRSTRKQVADGVGEESDHDTHSLRGSEDGSMTPSPSEASSSSRTPSPIHSPGTYHSNGLDGPSSGGRNRY", + "RQSNDRYDEDDDASRHNGMGDPRSYGDQILEYFISDTNQIPPILITPPPDFDPNMAIDDDGHTSLHWACA", + "MGRIRIVKLLLSAGADIFKVNKAGQTALMRSVMFANNYDVRKFPELYELLHRSTLNIDNSNRTVFHHVVD", + "VAMSKGKTHAARYYMETILTRLADYPKELADVINFQDEDGETALTMAARCRSKRLVKLLIDHGADPKINN", + "HDGKNAEDYILEDERFRSSPAPSSRVAAMSYRNAQVAYPPPGAPSTYSFAPANHDRPPLHYSAAAQKAST", + "RCVNDMASMLDSLAASFDQELRDKERDMAQAQALLTNIQAEILESQRTVLQLRQQAEGLSQAKQRLADLE", + "NALQDKMGRRYRLGFEKWIKDEETREKVIRDAANGDLVLTPATTSYTVDEDGDSDSGSNGDKNKGKRKAQ", + "VQQEEVSDLVELYSNIPTDPEELRKQCEALREEVSQSRKRRKAMFDELVTFQAEAGTSGRMSDYRRLIAA", + "GCGGLEPLEIDSVLGMLLETLEAEDPSSTSATWSGSKGQQTG"] + }, + { "name" : "MBP1_CRYNE", + "RefSeqID" : "XP_569090", + "UniProtID" : "Q5KMQ9", + "taxonomyID" : 214684, + "sequence" : [ + "MGKKVIASGGDNGPNTIYKATYSGVPVYEMVCRDVAVMRRRSDAYLNATQILKVAGFDKPQRTRVLEREV", + "QKGEHEKVQGGYGKYQGTWIPIERGLALAKQYGVEDILRPIIDYVPTSVSPPPAPKHSVAPPSKARRDKE", + "KETGRTKATPSRTGPTSAAALQAQAQLNRAKMHDSTPDADASFRSFEERVSLTPEDDSSSDTPSPVASVM", + "TDQDMEVDKMGMHMSMPNVTLSQNMEELGAGSRKRSAAMMMEDEDQFGQLRSIRGNSAVHTPHGTPRHLG", + "IGMPPEPIGPEQYTDIILNYFVSETSQIPSILVSPPHDFDPNAPIDDDGHTALHWACAMGRVRVVKLLLT", + "AGASIFAGNNAEQTPLMRSVMFSNNYDMRKFPELYELLHRSTLNIDKQNRTVFHHIANLALTKGKTHAAK", + "YYMETILARLADYPQELADVINFQDEEGETALTIAARARSRRLVKALLDHGANPKIKNRDSRSAEDYILE", + "DERFRSSPVPAPNGGIGKASTSAAAEKPLFAPQLYFSEAARLCGGQALTDITSHMQSLARSFDAELQGKE", + "RDILQAKALLTNIHTEVTENGRSITAITNQAAPLEEKRRELEALQASLKTRVKDALKKGYIGWLEGELVR", + "EQRWENGELEGNEEEKAAVQALRDVPTGGQEVVQAEEEKLRWEIEEKRKRRAMFVEKFVRAQTEAGTSEQ", + "IAKYRKLVSAGLGGVSTNEVDELMNQLLEGLEEENDNQVYNTTAGESGPSSWVQ"] + }, + { "name" : "MBP1_PUCGR", + "RefSeqID" : "XP_003327086", + "UniProtID" : "E3KED4", + "taxonomyID" : 418459, + "sequence" : [ + "MAYGGSIQPLRPPSRESATLHLHQPDLTVTSPPLSLTHCPPCVYSHFTHTPTSLIVIQVSLHSLLDQETY", + "HLLPSRSPPTVSVRMGTTTIYKATYSGVPVLEMPCEGIAVMRRRSDSWLNATQILKVAGFDKPQRTRVLE", + "REIQKGTHEKIQGGYGKYQGTWVPLDRGIDLAKQYGVDHLLSALFNFQPSSNESPPLAPKHVTALSTRVK", + "VSKVSAASAARAARAVVPSLPSTSGLGGRNTNNSWSNFDSDNEPGLPPAASSRESNGNWATQSKLARSSN", + "LARARANINNSHPEDLPVPAPDQLQASPLPSMQTADPENDNSLTPSELSLPSRTPSPIEDLPLTVNTASS", + "QSTRNKGKSRDLPDDEDLSRGQKRKYDTSLVEDTSYSDGADDQYINGNPSNAASAKYAKLILDYFVSESS", + "QIPNFLNDPPSDFDPNVVIDDDGHTALHWACAMGRIKIIKLLLTCGADIFRANNAGQTALMRAVMFTNNH", + "DLRTFPELFESFSGSVINIDRTDRTVFHYVIDIALTKGKVPAARYYLETILSQLSEYPKELIDILNFQDE", + "DGETALTLAARCRSKKLVKILLDHGANPKTANRDGKSAEDYILEDDKFRALSPTPCSSGPIRQLDQNSPG", + "GTSNRSDFVDLVDPVPIDSNLIPQRSPNASPPHYSETGQRVTKQLLPEVTSMIELLATTFDTELQDKERD", + "LDHAVGLLSNIEKEYLEGQRKILNYERMLSDFGEKKLALGDLEKELNDKLGKRYRFGWEKYVRDEEERAR", + "RITEQRSKYLQELSIEDRKLLDSSNLRFADPSKQEVLMKLQADERENSDLLNLIRTNSTDVESECDLLRE", + "SVQKLSEERERLFKEFINLSSENTGGENEEDDGANHTSANTSRLNNYRKLISLGCGGIGLDEVDEVIESL", + "NEGIDVNELNDNGFLTEQDEELGNHQNYHNIHTQGR"] + }, + { "name" : "MBP1_USTMA", + "RefSeqID" : "XP_011392621", + "UniProtID" : "A0A0D1DP35", + "taxonomyID" : 237631, + "sequence" : [ + "MSGDKTIFKATYSGVPVYECIINNVAVMRRRSDDWLNATQILKVVGLDKPQRTRVLEREIQKGIHEKVQG", + "GYGKYQGTWIPLDVAIELAERYNIQGLLQPITSYVPSAADSPPPAPKHTISTSNRSKKIIPADPGALGRS", + "RRATSIETESEVIGAAPNNVSEGSMSPSPSDISSSSRTPSPLPADRAHPLHANHALAGYNGRDANNHARY", + "ADIILDYFVTENTTVPSLLINPPPDFNPDMSIDDDEHTALHWACAMGRIRVVKLLLSAGADIFRVNSNQQ", + "TALMRATMFSNNYDLRKFPELFELLHRSILNIDRNDRTVFHHVVDLALSRGKPHAARYYMETMINRLADY", + "GDQLADILNFQDDEGETPLTMAARARSKRLVRLLLEHGADPKIRNKEGKNAEDYIIEDERFRSSPSRTGP", + "AGIELGADGLPVLPTSSLHTSEAGQRTAGRAVTLMSNLLHSLADSYDSEINTAEKKLTQAHGLLKQIQTE", + "IEDSAKVAEALHHEAQGVDEERKRVDSLQLALKHAINKRARDDLERRWSEGKQAIKRARLQAGLEPGALS", + "TSNATNAPATGDQKSKDDAKSLIEALPAGTNVKTAIAELRKQLSQVQANKTELVDKFVARAREQGTGRTM", + "AAYRRLIAAGCGGIAPDEVDAVVGVLCELLQESHTGARAGAGGERDDRARDVAMMLKGAGAAALAANAGA", + "P"] + }, + { "name" : "MBP1_WALME", + "RefSeqID" : "XP_006957051", + "UniProtID" : "I4YGC0", + "taxonomyID" : 671144, + "sequence" : [ + "MSAPPIYKACYSGVPVYEFNCKNVAVMKRRSDSWMNATQILKVANFDKPQRTRILEREVQKGTHEKVQGG", + "YGKYQGTWIPMERSVELARQYRIELLLDPIINYLPGPQSPPLAPKHATNVGSRARKSTAPAAQTLPSTSK", + "VFHPLSSTKHPAKLAAATNAKAEISDGEDASIPSSPSFKSNSSRTPSPIRINARKRKLEDEATIPSSAID", + "GSISYEDIILDYFISESTQIPALLIHPPSDFNPNMSIDDEGHTAMHWACAMGKVRVVKLLLSAGADIFRV", + "NHSEQTALMRSVMFSNNYDIRKFPQLYELLHRSTLNLDKHDRTVLHHIVDLALTKSKTHAARYYMECVLS", + "KLANYPDELADVINFQDDEGESALTLAARARSKRLVKLLLEHGADSKLPNKDGKTAEDYILEDERFRQSP", + "LLNSNHLRLHPPDTSIYAPPAHLFNSETSQNIANTSMSSVANLLESLAQSYDKEITQKERDYQQAQVILR", + "NIKTDIVEAKSNIEKMTIDSSEFEHLKHKLRELEMKLEEHSNDVYNKGWEEYSRNVDDPAIDAPSDNVQE", + "ECASLRNKIKDLQEKRISSMQELIKRQKEVGTGKKMSEYRKLISVGCGIPTTEIDAVLEMLLESLESENA", + "NKKAALASGISGALSSTSSAPSQATTSAPTGVATPGAPVPASSEKAGLLPPAPVMQ"] + } +] diff --git a/data/refTaxonomy.json b/data/refTaxonomy.json new file mode 100644 index 0000000..4d28314 --- /dev/null +++ b/data/refTaxonomy.json @@ -0,0 +1,22 @@ +[ + { "ID" : 227321, + "species" : "Aspergillus nidulans FGSC A4"}, + { "ID" : 930090, + "species" : "Bipolaris oryzae ATCC 44560"}, + { "ID" : 367110, + "species" : "Neurospora crassa OR74A"}, + { "ID" : 559292, + "species" : "Saccharomyces cerevisiae S288C"}, + { "ID" : 284812, + "species" : "Schizosaccharomyces pombe 972h-"}, + { "ID" : 240176, + "species" : "Coprinopsis cinerea okayama7#130"}, + { "ID" : 214684, + "species" : "Cryptococcus neoformans var. neoformans JEC21"}, + { "ID" : 418459, + "species" : "Puccinia graminis f. sp. tritici CRL 75-36-700-3"}, + { "ID" : 237631, + "species" : "Ustilago maydis 521"}, + { "ID" : 671144, + "species" : "Wallemia mellicola CBS 633.66"} +] diff --git a/data/referenceDomainAnnotations.txt b/data/referenceDomainAnnotations.txt new file mode 100644 index 0000000..c8d58d6 --- /dev/null +++ b/data/referenceDomainAnnotations.txt @@ -0,0 +1,115 @@ +ID protein.ID feature.ID start end note +# MBP1_SACCE +NA ref_pro_4 ref_ftr_1 4 102 APSES fold +NA ref_pro_4 ref_ftr_2 22 105 KilA-N +NA ref_pro_4 ref_ftr_4 108 122 low complexity +NA ref_pro_4 ref_ftr_4 236 241 low complexity +NA ref_pro_4 ref_ftr_4 279 307 low complexity +NA ref_pro_4 ref_ftr_4 700 717 low complexity +NA ref_pro_4 ref_ftr_4 700 717 low complexity +NA ref_pro_4 ref_ftr_5 394 423 Ankyrin +NA ref_pro_4 ref_ftr_5 427 463 Ankyrin +NA ref_pro_4 ref_ftr_5 512 541 Ankyrin +NA ref_pro_4 ref_ftr_6 381 547 Swi6 fold +NA ref_pro_4 ref_ftr_7 633 655 coiled coil +# MBP1_ASPNI +NA ref_pro_1 ref_ftr_1 9 106 APSES fold +NA ref_pro_1 ref_ftr_2 26 109 KilA-N +NA ref_pro_1 ref_ftr_4 529 534 low complexity +NA ref_pro_1 ref_ftr_5 260 289 Ankyrin +NA ref_pro_1 ref_ftr_5 381 413 Ankyrin +NA ref_pro_1 ref_ftr_6 193 402 Swi6 fold +NA ref_pro_1 ref_ftr_7 509 572 coiled coil +# MBP1_BIPOR +NA ref_pro_2 ref_ftr_1 8 106 APSES fold +NA ref_pro_2 ref_ftr_2 26 109 KilA-N +NA ref_pro_2 ref_ftr_4 134 152 low complexity +NA ref_pro_2 ref_ftr_4 267 278 low complexity +NA ref_pro_2 ref_ftr_4 670 685 low complexity +NA ref_pro_2 ref_ftr_5 266 295 Ankyrin +NA ref_pro_2 ref_ftr_5 387 416 Ankyrin +NA ref_pro_2 ref_ftr_6 253 421 Swi6 fold +NA ref_pro_2 ref_ftr_7 659 681 coiled coil +NA ref_pro_2 ref_ftr_7 500 590 coiled coil +# MBP1_NEUCR +NA ref_pro_3 ref_ftr_1 14 114 APSES fold +NA ref_pro_3 ref_ftr_2 34 117 KilA-N +NA ref_pro_3 ref_ftr_4 130 141 low complexity +NA ref_pro_3 ref_ftr_4 253 266 low complexity +NA ref_pro_3 ref_ftr_4 514 525 low complexity +NA ref_pro_3 ref_ftr_4 554 564 low complexity +NA ref_pro_3 ref_ftr_4 601 618 low complexity +NA ref_pro_3 ref_ftr_4 620 629 low complexity +NA ref_pro_3 ref_ftr_4 636 652 low complexity +NA ref_pro_3 ref_ftr_4 658 672 low complexity +NA ref_pro_3 ref_ftr_4 725 735 low complexity +NA ref_pro_3 ref_ftr_4 752 771 low complexity +NA ref_pro_3 ref_ftr_5 268 297 Ankyrin +NA ref_pro_3 ref_ftr_5 390 419 Ankyrin +NA ref_pro_3 ref_ftr_6 270 426 Swi6 fold +NA ref_pro_3 ref_ftr_7 500 550 coiled coil +# MBP1_SCHPO +NA ref_pro_5 ref_ftr_1 8 104 APSES fold +NA ref_pro_5 ref_ftr_2 25 113 KilA-N +NA ref_pro_5 ref_ftr_4 111 125 low complexity +NA ref_pro_5 ref_ftr_4 136 145 low complexity +NA ref_pro_5 ref_ftr_4 176 191 low complexity +NA ref_pro_5 ref_ftr_4 422 447 low complexity +NA ref_pro_5 ref_ftr_5 247 276 Ankyrin +NA ref_pro_5 ref_ftr_5 368 397 Ankyrin +NA ref_pro_5 ref_ftr_6 234 400 Swi6 fold +NA ref_pro_5 ref_ftr_7 457 538 coiled coil +# MBP1_COPCI +NA ref_pro_6 ref_ftr_1 5 103 APSES fold +NA ref_pro_6 ref_ftr_2 23 106 KilA-N +NA ref_pro_6 ref_ftr_4 170 191 low complexity +NA ref_pro_6 ref_ftr_4 435 450 low complexity +NA ref_pro_6 ref_ftr_4 611 626 low complexity +NA ref_pro_6 ref_ftr_5 270 299 Ankyrin +NA ref_pro_6 ref_ftr_5 389 418 Ankyrin +NA ref_pro_6 ref_ftr_5 474 509 Ankyrin +NA ref_pro_6 ref_ftr_6 257 429 Swi6 fold +NA ref_pro_6 ref_ftr_7 500 570 coiled coil +NA ref_pro_6 ref_ftr_7 651 678 coiled coil +# MBP1_CRYNE +NA ref_pro_7 ref_ftr_1 113 211 APSES fold +NA ref_pro_7 ref_ftr_2 131 215 KilA-N +NA ref_pro_7 ref_ftr_4 66 85 low complexity +NA ref_pro_7 ref_ftr_4 413 423 low complexity +NA ref_pro_7 ref_ftr_4 633 644 low complexity +NA ref_pro_7 ref_ftr_4 697 709 low complexity +NA ref_pro_7 ref_ftr_5 477 506 Ankyrin +NA ref_pro_7 ref_ftr_5 618 647 Ankyrin +NA ref_pro_7 ref_ftr_6 452 663 Swi6 fold +# MBP1_PUCGR +NA ref_pro_8 ref_ftr_1 90 187 APSES fold +NA ref_pro_8 ref_ftr_2 107 190 KilA-N +NA ref_pro_8 ref_ftr_4 208 227 low complexity +NA ref_pro_8 ref_ftr_4 273 291 low complexity +NA ref_pro_8 ref_ftr_5 442 271 Ankyrin +NA ref_pro_8 ref_ftr_5 475 509 Ankyrin +NA ref_pro_8 ref_ftr_5 561 590 Ankyrin +NA ref_pro_8 ref_ftr_6 429 601 Swi6 fold +NA ref_pro_8 ref_ftr_7 827 863 coiled coil +# MBP1_USTMA +NA ref_pro_9 ref_ftr_1 7 104 APSES fold +NA ref_pro_9 ref_ftr_2 24 107 KilA-N +NA ref_pro_9 ref_ftr_4 106 116 low complexity +NA ref_pro_9 ref_ftr_4 161 183 low complexity +NA ref_pro_9 ref_ftr_4 657 672 low complexity +NA ref_pro_9 ref_ftr_4 776 796 low complexity +NA ref_pro_9 ref_ftr_5 245 274 Ankyrin +NA ref_pro_9 ref_ftr_5 355 384 Ankyrin +NA ref_pro_9 ref_ftr_6 232 395 Swi6 fold +NA ref_pro_9 ref_ftr_7 581 609 coiled coil +# MBP1_WALME +NA ref_pro_10 ref_ftr_1 6 103 APSES fold +NA ref_pro_10 ref_ftr_2 23 106 KilA-N +NA ref_pro_10 ref_ftr_4 149 162 low complexity +NA ref_pro_10 ref_ftr_4 171 188 low complexity +NA ref_pro_10 ref_ftr_4 618 628 low complexity +NA ref_pro_10 ref_ftr_4 634 660 low complexity +NA ref_pro_10 ref_ftr_5 250 279 Ankyrin +NA ref_pro_10 ref_ftr_5 369 398 Ankyrin +NA ref_pro_10 ref_ftr_6 237 409 Swi6 fold +NA ref_pro_10 ref_ftr_7 461 585 coiled coil