Spaces:

VyLala
/

mtDNALocation

Running

VyLala committed on Apr 13

Commit

6da8509

verified ·

1 Parent(s): 9701354

Upload 24 files

Files changed (3) hide show

NER/word2Vec/__pycache__/word2vec.cpython-311.pyc CHANGED Viewed

Binary files a/NER/word2Vec/__pycache__/word2vec.cpython-311.pyc and b/NER/word2Vec/__pycache__/word2vec.cpython-311.pyc differ

app.py CHANGED Viewed

@@ -41,7 +41,7 @@ def store_feedback_to_drive(accession, answer1, answer2, contact=""):
     if not answer1.strip() or not answer2.strip():
         return "⚠️ Please answer both questions before submitting."
-    feedback_file = "/content/drive/MyDrive/Customers/feedback_mtdna.csv"
     header = ["accession", "helpful", "improvement", "contact"]
     row = [accession, answer1, answer2, contact]
     file_exists = os.path.isfile(feedback_file)

     if not answer1.strip() or not answer2.strip():
         return "⚠️ Please answer both questions before submitting."
+    feedback_file = "data/user_fb/feedback_mtdna.csv"
     header = ["accession", "helpful", "improvement", "contact"]
     row = [accession, answer1, answer2, contact]
     file_exists = os.path.isfile(feedback_file)

mtdna_classifier.py CHANGED Viewed

@@ -16,13 +16,9 @@ from transformers import pipeline
 #Entrez.email = "[email protected]"
 import nltk
-try:
-    from nltk.corpus import stopwords
-    _ = stopwords.words("english")
-except LookupError:
-    nltk.download("stopwords")
-    nltk.download("punkt")
 # Step 1: Get PubMed ID from Accession using EDirect
 def get_info_from_accession(accession):
@@ -60,9 +56,9 @@ def get_doi_from_pubmed_id(id):
 # Step 3.1: Extract Text
 def get_paper_text(doi,id):
   # create the temporary folder to contain the texts
-  cmd = f'mkdir {id}'
   result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-  saveLinkFolder = "/mtDNALocation/data/"+id
   link = 'https://doi.org/' + doi
   '''textsToExtract = { "doiLink":"paperText"
@@ -102,7 +98,7 @@ def get_paper_text(doi,id):
           text += " ".join(words)
       textsToExtract[l] = text
   # delete folder after finishing getting text
-  cmd = f'rm -r /mtDNALocation/data/{id}'
   result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
   return textsToExtract
 # Step 3.2: Extract context

 #Entrez.email = "[email protected]"
 import nltk
+nltk.download("stopwords")
+nltk.download("punkt")
+nltk.download('punkt_tab')
 # Step 1: Get PubMed ID from Accession using EDirect
 def get_info_from_accession(accession):
 # Step 3.1: Extract Text
 def get_paper_text(doi,id):
   # create the temporary folder to contain the texts
+  cmd = f'mkdir data/{id}'
   result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+  saveLinkFolder = "data/"+id
   link = 'https://doi.org/' + doi
   '''textsToExtract = { "doiLink":"paperText"
           text += " ".join(words)
       textsToExtract[l] = text
   # delete folder after finishing getting text
+  cmd = f'rm -r data/{id}'
   result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
   return textsToExtract
 # Step 3.2: Extract context