VyLala committed on
Commit
6da8509
·
verified ·
1 Parent(s): 9701354

Upload 24 files

Browse files
NER/word2Vec/__pycache__/word2vec.cpython-311.pyc CHANGED
Binary files a/NER/word2Vec/__pycache__/word2vec.cpython-311.pyc and b/NER/word2Vec/__pycache__/word2vec.cpython-311.pyc differ
 
app.py CHANGED
@@ -41,7 +41,7 @@ def store_feedback_to_drive(accession, answer1, answer2, contact=""):
41
  if not answer1.strip() or not answer2.strip():
42
  return "⚠️ Please answer both questions before submitting."
43
 
44
- feedback_file = "/content/drive/MyDrive/Customers/feedback_mtdna.csv"
45
  header = ["accession", "helpful", "improvement", "contact"]
46
  row = [accession, answer1, answer2, contact]
47
  file_exists = os.path.isfile(feedback_file)
 
41
  if not answer1.strip() or not answer2.strip():
42
  return "⚠️ Please answer both questions before submitting."
43
 
44
+ feedback_file = "data/user_fb/feedback_mtdna.csv"
45
  header = ["accession", "helpful", "improvement", "contact"]
46
  row = [accession, answer1, answer2, contact]
47
  file_exists = os.path.isfile(feedback_file)
mtdna_classifier.py CHANGED
@@ -16,13 +16,9 @@ from transformers import pipeline
16
  #Entrez.email = "[email protected]"
17
  import nltk
18
 
19
- try:
20
- from nltk.corpus import stopwords
21
- _ = stopwords.words("english")
22
- except LookupError:
23
- nltk.download("stopwords")
24
- nltk.download("punkt")
25
-
26
  # Step 1: Get PubMed ID from Accession using EDirect
27
 
28
  def get_info_from_accession(accession):
@@ -60,9 +56,9 @@ def get_doi_from_pubmed_id(id):
60
  # Step 3.1: Extract Text
61
  def get_paper_text(doi,id):
62
  # create the temporary folder to contain the texts
63
- cmd = f'mkdir {id}'
64
  result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
65
- saveLinkFolder = "/mtDNALocation/data/"+id
66
 
67
  link = 'https://doi.org/' + doi
68
  '''textsToExtract = { "doiLink":"paperText"
@@ -102,7 +98,7 @@ def get_paper_text(doi,id):
102
  text += " ".join(words)
103
  textsToExtract[l] = text
104
  # delete folder after finishing getting text
105
- cmd = f'rm -r /mtDNALocation/data/{id}'
106
  result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
107
  return textsToExtract
108
  # Step 3.2: Extract context
 
16
  #Entrez.email = "[email protected]"
17
  import nltk
18
 
19
+ nltk.download("stopwords")
20
+ nltk.download("punkt")
21
+ nltk.download('punkt_tab')
 
 
 
 
22
  # Step 1: Get PubMed ID from Accession using EDirect
23
 
24
  def get_info_from_accession(accession):
 
56
  # Step 3.1: Extract Text
57
  def get_paper_text(doi,id):
58
  # create the temporary folder to contain the texts
59
+ cmd = f'mkdir data/{id}'
60
  result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
61
+ saveLinkFolder = "data/"+id
62
 
63
  link = 'https://doi.org/' + doi
64
  '''textsToExtract = { "doiLink":"paperText"
 
98
  text += " ".join(words)
99
  textsToExtract[l] = text
100
  # delete folder after finishing getting text
101
+ cmd = f'rm -r data/{id}'
102
  result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
103
  return textsToExtract
104
  # Step 3.2: Extract context