Spaces:
Running
Running
Upload 24 files
Browse files
- NER/word2Vec/__pycache__/word2vec.cpython-311.pyc +0 -0
- app.py +1 -1
- mtdna_classifier.py +6 -10
NER/word2Vec/__pycache__/word2vec.cpython-311.pyc
CHANGED
Binary files a/NER/word2Vec/__pycache__/word2vec.cpython-311.pyc and b/NER/word2Vec/__pycache__/word2vec.cpython-311.pyc differ
|
|
app.py
CHANGED
@@ -41,7 +41,7 @@ def store_feedback_to_drive(accession, answer1, answer2, contact=""):
|
|
41 |
if not answer1.strip() or not answer2.strip():
|
42 |
return "⚠️ Please answer both questions before submitting."
|
43 |
|
44 |
-
feedback_file = "/
|
45 |
header = ["accession", "helpful", "improvement", "contact"]
|
46 |
row = [accession, answer1, answer2, contact]
|
47 |
file_exists = os.path.isfile(feedback_file)
|
|
|
41 |
if not answer1.strip() or not answer2.strip():
|
42 |
return "⚠️ Please answer both questions before submitting."
|
43 |
|
44 |
+
feedback_file = "data/user_fb/feedback_mtdna.csv"
|
45 |
header = ["accession", "helpful", "improvement", "contact"]
|
46 |
row = [accession, answer1, answer2, contact]
|
47 |
file_exists = os.path.isfile(feedback_file)
|
mtdna_classifier.py
CHANGED
@@ -16,13 +16,9 @@ from transformers import pipeline
|
|
16 |
#Entrez.email = "[email protected]"
|
17 |
import nltk
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
except LookupError:
|
23 |
-
nltk.download("stopwords")
|
24 |
-
nltk.download("punkt")
|
25 |
-
|
26 |
# Step 1: Get PubMed ID from Accession using EDirect
|
27 |
|
28 |
def get_info_from_accession(accession):
|
@@ -60,9 +56,9 @@ def get_doi_from_pubmed_id(id):
|
|
60 |
# Step 3.1: Extract Text
|
61 |
def get_paper_text(doi,id):
|
62 |
# create the temporary folder to contain the texts
|
63 |
-
cmd = f'mkdir {id}'
|
64 |
result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
65 |
-
saveLinkFolder = "
|
66 |
|
67 |
link = 'https://doi.org/' + doi
|
68 |
'''textsToExtract = { "doiLink":"paperText"
|
@@ -102,7 +98,7 @@ def get_paper_text(doi,id):
|
|
102 |
text += " ".join(words)
|
103 |
textsToExtract[l] = text
|
104 |
# delete folder after finishing getting text
|
105 |
-
cmd = f'rm -r
|
106 |
result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
107 |
return textsToExtract
|
108 |
# Step 3.2: Extract context
|
|
|
16 |
#Entrez.email = "[email protected]"
|
17 |
import nltk
|
18 |
|
19 |
+
nltk.download("stopwords")
|
20 |
+
nltk.download("punkt")
|
21 |
+
nltk.download('punkt_tab')
|
|
|
|
|
|
|
|
|
22 |
# Step 1: Get PubMed ID from Accession using EDirect
|
23 |
|
24 |
def get_info_from_accession(accession):
|
|
|
56 |
# Step 3.1: Extract Text
|
57 |
def get_paper_text(doi,id):
|
58 |
# create the temporary folder to contain the texts
|
59 |
+
cmd = f'mkdir data/{id}'
|
60 |
result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
61 |
+
saveLinkFolder = "data/"+id
|
62 |
|
63 |
link = 'https://doi.org/' + doi
|
64 |
'''textsToExtract = { "doiLink":"paperText"
|
|
|
98 |
text += " ".join(words)
|
99 |
textsToExtract[l] = text
|
100 |
# delete folder after finishing getting text
|
101 |
+
cmd = f'rm -r data/{id}'
|
102 |
result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
103 |
return textsToExtract
|
104 |
# Step 3.2: Extract context
|