VyLala committed on
Commit
9701354
·
verified ·
1 Parent(s): 4d4911e

Upload 24 files

Browse files
DefaultPackages/__pycache__/__init__.cpython-311.pyc CHANGED
Binary files a/DefaultPackages/__pycache__/__init__.cpython-311.pyc and b/DefaultPackages/__pycache__/__init__.cpython-311.pyc differ
 
DefaultPackages/__pycache__/openFile.cpython-311.pyc CHANGED
Binary files a/DefaultPackages/__pycache__/openFile.cpython-311.pyc and b/DefaultPackages/__pycache__/openFile.cpython-311.pyc differ
 
DefaultPackages/__pycache__/saveFile.cpython-311.pyc CHANGED
Binary files a/DefaultPackages/__pycache__/saveFile.cpython-311.pyc and b/DefaultPackages/__pycache__/saveFile.cpython-311.pyc differ
 
NER/PDF/__pycache__/pdf.cpython-311.pyc ADDED
Binary file (7.86 kB). View file
 
NER/WordDoc/__pycache__/wordDoc.cpython-311.pyc ADDED
Binary file (8.54 kB). View file
 
NER/__pycache__/cleanText.cpython-311.pyc ADDED
Binary file (5.89 kB). View file
 
NER/html/__pycache__/extractHTML.cpython-311.pyc ADDED
Binary file (11 kB). View file
 
NER/html/extractHTML.py CHANGED
@@ -126,8 +126,8 @@ class HTML():
126
  print("No tables found in HTML file")
127
  return df
128
  def mergeTextInJson(self,jsonHTML):
129
- #cl = cleanText.cleanGenText()
130
- cl = cleanGenText()
131
  htmlText = ""
132
  for sec in jsonHTML:
133
  # section is "\n\n"
 
126
  print("No tables found in HTML file")
127
  return df
128
  def mergeTextInJson(self,jsonHTML):
129
+ cl = cleanText.cleanGenText()
130
+ #cl = cleanGenText()
131
  htmlText = ""
132
  for sec in jsonHTML:
133
  # section is "\n\n"
NER/word2Vec/__pycache__/word2vec.cpython-311.pyc ADDED
Binary file (16.1 kB). View file
 
mtdna_classifier.py CHANGED
@@ -14,6 +14,14 @@ from NER.word2Vec import word2vec
14
  from transformers import pipeline
15
  # Set your email (required by NCBI Entrez)
16
  #Entrez.email = "[email protected]"
 
 
 
 
 
 
 
 
17
 
18
  # Step 1: Get PubMed ID from Accession using EDirect
19
 
 
14
  from transformers import pipeline
15
  # Set your email (required by NCBI Entrez)
16
  #Entrez.email = "[email protected]"
17
+ import nltk
18
+
19
+ try:
20
+ from nltk.corpus import stopwords
21
+ _ = stopwords.words("english")
22
+ except LookupError:
23
+ nltk.download("stopwords")
24
+ nltk.download("punkt")
25
 
26
  # Step 1: Get PubMed ID from Accession using EDirect
27