Upload 24 files

Files changed (10)

DefaultPackages/__pycache__/__init__.cpython-311.pyc CHANGED Viewed

Binary files a/DefaultPackages/__pycache__/__init__.cpython-311.pyc and b/DefaultPackages/__pycache__/__init__.cpython-311.pyc differ

DefaultPackages/__pycache__/openFile.cpython-311.pyc CHANGED Viewed

Binary files a/DefaultPackages/__pycache__/openFile.cpython-311.pyc and b/DefaultPackages/__pycache__/openFile.cpython-311.pyc differ

DefaultPackages/__pycache__/saveFile.cpython-311.pyc CHANGED Viewed

Binary files a/DefaultPackages/__pycache__/saveFile.cpython-311.pyc and b/DefaultPackages/__pycache__/saveFile.cpython-311.pyc differ

NER/PDF/__pycache__/pdf.cpython-311.pyc ADDED Viewed

Binary file (7.86 kB). View file

NER/WordDoc/__pycache__/wordDoc.cpython-311.pyc ADDED Viewed

Binary file (8.54 kB). View file

NER/__pycache__/cleanText.cpython-311.pyc ADDED Viewed

Binary file (5.89 kB). View file

NER/html/__pycache__/extractHTML.cpython-311.pyc ADDED Viewed

Binary file (11 kB). View file

NER/html/extractHTML.py CHANGED Viewed

@@ -126,8 +126,8 @@ class HTML():
       print("No tables found in HTML file")
     return df
   def mergeTextInJson(self,jsonHTML):
-    #cl = cleanText.cleanGenText()
-    cl = cleanGenText()
     htmlText = ""
     for sec in jsonHTML:
       # section is "\n\n"

       print("No tables found in HTML file")
     return df
   def mergeTextInJson(self,jsonHTML):
+    cl = cleanText.cleanGenText()
+    #cl = cleanGenText()
     htmlText = ""
     for sec in jsonHTML:
       # section is "\n\n"

NER/word2Vec/__pycache__/word2vec.cpython-311.pyc ADDED Viewed

Binary file (16.1 kB). View file

mtdna_classifier.py CHANGED Viewed

@@ -14,6 +14,14 @@ from NER.word2Vec import word2vec
 from transformers import pipeline
 # Set your email (required by NCBI Entrez)
 #Entrez.email = "your.email@example.com"
 # Step 1: Get PubMed ID from Accession using EDirect

 from transformers import pipeline
 # Set your email (required by NCBI Entrez)
 #Entrez.email = "your.email@example.com"
+import nltk
+try:
+    from nltk.corpus import stopwords
+    _ = stopwords.words("english")
+except LookupError:
+    nltk.download("stopwords")
+    nltk.download("punkt")
 # Step 1: Get PubMed ID from Accession using EDirect