Spaces:

allenchienxxx
/

PEF

Runtime error

allenchienxxx committed on Jun 19, 2023

Commit

ae55d84

1 Parent(s): 7aed46a

Update modules.py

Files changed (1) hide show

modules.py CHANGED Viewed

@@ -1,3 +1,10 @@
 def get_text_from_html(html_content):
     soup = BeautifulSoup(html_content, 'html.parser')
     # extract all the texts
@@ -16,8 +23,7 @@ def get_text(file):
             return text_content.replace("\n","")
     if text_content == "":
         return get_text_from_html(get_html_general(file));
-from bs4 import BeautifulSoup
-import email
 def get_email_html(file):
     content = email.message_from_bytes(file.read())
     html_content = ""
@@ -157,12 +163,6 @@ def get_tags_from_html(html_content):
         tag_list += [tag.name]
     # print(tag_list)
     return tag_list
-import ipaddress
-from urllib.parse import urlparse
-import urllib.request
-from bs4 import BeautifulSoup
-import re
-import email
 #get urls in html content
 def get_urls_from_html(html_content):
@@ -231,19 +231,18 @@ def get_num_FunctionWords(file):
 def get_email_html(file):
-    with open(file, 'rb') as file:
-        content = email.message_from_bytes(file.read())
-        html_content = ""
-        for part in content.walk():
-            if part.get_content_type() == 'text/html':
-                html_content += part.get_payload(decode=True).decode('iso-8859-1')
-        html_content.replace("\n","")
-        if html_content != "":
-            # print("Found html at "+file)
-            return html_content
-        else:
-            # print("No html content found at "+file)
-            return ""
 #get how many words in subject
 def get_num_sbj(file):

+import ipaddress
+from urllib.parse import urlparse
+import urllib.request
+from bs4 import BeautifulSoup
+import re
+import email
 def get_text_from_html(html_content):
     soup = BeautifulSoup(html_content, 'html.parser')
     # extract all the texts
             return text_content.replace("\n","")
     if text_content == "":
         return get_text_from_html(get_html_general(file));
 def get_email_html(file):
     content = email.message_from_bytes(file.read())
     html_content = ""
         tag_list += [tag.name]
     # print(tag_list)
     return tag_list
 #get urls in html content
 def get_urls_from_html(html_content):
 def get_email_html(file):
+    content = email.message_from_bytes(file.read())
+    html_content = ""
+    for part in content.walk():
+        if part.get_content_type() == 'text/html':
+            html_content += part.get_payload(decode=True).decode('iso-8859-1')
+    html_content.replace("\n","")
+    if html_content != "":
+        # print("Found html at "+file)
+        return html_content
+    else:
+        # print("No html content found at "+file)
+        return ""
 #get how many words in subject
 def get_num_sbj(file):