Spaces:

allenchienxxx
/

PEF

Runtime error

App Files Files Community

allenchienxxx committed on Jun 19, 2023

Commit

ec5e027

1 Parent(s): fd712e2

Update analze.py

Browse files

Files changed (1) hide show

analze.py +27 -27

analze.py CHANGED Viewed

@@ -36,8 +36,8 @@ def save_file(up_file):
         f.write(up_file.getbuffer())
         print("file save: "+up_file.name)
-def text_feature(filepath):
-    text = get_text(filepath)
     # print(text)
     if text != "":
         text = text.split()
@@ -45,20 +45,20 @@ def text_feature(filepath):
         dataf = pd.DataFrame([[textlist]], columns=['text'])
         return dataf
-def html_tags_feature(filepath):
-    tags = get_tags_from_html(get_html_general(filepath))
     taglist = ' '.join(tags) if tags !=[] else []
     dataf = pd.DataFrame([[taglist]], columns=['tags'])
     return dataf
-def extra_feature(filepath):
-    spf = check_spf(filepath)
-    dkim = check_dkim(filepath)
-    dmarc = check_dmarc(filepath)
-    deliver_receiver = check_deliver_receiver(filepath)
-    encript = check_encript(filepath)
-    onclick = get_onclicks(filepath)
-    popwindow = check_popWindow(filepath)
     extra_data_row = [spf, dkim, dmarc, deliver_receiver, encript, onclick, popwindow]
     extra_data_row = [0 if x is None else x for x in extra_data_row]
     extra_data_row = [1 if x is True else x for x in extra_data_row]
@@ -67,32 +67,32 @@ def extra_feature(filepath):
                               columns=['SPF(Pass:1,Neutral:2,Softdail:3,None:0)', 'DKIM', 'DMARC', 'Deliver-to Matches Receiver', 'Message_encrtpted', 'Onclick_events', 'Popwindow'])
     return extra_data
-def num_feature(filepath):
-    body_richness = get_body_richness(filepath)
-    func_words = get_num_FunctionWords(filepath)
-    sbj_richness = get_sbj_richness(filepath)
-    urls = get_num_urls(filepath)
-    ipurls = get_num_urls_ip(filepath)
-    imageurls = get_num_image_urls(filepath)
-    domainurls = get_num_domain_urls(filepath)
-    urlport = get_num_url_ports(filepath)
-    sen_chars = get_chars_sender(filepath)
     num_data_row = [body_richness, func_words, sbj_richness, urls, ipurls, imageurls, domainurls, urlport, sen_chars]
     num_data_row = [0 if x is None else x for x in num_data_row]
     num_data = pd.DataFrame([num_data_row],
                             columns=['body richness', 'Include function words', 'Subject richness', 'Numers of URLs', 'IPURLs', 'ImageURLs',
                                      'DomainURLs', 'URLs contain port information', 'Characters in senders'])
     return num_data
-def get_features(filepath):
     # text
-    textlist = text_feature(filepath)
     # html tags
-    taglist = html_tags_feature(filepath)
     #extra feature
-    extra_data = extra_feature(filepath)
     # Numeric data
-    num_data = num_feature(filepath)
     combined_df = pd.concat([textlist, taglist, num_data,extra_data], axis=1)
     # print(combined_df)
     return combined_df

         f.write(up_file.getbuffer())
         print("file save: "+up_file.name)
+def text_feature(file):
+    text = get_text(file)
     # print(text)
     if text != "":
         text = text.split()
         dataf = pd.DataFrame([[textlist]], columns=['text'])
         return dataf
+def html_tags_feature(file):
+    tags = get_tags_from_html(get_html_general(file))
     taglist = ' '.join(tags) if tags !=[] else []
     dataf = pd.DataFrame([[taglist]], columns=['tags'])
     return dataf
+def extra_feature(file):
+    spf = check_spf(file)
+    dkim = check_dkim(file)
+    dmarc = check_dmarc(file)
+    deliver_receiver = check_deliver_receiver(file)
+    encript = check_encript(file)
+    onclick = get_onclicks(file)
+    popwindow = check_popWindow(file)
     extra_data_row = [spf, dkim, dmarc, deliver_receiver, encript, onclick, popwindow]
     extra_data_row = [0 if x is None else x for x in extra_data_row]
     extra_data_row = [1 if x is True else x for x in extra_data_row]
                               columns=['SPF(Pass:1,Neutral:2,Softdail:3,None:0)', 'DKIM', 'DMARC', 'Deliver-to Matches Receiver', 'Message_encrtpted', 'Onclick_events', 'Popwindow'])
     return extra_data
+def num_feature(file):
+    body_richness = get_body_richness(file)
+    func_words = get_num_FunctionWords(file)
+    sbj_richness = get_sbj_richness(file)
+    urls = get_num_urls(file)
+    ipurls = get_num_urls_ip(file)
+    imageurls = get_num_image_urls(file)
+    domainurls = get_num_domain_urls(file)
+    urlport = get_num_url_ports(file)
+    sen_chars = get_chars_sender(file)
     num_data_row = [body_richness, func_words, sbj_richness, urls, ipurls, imageurls, domainurls, urlport, sen_chars]
     num_data_row = [0 if x is None else x for x in num_data_row]
     num_data = pd.DataFrame([num_data_row],
                             columns=['body richness', 'Include function words', 'Subject richness', 'Numers of URLs', 'IPURLs', 'ImageURLs',
                                      'DomainURLs', 'URLs contain port information', 'Characters in senders'])
     return num_data
+def get_features(file):
     # text
+    textlist = text_feature(file)
     # html tags
+    taglist = html_tags_feature(file)
     #extra feature
+    extra_data = extra_feature(file)
     # Numeric data
+    num_data = num_feature(file)
     combined_df = pd.concat([textlist, taglist, num_data,extra_data], axis=1)
     # print(combined_df)
     return combined_df