Spaces:

ProzisTech
/

key-text-image-finder

Running

MarioPrzBasto committed on May 19

Commit

506344e

verified ·

1 Parent(s): 3381080

Update text_similarity.py

Files changed (1) hide show

text_similarity.py CHANGED Viewed

@@ -1,7 +1,10 @@
 import re
 from difflib import SequenceMatcher
 from collections import defaultdict
 def extract_special_characters(text):
     """Extracts all unique special characters from a list of texts."""
     characters = re.findall(r'[^\w\s]', text)  # Finds non-alphanumeric and non-space characters
@@ -22,6 +25,8 @@ def detect_fragments(text, key_texts, threshold=0.7):
         characters_to_not_clean = extract_special_characters(key_text)
         words = clean_text(text, characters_to_not_clean).split()
         key_words = key_text.split()
         # If the text is too short, we can't make an effective sliding window

 import re
+import logging
 from difflib import SequenceMatcher
 from collections import defaultdict
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 def extract_special_characters(text):
     """Extracts all unique special characters from a list of texts."""
     characters = re.findall(r'[^\w\s]', text)  # Finds non-alphanumeric and non-space characters
         characters_to_not_clean = extract_special_characters(key_text)
         words = clean_text(text, characters_to_not_clean).split()
+        logging.info(f"Words detected: {words}")
         key_words = key_text.split()
         # If the text is too short, we can't make an effective sliding window