Spaces:

Phoenix21
/

DailyWellnessMVPchatbot

Sleeping

Phoenix21 committed on Dec 20, 2024

Commit

9a3085c

verified ·

1 Parent(s): 62b9066

tried using nltk to improve input handling

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,6 +12,7 @@ import chardet
 import gradio as gr
 import pandas as pd
 import json
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -61,7 +62,15 @@ def load_documents(file_paths):
             logger.error(f"Error processing file {file_path}: {e}")
     return docs
-# Simplify input validation
 def is_valid_input(text):
     """Validate the user's input question."""
@@ -71,10 +80,12 @@ def is_valid_input(text):
     if len(text.strip()) < 2:
         return False, "Input is too short. Please provide more context or details."
-    # Check if the input has at least one valid word
-    words = re.findall(r'\b\w+\b', text)
-    if len(words) < 1:  # Require at least one recognizable word
-        return False, "Input appears incomplete. Please provide a meaningful question."
     return True, "Valid input."

 import gradio as gr
 import pandas as pd
 import json
+from nltk.corpus import words
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
             logger.error(f"Error processing file {file_path}: {e}")
     return docs
+# Enhanced input validation
+# Load NLTK word list
+try:
+    english_words = set(words.words())
+except LookupError:
+    import nltk
+    nltk.download('words')
+    english_words = set(words.words())
 def is_valid_input(text):
     """Validate the user's input question."""
     if len(text.strip()) < 2:
         return False, "Input is too short. Please provide more context or details."
+    # Check for valid words
+    words_in_text = re.findall(r'\b\w+\b', text.lower())
+    recognized_words = [word for word in words_in_text if word in english_words]
+    if not recognized_words:
+        return False, "Input appears unclear. Please use valid words in your question."
     return True, "Valid input."