Spaces:
Sleeping
Sleeping
tried using nltk to improve input handling
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ import chardet
|
|
12 |
import gradio as gr
|
13 |
import pandas as pd
|
14 |
import json
|
|
|
15 |
|
16 |
logging.basicConfig(level=logging.INFO)
|
17 |
logger = logging.getLogger(__name__)
|
@@ -61,7 +62,15 @@ def load_documents(file_paths):
|
|
61 |
logger.error(f"Error processing file {file_path}: {e}")
|
62 |
return docs
|
63 |
|
64 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
def is_valid_input(text):
|
67 |
"""Validate the user's input question."""
|
@@ -71,10 +80,12 @@ def is_valid_input(text):
|
|
71 |
if len(text.strip()) < 2:
|
72 |
return False, "Input is too short. Please provide more context or details."
|
73 |
|
74 |
-
# Check
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
78 |
|
79 |
return True, "Valid input."
|
80 |
|
|
|
12 |
import gradio as gr
|
13 |
import pandas as pd
|
14 |
import json
|
15 |
+
from nltk.corpus import words
|
16 |
|
17 |
logging.basicConfig(level=logging.INFO)
|
18 |
logger = logging.getLogger(__name__)
|
|
|
62 |
logger.error(f"Error processing file {file_path}: {e}")
|
63 |
return docs
|
64 |
|
65 |
+
# Enhanced input validation
|
66 |
+
|
67 |
+
# Load NLTK word list
|
68 |
+
try:
|
69 |
+
english_words = set(words.words())
|
70 |
+
except LookupError:
|
71 |
+
import nltk
|
72 |
+
nltk.download('words')
|
73 |
+
english_words = set(words.words())
|
74 |
|
75 |
def is_valid_input(text):
|
76 |
"""Validate the user's input question."""
|
|
|
80 |
if len(text.strip()) < 2:
|
81 |
return False, "Input is too short. Please provide more context or details."
|
82 |
|
83 |
+
# Check for valid words
|
84 |
+
words_in_text = re.findall(r'\b\w+\b', text.lower())
|
85 |
+
recognized_words = [word for word in words_in_text if word in english_words]
|
86 |
+
|
87 |
+
if not recognized_words:
|
88 |
+
return False, "Input appears unclear. Please use valid words in your question."
|
89 |
|
90 |
return True, "Valid input."
|
91 |
|