Phoenix21 committed on
Commit
9a3085c
·
verified ·
1 Parent(s): 62b9066

tried using nltk to improve input handling

Browse files
Files changed (1) hide show
  1. app.py +16 -5
app.py CHANGED
@@ -12,6 +12,7 @@ import chardet
12
  import gradio as gr
13
  import pandas as pd
14
  import json
 
15
 
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
@@ -61,7 +62,15 @@ def load_documents(file_paths):
61
  logger.error(f"Error processing file {file_path}: {e}")
62
  return docs
63
 
64
- # Simplify input validation
 
 
 
 
 
 
 
 
65
 
66
  def is_valid_input(text):
67
  """Validate the user's input question."""
@@ -71,10 +80,12 @@ def is_valid_input(text):
71
  if len(text.strip()) < 2:
72
  return False, "Input is too short. Please provide more context or details."
73
 
74
- # Check if the input has at least one valid word
75
- words = re.findall(r'\b\w+\b', text)
76
- if len(words) < 1: # Require at least one recognizable word
77
- return False, "Input appears incomplete. Please provide a meaningful question."
 
 
78
 
79
  return True, "Valid input."
80
 
 
12
  import gradio as gr
13
  import pandas as pd
14
  import json
15
+ from nltk.corpus import words
16
 
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
 
62
  logger.error(f"Error processing file {file_path}: {e}")
63
  return docs
64
 
65
+ # Enhanced input validation
66
+
67
+ # Load NLTK word list
68
+ try:
69
+ english_words = set(words.words())
70
+ except LookupError:
71
+ import nltk
72
+ nltk.download('words')
73
+ english_words = set(words.words())
74
 
75
  def is_valid_input(text):
76
  """Validate the user's input question."""
 
80
  if len(text.strip()) < 2:
81
  return False, "Input is too short. Please provide more context or details."
82
 
83
+ # Check for valid words
84
+ words_in_text = re.findall(r'\b\w+\b', text.lower())
85
+ recognized_words = [word for word in words_in_text if word in english_words]
86
+
87
+ if not recognized_words:
88
+ return False, "Input appears unclear. Please use valid words in your question."
89
 
90
  return True, "Valid input."
91