Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -129,19 +129,36 @@ Classify as "knowledge_base" if the query:
|
|
129 |
logger.error(f'Error determining query type: {e}. Defaulting to knowledge_base')
|
130 |
return QueryType.KNOWLEDGE_BASE
|
131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
def is_valid_url(url: str) -> bool:
|
133 |
-
"""Check if the provided string is a valid URL."""
|
134 |
try:
|
135 |
result = urlparse(url)
|
136 |
-
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
return False
|
139 |
|
140 |
-
def extract_urls(text: str) -> List[str]:
|
141 |
-
"""Extract URLs from text using regex pattern."""
|
142 |
-
url_pattern = r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+'
|
143 |
-
return re.findall(url_pattern, text)
|
144 |
-
|
145 |
async def process_knowledge_base_query(query: str, chat_history: List[List[str]], temperature: float = 0.7) -> str:
|
146 |
"""Handle queries that can be answered from the knowledge base, with context."""
|
147 |
logger.info(f'Processing knowledge base query: {query}')
|
|
|
129 |
logger.error(f'Error determining query type: {e}. Defaulting to knowledge_base')
|
130 |
return QueryType.KNOWLEDGE_BASE
|
131 |
|
132 |
+
def extract_urls(text: str) -> List[str]:
    """Extract http(s) URLs from free text.

    A candidate is ``http://`` or ``https://`` followed by a host, an
    optional ``:port`` and an optional path / query / fragment. Trailing
    sentence punctuation is trimmed from each candidate, and only
    candidates accepted by ``is_valid_url`` are returned.

    Args:
        text: Arbitrary text that may contain zero or more URLs.

    Returns:
        The accepted URLs in order of appearance (duplicates are kept).
        An empty list when ``text`` is empty/None or contains no URL.
    """
    if not text:
        return []

    url_pattern = (
        r'https?://'
        r'(?:[-\w.]|(?:%[\da-fA-F]{2}))+'  # host
        r'(?::\d+)?'                        # optional port, e.g. :8080
        # The tail may start with "/", "?" or "#" so a query or fragment
        # directly after the host is kept. ")" stays excluded so Markdown
        # links "[x](url)" end cleanly; "<", ">" and '"' are never legal
        # unescaped in a URL and usually belong to surrounding markup.
        r'(?:[/?#][^)\s<>"]*)?'
    )

    # Sentence punctuation that tends to be glued to the end of a URL.
    trailing_junk = ".,;:)!?'"

    candidates = (
        match.rstrip(trailing_junk) for match in re.findall(url_pattern, text)
    )
    return [url for url in candidates if is_valid_url(url)]
|
147 |
+
|
148 |
def is_valid_url(url: str) -> bool:
    """Return True if ``url`` is an absolute http(s) URL with a real host.

    Args:
        url: The string to check. Non-string values are rejected.

    Returns:
        True when the scheme is ``http`` or ``https``, the authority
        contains a host name, and any port is numeric and in range.
        False otherwise, including when the URL cannot be parsed.
    """
    if not isinstance(url, str):
        return False

    try:
        result = urlparse(url)
        has_valid_scheme = result.scheme in ('http', 'https')
        # ``hostname`` (unlike ``netloc``) is None for inputs such as
        # "http://:80" or "http://user@" that carry no actual host.
        has_valid_domain = bool(result.hostname)
        # Reading ``port`` makes urllib validate it: a non-numeric or
        # out-of-range port raises ValueError.
        _ = result.port
        return has_valid_scheme and has_valid_domain
    except ValueError as e:
        # urlparse / port raise ValueError for malformed authorities
        # (e.g. an unterminated IPv6 literal or a bad port).
        logger.error('URL validation error: %s', e)
        return False
|
161 |
|
|
|
|
|
|
|
|
|
|
|
162 |
async def process_knowledge_base_query(query: str, chat_history: List[List[str]], temperature: float = 0.7) -> str:
|
163 |
"""Handle queries that can be answered from the knowledge base, with context."""
|
164 |
logger.info(f'Processing knowledge base query: {query}')
|