Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -54,6 +54,16 @@ query_prompt = PromptTemplate(
|
|
54 |
# Keyword Chain
|
55 |
query_chain = query_prompt | llama3_json | JsonOutputParser()
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
# Google Search and Firecrawl Setup
|
58 |
def search_and_scrape(keyword):
|
59 |
search_results = google_search.results(keyword, 1)
|
@@ -116,7 +126,7 @@ def fact_check_flow(user_question):
|
|
116 |
# Join Results
|
117 |
final_markdown = []
|
118 |
for results in context_data:
|
119 |
-
final_markdown.append(results['markdown'])
|
120 |
|
121 |
final_markdown = ' '.join(final_markdown)
|
122 |
|
|
|
54 |
# Keyword Chain
|
55 |
query_chain = query_prompt | llama3_json | JsonOutputParser()
|
56 |
|
57 |
+
# Text Cleaning
|
58 |
+
def clean_text(text: str) -> str:
    """Strip HTML tags, links and punctuation from scraped text.

    The three passes are order-sensitive:

    1. HTML tags go first, while the ``<`` and ``>`` delimiters still exist.
       If the punctuation filter ran before this step it would delete the
       angle brackets, the tag pattern would never match, and tag names and
       attributes would leak into the result as plain words.
    2. Links go second. Running this after tag removal keeps ``http\\S+``
       from swallowing anchor text in markup such as
       ``<a href="http://...">text</a>``, which contains no whitespace.
    3. Everything that is not an ASCII letter, digit or whitespace is
       dropped last.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text. Whitespace is left untouched, so removed spans
        may leave consecutive spaces behind.
    """
    # Remove HTML tags (must happen before '<' and '>' are stripped below)
    text = re.sub(r'<[^>]+>', '', text)
    # Remove links
    text = re.sub(r'http\S+', '', text)
    # Remove everything except ASCII letters, digits and whitespace
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return text
|
66 |
+
|
67 |
# Google Search and Firecrawl Setup
|
68 |
def search_and_scrape(keyword):
|
69 |
search_results = google_search.results(keyword, 1)
|
|
|
126 |
# Join Results
|
127 |
final_markdown = []
|
128 |
for results in context_data:
|
129 |
+
final_markdown.append(clean_text(results['markdown']))
|
130 |
|
131 |
final_markdown = ' '.join(final_markdown)
|
132 |
|