matthewfarant committed on
Commit
08e7866
·
verified ·
1 Parent(s): a6beb20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -54,6 +54,16 @@ query_prompt = PromptTemplate(
54
  # Keyword Chain
55
  query_chain = query_prompt | llama3_json | JsonOutputParser()
56
 
 
 
 
 
 
 
 
 
 
 
57
  # Google Search and Firecrawl Setup
58
  def search_and_scrape(keyword):
59
  search_results = google_search.results(keyword, 1)
@@ -116,7 +126,7 @@ def fact_check_flow(user_question):
116
  # Join Results
117
  final_markdown = []
118
  for results in context_data:
119
- final_markdown.append(results['markdown'])
120
 
121
  final_markdown = ' '.join(final_markdown)
122
 
 
54
  # Keyword Chain
55
  query_chain = query_prompt | llama3_json | JsonOutputParser()
56
 
57
+ # Text Cleaning
58
def clean_text(text):
    """Reduce scraped markdown/HTML to plain alphanumeric text.

    The passes run in a deliberate order:

    1. HTML tags are removed while ``<`` and ``>`` are still present.
       Running this after the punctuation pass would make it a no-op,
       because the angle brackets would already have been deleted and
       the tag names would leak into the output.
    2. Links (anything starting with ``http`` up to the next whitespace)
       are removed.
    3. Every remaining character that is not an ASCII letter, a digit or
       whitespace is removed.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string. Whitespace is left untouched, so removed
        spans may leave consecutive spaces behind.
    """
    # Remove HTML tags. A tag must start with a letter (optionally preceded
    # by '/'), so plain comparisons such as "5 < 6 and 7 > 3" are not
    # mistaken for markup and swallowed.
    text = re.sub(r'</?[a-zA-Z][^<>]*>', '', text)
    # Remove links
    text = re.sub(r'http\S+', '', text)
    # Remove non-alphanumeric characters (letters, digits and whitespace stay)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)
    return text
66
+
67
  # Google Search and Firecrawl Setup
68
  def search_and_scrape(keyword):
69
  search_results = google_search.results(keyword, 1)
 
126
  # Join Results
127
  final_markdown = []
128
  for results in context_data:
129
+ final_markdown.append(clean_text(results['markdown']))
130
 
131
  final_markdown = ' '.join(final_markdown)
132