mgokg committed
Commit 3e7cc7e · verified · 1 Parent(s): 0c010c4

Update app.py

Files changed (1)
  1. app.py +14 -191
app.py CHANGED
@@ -1,195 +1,18 @@
-import os
-import re
-import sys
-import time
-import shutil
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from googlesearch import search
-import requests
-from bs4 import BeautifulSoup
-import backoff
-import groq
 import gradio as gr
+from googlesearch import search
 
-# -----------------------------------------------------------------------------
-# Default configuration and Prompts
-NUM_SEARCH = 8  # Number of links to parse from Google
-SEARCH_TIME_LIMIT = 7  # Max seconds to request website sources before skipping to the next URL
-TOTAL_TIMEOUT = 25  # Overall timeout for all operations
-MAX_CONTENT = 500  # Number of words to add to LLM context for each search result
-MAX_TOKENS = 1000  # Maximum number of tokens the LLM generates
-LLM_MODEL = 'llama3-70b-8192'  # Groq model
-
-system_prompt_search = """You are a helpful assistant whose primary goal is to decide if a user's query requires a Google search."""
-search_prompt = """
-Decide if a user's query requires a Google search. You should use Google search for most queries to find the most accurate and updated information. Follow these conditions:
-
-- If the query does not require Google search, you must output "ns", short for no search.
-- If the query requires Google search, you must respond with a reformulated user query for Google search.
-- User query may sometimes refer to previous messages. Make sure your Google search considers the entire message history.
-
-User Query:
-{query}
-"""
-
-system_prompt_answer = """You are a helpful assistant who is expert at answering user's queries"""
-answer_prompt = """Generate a response that is informative and relevant to the user's query
-User Query:
-{query}
-"""
-
-system_prompt_cited_answer = """You are a helpful assistant who is expert at answering user's queries based on the cited context."""
-cited_answer_prompt = """
-Provide a relevant, informative response to the user's query using the given context (search results with [citation number](website link) and brief descriptions).
-
-- Answer directly without referring the user to any external links.
-- Use an unbiased, journalistic tone and avoid repeating text.
-- Format your response in markdown with bullet points for clarity.
-- Cite all information using [citation number](website link) notation, matching each part of your answer to its source.
-
-Context Block:
-{context_block}
-
-User Query:
-{query}
-"""
-# -----------------------------------------------------------------------------
-
-# Set up the Groq API key
-GROQ_API_KEY = os.getenv('GROQ_API_KEY')
-if not GROQ_API_KEY:
-    raise ValueError("Groq API key is not set. Please set the GROQ_API_KEY environment variable.")
-
-client = groq.Client(api_key=GROQ_API_KEY)
-
-def trace_function_factory(start):
-    """Create a trace function that times out the request after TOTAL_TIMEOUT seconds."""
-    def trace_function(frame, event, arg):
-        if time.time() - start > TOTAL_TIMEOUT:
-            raise TimeoutError('Website fetching timed out')
-        return trace_function
-    return trace_function
-
-def fetch_webpage(url, timeout):
-    """Fetch the content of a webpage given a URL and a timeout."""
-    start = time.time()
-    sys.settrace(trace_function_factory(start))
-    try:
-        print(f"Fetching link: {url}")
-        response = requests.get(url, timeout=timeout)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, 'lxml')
-        paragraphs = soup.find_all('p')
-        page_text = ' '.join([para.get_text() for para in paragraphs])
-        return url, page_text
-    except (requests.exceptions.RequestException, TimeoutError) as e:
-        print(f"Error fetching {url}: {e}")
-    finally:
-        sys.settrace(None)
-    return url, None
-
-def parse_google_results(query, num_search=NUM_SEARCH, search_time_limit=SEARCH_TIME_LIMIT):
-    """Perform a Google search and parse the content of the top results."""
-    urls = list(search(query, num_results=num_search))
-    max_workers = os.cpu_count() or 1  # Fall back to 1 if os.cpu_count() returns None
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        future_to_url = {executor.submit(fetch_webpage, url, search_time_limit): url for url in urls}
-        return {url: page_text for future in as_completed(future_to_url) if (url := future.result()[0]) and (page_text := future.result()[1])}
-
-def save_markdown(content, file_path):
-    """Append content to a Markdown file."""
-    with open(file_path, 'a') as file:
-        file.write(content)
-
-@backoff.on_exception(backoff.expo, (groq._exceptions.RateLimitError, groq._exceptions.APITimeoutError))
-def llm_check_search(query, file_path, msg_history=None, llm_model=LLM_MODEL):
-    """Check whether the query requires a search and, if so, run the Google search."""
-    prompt = search_prompt.format(query=query)
-    msg_history = msg_history or []
-    new_msg_history = msg_history + [{"role": "user", "content": prompt}]
-    response = client.chat.completions.create(
-        model=llm_model,
-        messages=[{"role": "system", "content": system_prompt_search}, *new_msg_history],
-        max_tokens=30
-    ).choices[0].message.content
-
-    # Check whether the response is "ns" (no search required)
-    cleaned_response = response.lower().strip()
-    if re.fullmatch(r"\bns\b", cleaned_response):
-        print("No Google search required.")
-        return None
-    else:
-        print(f"Performing Google search: {cleaned_response}")
-        search_dic = parse_google_results(cleaned_response)
-        # Format the search results dict as a numbered markdown list of links
-        search_result_md = "\n".join([f"{number+1}. {link}" for number, link in enumerate(search_dic.keys())])
-        save_markdown(f"## Sources\n{search_result_md}\n\n", file_path)
-        return search_dic
-
-@backoff.on_exception(backoff.expo, (groq._exceptions.RateLimitError, groq._exceptions.APITimeoutError))
-def llm_answer(query, file_path, msg_history=None, search_dic=None, llm_model=LLM_MODEL, max_content=MAX_CONTENT, max_tokens=MAX_TOKENS, debug=False):
-    """Build the prompt for the language model, including the search-results context."""
-    if search_dic:
-        context_block = "\n".join([f"[{i+1}]({url}): {content[:max_content]}" for i, (url, content) in enumerate(search_dic.items())])
-        prompt = cited_answer_prompt.format(context_block=context_block, query=query)
-        system_prompt = system_prompt_cited_answer
-    else:
-        prompt = answer_prompt.format(query=query)
-        system_prompt = system_prompt_answer
-
-    # Generate a response using the Groq language model with streaming completion
-    msg_history = msg_history or []
-    new_msg_history = msg_history + [{"role": "user", "content": prompt}]
-    response = client.chat.completions.create(
-        model=llm_model,
-        messages=[{"role": "system", "content": system_prompt}, *new_msg_history],
-        max_tokens=max_tokens,
-        stream=True
-    )
-
-    print("\n" + "*" * 20 + " LLM START " + "*" * 20)
-    save_markdown(f"## Answer\n", file_path)
-    content = []
-    for chunk in response:
-        chunk_content = chunk.choices[0].delta.content
-        if chunk_content:
-            content.append(chunk_content)
-            print(chunk_content, end="")
-            save_markdown(chunk_content, file_path)
-
-    print("\n" + "*" * 21 + " LLM END " + "*" * 21 + "\n")
-    # Add a blank line before the next question
-    save_markdown("\n\n", file_path)
-    new_msg_history = new_msg_history + [{"role": "assistant", "content": ''.join(content)}]
-
-    return new_msg_history, ''.join(content)
-
-def main_interface(query, file_path="playground.md"):
-    """Run the search, generate the response, and save it to markdown."""
-    msg_history = None
-    save_path = None
-    # Start with an empty file
-    with open(file_path, 'w') as file:
-        pass
-
-    save_markdown(f"# {query}\n\n", file_path)
-    search_dic = llm_check_search(query, file_path, msg_history)
-    msg_history, response = llm_answer(query, file_path, msg_history, search_dic)
-
-    return response
-
-# Create the Gradio interface
-def gradio_interface(query):
-    response = main_interface(query)
-    return response
-
-iface = gr.Interface(
-    fn=gradio_interface,
-    inputs=gr.Textbox(label="Enter your question"),
-    outputs=gr.Textbox(label="Response"),
-    title="AI Question Answering System",
-    description="Ask your questions and get informative answers."
+def google_search(query):
+    results = []
+    for j in search(query, num_results=5):
+        results.append(j)
+    return "\n".join(results)
+
+demo = gr.Interface(
+    fn=google_search,
+    inputs=gr.Textbox(lines=2, placeholder="Enter your search query..."),
+    outputs="text",
+    title="Google Search with Gradio",
+    description="Enter a search query and get the top 5 Google search results."
 )
 
-if __name__ == "__main__":
-    iface.launch()
+demo.launch()
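
Note: the new google_search handler calls the unofficial googlesearch scraper with no input validation or error handling, so an empty query, a rate limit, or a network failure surfaces as a raw exception in the Gradio UI. A minimal hardened sketch of the same handler follows; the guard clause, try/except wrapper, and fallback messages are illustrative additions, not part of this commit:

import gradio as gr
from googlesearch import search

def google_search(query):
    """Return the top 5 Google result URLs, one per line."""
    if not query or not query.strip():
        return "Please enter a search query."
    try:
        # search() yields result URLs; num_results caps how many are requested.
        results = list(search(query, num_results=5))
    except Exception as e:
        # The unofficial scraper can be rate-limited or blocked by Google.
        return f"Search failed: {e}"
    return "\n".join(results) if results else "No results found."

Wiring it into the committed gr.Interface is unchanged: pass fn=google_search and keep the same Textbox input and text output.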