Shreyas094 committed on
Commit e977112 · verified · 1 Parent(s): 4c78583

Update app.py

Files changed (1)
  1. app.py +196 -204
app.py CHANGED
@@ -1,257 +1,249 @@
--- app.py (old version)
  import os
- import logging
- import asyncio
- import random  # Import random for token selection
- from typing import AsyncGenerator, Tuple
  import gradio as gr
- from huggingface_hub import InferenceClient
- from langchain.embeddings import HuggingFaceEmbeddings
- from langchain.vectorstores import FAISS
- from langchain.schema import Document
  from duckduckgo_search import DDGS
  
- # Configure logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  
- # List of Hugging Face tokens
- huggingface_tokens = [
-     os.environ.get("HUGGINGFACE_TOKEN_1"),
-     os.environ.get("HUGGINGFACE_TOKEN_2"),
-     os.environ.get("HUGGINGFACE_TOKEN_3")
- ]
-
- # Function to get a random Hugging Face token
- def get_random_token():
-     return random.choice(huggingface_tokens)
  
  MODELS = [
      "mistralai/Mistral-7B-Instruct-v0.3",
      "mistralai/Mixtral-8x7B-Instruct-v0.1",
-     "mistralai/Mistral-Nemo-Instruct-2407",
-     "meta-llama/Meta-Llama-3.1-8B-Instruct",
-     "meta-llama/Meta-Llama-3.1-70B-Instruct",
-     "google/gemma-2-9b-it",
-     "google/gemma-2-27b-it"
  ]
  
- DEFAULT_SYSTEM_PROMPT = """You are a world-class financial AI assistant, capable of complex reasoning and reflection.
- Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags.
- Providing comprehensive and accurate information based on web search results is essential.
- Your goal is to synthesize the given context into a coherent and detailed response that directly addresses the user's query.
- Please ensure that your response is well-structured and factual.
- If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags."""
-
  def get_embeddings():
      return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
  
  def duckduckgo_search(query):
      try:
-         with DDGS() as ddgs:
-             results = ddgs.text(query, max_results=5)
-             logging.info(f"Search completed for query: {query}")
-             return results
      except Exception as e:
-         logging.error(f"Error during DuckDuckGo search: {str(e)}")
-         return []
  
  def create_web_search_vectors(search_results):
      embed = get_embeddings()
      documents = []
      for result in search_results:
          if 'body' in result:
              content = f"{result['title']}\n{result['body']}\nSource: {result['href']}"
              documents.append(Document(page_content=content, metadata={"source": result['href']}))
-     logging.info(f"Created vectors for {len(documents)} search results.")
      return FAISS.from_documents(documents, embed)
  
- def create_context(search_results, use_embeddings, query):
-     if use_embeddings:
-         web_search_database = create_web_search_vectors(search_results)
-         retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
-         relevant_docs = retriever.get_relevant_documents(query)
-         return "\n".join([doc.page_content for doc in relevant_docs])
-     else:
-         return "\n".join([f"{result['title']}\n{result['body']}" for result in search_results])
-
- async def get_response_with_search(query: str, system_prompt: str, model: str, use_embeddings: bool, history=None, num_calls: int = 3, temperature: float = 0.2) -> AsyncGenerator[Tuple[str, str], None]:
      search_results = duckduckgo_search(query)
  
-     if not search_results:
-         logging.warning(f"No web search results found for query: {query}")
          yield "No web search results available. Please try again.", ""
          return
-
-     sources = [result['href'] for result in search_results if 'href' in result]
-     source_list_str = "\n".join(sources)
-
-     context = create_context(search_results, use_embeddings, query)
-     logging.info(f"Context created for query: {query}")
-
-     user_message = f"""Using the following context from web search results:
  {context}
  
- Write a detailed and complete research document that fulfills the following user request: '{query}'."""
  
-     messages = [
-         {"role": "system", "content": system_prompt},
-         {"role": "user", "content": user_message}
      ]
  
-     if history:
-         messages = history + messages
  
-     # Get a random token for the API call
-     token = get_random_token()
-     client = InferenceClient(model, token=token)
      full_response = ""
-
-     for call in range(num_calls):
          try:
-             response = await asyncio.to_thread(
-                 client.chat_completion,
-                 messages=messages,
-                 max_tokens=6000,
-                 temperature=temperature,
-                 top_p=0.8,
-             )
-
-             if response is None or not isinstance(response, dict) or 'choices' not in response:
-                 logging.error(f"API call {call + 1} returned an invalid response: {response}")
-                 if call == num_calls - 1:
-                     yield "The API returned an invalid response. Please try again later.", ""
-                 continue
-
-             new_content = response['choices'][0]['message']['content']
-             full_response += new_content
-             yield full_response, ""
-
-             if full_response:
-                 break  # If we got a valid response, exit the loop
-
          except Exception as e:
-             logging.error(f"Error in API call {call + 1}: {str(e)}")
-             if call == num_calls - 1:
-                 yield f"An error occurred during API calls: {str(e)}. Please try again later.", ""
-
-         await asyncio.sleep(1)  # 1 second delay between calls
-
      if not full_response:
-         logging.warning("No response generated from the model")
-         yield "No response generated from the model. Please try again.", ""
-     else:
-         yield f"{full_response}\n\nSources:\n{source_list_str}", ""
-
- def process_history(history):
-     chat_history = []
-     if isinstance(history, str):
-         # If history is a string (like the system prompt), add it as a system message
-         chat_history.append({"role": "system", "content": history})
-     elif isinstance(history, list):
-         for entry in history:
-             if isinstance(entry, (list, tuple)) and len(entry) == 2:
-                 human, assistant = entry
-                 chat_history.append({"role": "user", "content": human})
-                 if assistant:
-                     chat_history.append({"role": "assistant", "content": assistant})
-             elif isinstance(entry, str):
-                 # If it's a string, assume it's a user message
-                 chat_history.append({"role": "user", "content": entry})
-     return chat_history
-
- async def respond(message, system_prompt, history, model, temperature, num_calls, use_embeddings):
-     logging.info(f"User Query: {message}")
-     logging.info(f"Model Used: {model}")
-     logging.info(f"Temperature: {temperature}")
-     logging.info(f"Number of API Calls: {num_calls}")
-     logging.info(f"Use Embeddings: {use_embeddings}")
-     logging.info(f"System Prompt: {system_prompt}")
-     logging.info(f"History: {history}")
-
-     chat_history = process_history(history)
-
-     try:
-         async for main_content, sources in get_response_with_search(
-             message,
-             system_prompt,
-             model,
-             use_embeddings,
-             history=chat_history,
-             num_calls=num_calls,
-             temperature=temperature
-         ):
-             yield main_content
-
-             if sources:
-                 yield f"\n\nSources:\n{sources}"
-
-     except asyncio.CancelledError:
-         logging.warning("The operation was cancelled.")
-         yield "The operation was cancelled. Please try again."
-     except Exception as e:
-         logging.error(f"Error in respond function: {str(e)}")
-         yield f"An error occurred: {str(e)}"
  
  css = """
  /* Fine-tune chatbox size */
- .chatbot-container {
-     height: 600px !important;
-     width: 100% !important;
- }
- .chatbot-container > div {
-     height: 100%;
-     width: 100%;
- }
  """
  
- # Gradio interface setup
- def create_gradio_interface():
-     custom_placeholder = "Enter your question here for web search."
-
-     demo = gr.ChatInterface(
-         fn=respond,
-         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=True, render=False),
-         additional_inputs=[
-             gr.Textbox(value=DEFAULT_SYSTEM_PROMPT, lines=6, label="System Prompt", placeholder="Enter your system prompt here"),
-             gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
-             gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
-             gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
-             gr.Checkbox(label="Use Embeddings", value=False),
-         ],
-         title="AI-powered Web Search Assistant",
-         description="Use web search to answer questions or generate summaries.",
-         theme=gr.Theme.from_hub("allenai/gradio-theme"),
-         css=css,
-         examples=[
-             ["What are the latest developments in artificial intelligence?"],
-             ["Explain the concept of quantum computing."],
-             ["What are the environmental impacts of renewable energy?"]
-         ],
-         cache_examples=False,
-         analytics_enabled=False,
-         textbox=gr.Textbox(placeholder=custom_placeholder, container=False, scale=7),
-         chatbot=gr.Chatbot(
-             show_copy_button=True,
-             likeable=True,
-             layout="bubble",
-             height=400,
-         )
      )
-
-     with demo:
-         gr.Markdown("""
-         ## How to use
-         1. Enter your question in the chat interface.
-         2. Optionally, modify the System Prompt to guide the AI's behavior.
-         3. Select the model you want to use from the dropdown.
-         4. Adjust the Temperature to control the randomness of the response.
-         5. Set the Number of API Calls to determine how many times the model will be queried.
-         6. Check or uncheck the "Use Embeddings" box to toggle between using embeddings or direct text summarization.
-         7. Press Enter or click the submit button to get your answer.
-         8. Use the provided examples or ask your own questions.
-         """)
-
-     return demo
  
  if __name__ == "__main__":
-     demo = create_gradio_interface()
      demo.launch(share=True)
  
+++ app.py (new version)
  import os
+ import json
+ import re
  import gradio as gr
+ import requests
  from duckduckgo_search import DDGS
+ from typing import List
+ from pydantic import BaseModel, Field
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_core.documents import Document
+ from huggingface_hub import InferenceClient
+ import logging
  
+ # Set up basic configuration for logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  
+ # Environment variables and configurations
+ huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
+ ACCOUNT_ID = os.environ.get("CLOUDFARE_ACCOUNT_ID")
+ API_TOKEN = os.environ.get("CLOUDFLARE_AUTH_TOKEN")
+ API_BASE_URL = "https://api.cloudflare.com/client/v4/accounts/a17f03e0f049ccae0c15cdcf3b9737ce/ai/run/"
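+ # NOTE: the URL above hard-codes an account id, so the ACCOUNT_ID read from the environment appears unused.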
 
 
 
 
 
  MODELS = [
      "mistralai/Mistral-7B-Instruct-v0.3",
      "mistralai/Mixtral-8x7B-Instruct-v0.1",
+     "@cf/meta/llama-3.1-8b-instruct",
+     "mistralai/Mistral-Nemo-Instruct-2407"
  ]
  
  def get_embeddings():
      return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
  
  def duckduckgo_search(query):
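+     # Each result from DDGS().text() is a dict; the code below relies on its
+     # 'title', 'body' and 'href' keys when building documents and prompts.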
+     with DDGS() as ddgs:
+         results = ddgs.text(query, max_results=5)
+     return results
+
+ class CitingSources(BaseModel):
+     sources: List[str] = Field(
+         ...,
+         description="List of sources to cite. Should be a URL of the source."
+     )
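+ # NOTE: CitingSources is declared for structured citation output but is not referenced elsewhere in this file.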
+
+ def chatbot_interface(message, history, model, temperature, num_calls):
+     if not message.strip():
+         return "", history
+
+     history = history + [(message, "")]
+
      try:
+         for response in respond(message, history, model, temperature, num_calls):
+             history[-1] = (message, response)
+             yield history
+     except gr.CancelledError:
+         yield history
      except Exception as e:
+         logging.error(f"Unexpected error in chatbot_interface: {str(e)}")
+         history[-1] = (message, f"An unexpected error occurred: {str(e)}")
+         yield history
+
+ def retry_last_response(history, model, temperature, num_calls):
+     if not history:
+         return history
+
+     last_user_msg = history[-1][0]
+     history = history[:-1]  # Remove the last response
+
+     return chatbot_interface(last_user_msg, history, model, temperature, num_calls)
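+ # NOTE: chatbot_interface and retry_last_response are not wired into the gr.ChatInterface below;
+ # gr.CancelledError also does not appear to be a public Gradio exception.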
+
+ def respond(message, history, model, temperature, num_calls):
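+     # Generator: gr.ChatInterface renders each yielded string as the progressively updated reply.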
+     logging.info(f"User Query: {message}")
+     logging.info(f"Model Used: {model}")
+
+     try:
+         for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
+             response = f"{main_content}\n\n{sources}"
+             first_line = response.split('\n')[0] if response else ''
+             yield response
+     except Exception as e:
+         logging.error(f"Error with {model}: {str(e)}")
+         yield f"An error occurred with the {model} model: {str(e)}. Please try again or select a different model."
  
  def create_web_search_vectors(search_results):
      embed = get_embeddings()
+
      documents = []
      for result in search_results:
          if 'body' in result:
              content = f"{result['title']}\n{result['body']}\nSource: {result['href']}"
              documents.append(Document(page_content=content, metadata={"source": result['href']}))
+
      return FAISS.from_documents(documents, embed)
 
+ def get_response_with_search(query, model, num_calls=3, temperature=0.2):
      search_results = duckduckgo_search(query)
+     web_search_database = create_web_search_vectors(search_results)
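+     # NOTE: FAISS.from_documents can raise on an empty document list, so an empty
+     # search result is likely to surface as an exception before the guard below runs.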
+
+     if not web_search_database:
          yield "No web search results available. Please try again.", ""
          return
+
+     retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
+     relevant_docs = retriever.get_relevant_documents(query)
+
+     context = "\n".join([doc.page_content for doc in relevant_docs])
+
+     prompt = f"""Using the following context from web search results:
  {context}
+ Write a detailed and complete research document that fulfills the following user request: '{query}'
+ After writing the document, please provide a list of sources used in your response."""
  
+     if model == "@cf/meta/llama-3.1-8b-instruct":
+         # Use Cloudflare API
+         for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature):
+             yield response, ""  # Yield streaming response without sources
+     else:
+         # Use Hugging Face API
+         client = InferenceClient(model, token=huggingface_token)
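+         # chat_completion(stream=True) yields chunks; choices[0].delta.content carries
+         # the next piece of generated text as it streams in.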
+
+         main_content = ""
+         for i in range(num_calls):
+             for message in client.chat_completion(
+                 messages=[{"role": "user", "content": prompt}],
+                 max_tokens=10000,
+                 temperature=temperature,
+                 stream=True,
+             ):
+                 if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                     chunk = message.choices[0].delta.content
+                     main_content += chunk
+                     yield main_content, ""  # Yield partial main content without sources
+
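+ # Streams a response from Cloudflare Workers AI by POSTing to the REST endpoint configured above.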
+ def get_response_from_cloudflare(prompt, context, query, num_calls=3, temperature=0.2):
+     headers = {
+         "Authorization": f"Bearer {API_TOKEN}",
+         "Content-Type": "application/json"
+     }
+     model = "@cf/meta/llama-3.1-8b-instruct"
+
+     instruction = f"""Using the following context:
+ {context}
+ Write a detailed and complete research document that fulfills the following user request: '{query}'
+ After writing the document, please provide a list of sources used in your response."""
  
+     inputs = [
+         {"role": "system", "content": instruction},
+         {"role": "user", "content": query}
      ]
  
+     payload = {
+         "messages": inputs,
+         "stream": True,
+         "temperature": temperature,
+         "max_tokens": 32000
+     }
  
      full_response = ""
+     for i in range(num_calls):
          try:
+             with requests.post(f"{API_BASE_URL}{model}", headers=headers, json=payload, stream=True) as response:
+                 if response.status_code == 200:
+                     for line in response.iter_lines():
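+                         # The streaming body is Server-Sent Events: each non-empty
+                         # line looks like 'data: {"response": "..."}'.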
+                         if line:
+                             try:
+                                 json_response = json.loads(line.decode('utf-8').split('data: ')[1])
+                                 if 'response' in json_response:
+                                     chunk = json_response['response']
+                                     full_response += chunk
+                                     yield full_response
+                             except (json.JSONDecodeError, IndexError) as e:
+                                 logging.error(f"Error parsing streaming response: {str(e)}")
+                                 continue
+                 else:
+                     logging.error(f"HTTP Error: {response.status_code}, Response: {response.text}")
+                     yield f"I apologize, but I encountered an HTTP error: {response.status_code}. Please try again later."
          except Exception as e:
+             logging.error(f"Error in generating response from Cloudflare: {str(e)}")
+             yield f"I apologize, but an error occurred: {str(e)}. Please try again later."
+
      if not full_response:
+         yield "I apologize, but I couldn't generate a response at this time. Please try again later."
  
+ def vote(data: gr.LikeData):
+     if data.liked:
+         print(f"You upvoted this response: {data.value}")
+     else:
+         print(f"You downvoted this response: {data.value}")
  
  css = """
  /* Fine-tune chatbox size */
  """
  
+ def initial_conversation():
+     return [
+         (None, "Welcome! I'm your AI assistant for web search. Here's how you can use me:\n\n"
+                "1. Ask me any question, and I'll search the web for information.\n"
+                "2. You can adjust the model, temperature, and number of API calls for fine-tuned responses.\n"
+                "3. For any queries, feel free to reach out @[email protected] or discord - shreyas094\n\n"
+                "To get started, ask me a question!")
+     ]
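+ # initial_conversation() seeds the Chatbot below through its value= argument.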
 
  
+ demo = gr.ChatInterface(
+     respond,
+     additional_inputs=[
+         gr.Dropdown(choices=MODELS, label="Select Model", value=MODELS[3]),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"),
+         gr.Slider(minimum=1, maximum=5, value=1, step=1, label="Number of API Calls"),
+     ],
+     title="AI-powered Web Search Assistant",
+     description="Ask questions and get answers from web search results.",
+     theme=gr.themes.Soft(
+         primary_hue="orange",
+         secondary_hue="amber",
+         neutral_hue="gray",
+         font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]
+     ).set(
+         body_background_fill_dark="#0c0505",
+         block_background_fill_dark="#0c0505",
+         block_border_width="1px",
+         block_title_background_fill_dark="#1b0f0f",
+         input_background_fill_dark="#140b0b",
+         button_secondary_background_fill_dark="#140b0b",
+         border_color_accent_dark="#1b0f0f",
+         border_color_primary_dark="#1b0f0f",
+         background_fill_secondary_dark="#0c0505",
+         color_accent_soft_dark="transparent",
+         code_background_fill_dark="#140b0b"
+     ),
+     css=css,
+     examples=[
+         ["What are the latest developments in artificial intelligence?"],
+         ["Can you explain the basics of quantum computing?"],
+         ["What are the current global economic trends?"]
+     ],
+     cache_examples=False,
+     analytics_enabled=False,
+     textbox=gr.Textbox(placeholder="Ask a question", container=False, scale=7),
+     chatbot=gr.Chatbot(
+         show_copy_button=True,
+         likeable=True,
+         layout="bubble",
+         height=400,
+         value=initial_conversation()
      )
+ )
  
  if __name__ == "__main__":
      demo.launch(share=True)