Shreyas094 commited on
Commit
790409e
1 Parent(s): 57bdb9a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -18,6 +18,10 @@ from langchain_community.llms import HuggingFaceHub
18
  from langchain_core.documents import Document
19
  from sentence_transformers import SentenceTransformer
20
  from llama_parse import LlamaParse
 
 
 
 
21
 
22
  # Environment variables and configurations
23
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -92,51 +96,73 @@ def get_model(temperature, top_p, repetition_penalty):
92
  )
93
 
94
def duckduckgo_search(query):
    """Run a DuckDuckGo text search for *query* and return up to five results.

    The value produced by ``ddgs.text()`` is materialized with ``list()``
    *inside* the ``with`` block: if it is a lazy generator, consuming it after
    the DDGS session has closed can fail or silently yield nothing.
    """
    with DDGS() as ddgs:
        # Force evaluation while the search session is still open.
        results = list(ddgs.text(query, max_results=5))
    return results
98
 
99
def get_response_with_search(query, temperature, top_p, repetition_penalty, use_pdf=False):
    """Build a context, prompt the model, and return ``(main_content, sources)``.

    Context comes from the local FAISS PDF index when ``use_pdf`` is true and
    the ``faiss_database`` directory exists; otherwise from a live DuckDuckGo
    search. ``temperature``, ``top_p`` and ``repetition_penalty`` are passed
    through to ``get_model()``.
    """
    model = get_model(temperature, top_p, repetition_penalty)
    embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    if use_pdf and os.path.exists("faiss_database"):
        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
        retriever = database.as_retriever()
        relevant_docs = retriever.get_relevant_documents(query)
        # .get() guards against documents that were indexed without a 'source'.
        context = "\n".join(
            f"Content: {doc.page_content}\nSource: {doc.metadata.get('source', 'unknown')}\n"
            for doc in relevant_docs
        )
    else:
        search_results = duckduckgo_search(query)
        # Only 'body' is guaranteed by the filter below; read the other
        # keys defensively so one malformed result cannot raise KeyError.
        context = "\n".join(
            f"{result.get('title', '')}\n{result['body']}\nSource: {result.get('href', 'unknown')}\n"
            for result in search_results if 'body' in result
        )

    prompt = f"""<s>[INST] Using the following context:
{context}
Write a detailed and complete research document that fulfills the following user request: '{query}'
After the main content, provide a list of sources used in your response, prefixed with 'Sources:'.
Do not include any part of these instructions in your response. [/INST]"""

    response = model(prompt)

    main_content, sources = split_response(response)

    return main_content, sources
124
 
125
def split_response(response):
    """Scrub leftover prompt markup from *response* and split it into
    ``(main_content, sources)`` around the first ``'Sources:'`` marker.
    ``sources`` is the empty string when no marker is present."""
    # Strip instruction tags and struck-through spans left by the model.
    for pattern, flags in ((r'\[/?INST\]', 0), (r'~~.*?~~', re.DOTALL)):
        response = re.sub(pattern, '', response, flags=flags)

    # Everything before the first 'Sources:' is the document body;
    # everything after it is the citation list.
    main_content, _, sources = response.partition("Sources:")
    return main_content.strip(), sources.strip()
136
 
137
def chatbot_interface(message, history, temperature, top_p, repetition_penalty, use_pdf):
    """Gradio callback: answer *message* and append its source list.

    ``history`` is accepted for the Gradio chat signature but not used here.
    """
    body, refs = get_response_with_search(
        message, temperature, top_p, repetition_penalty, use_pdf
    )
    return f"{body}\n\nSources:\n{refs}"
141
 
142
  # Gradio interface
 
18
  from langchain_core.documents import Document
19
  from sentence_transformers import SentenceTransformer
20
  from llama_parse import LlamaParse
21
import logging

# Set up logging: DEBUG level so every step of the search/RAG pipeline below
# is traced; format prepends a timestamp and severity to each record.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

# Environment variables and configurations
# NOTE(review): may be None if the variable is unset — downstream HF calls
# presumably require it; verify at startup.
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
96
  )
97
 
98
def duckduckgo_search(query):
    """Run a DuckDuckGo text search and return up to five result dicts.

    Returns an empty list (after logging the failure) when the search raises,
    so callers can still proceed with an empty context instead of crashing on
    a transient network or rate-limit error.
    """
    logging.debug(f"Performing DuckDuckGo search for query: {query}")
    try:
        with DDGS() as ddgs:
            # Materialize inside the with-block: the session must stay open
            # while the results are consumed.
            results = list(ddgs.text(query, max_results=5))
    except Exception:
        # Best-effort: a failed web search should not take down the request.
        logging.exception(f"DuckDuckGo search failed for query: {query}")
        return []
    logging.debug(f"Search returned {len(results)} results")
    return results
104
 
105
def get_response_with_search(query, temperature, top_p, repetition_penalty, use_pdf=False):
    """Build a context, prompt the model, and return ``(main_content, sources)``.

    Context comes from the local FAISS PDF index when ``use_pdf`` is true and
    the ``faiss_database`` directory exists; otherwise from a live DuckDuckGo
    search. ``temperature``, ``top_p`` and ``repetition_penalty`` are passed
    through to ``get_model()``.
    """
    logging.debug(f"Getting response for query: {query}")
    logging.debug(f"Parameters: temperature={temperature}, top_p={top_p}, repetition_penalty={repetition_penalty}, use_pdf={use_pdf}")

    model = get_model(temperature, top_p, repetition_penalty)
    embed = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

    if use_pdf and os.path.exists("faiss_database"):
        logging.debug("Using PDF database for context")
        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
        retriever = database.as_retriever()
        relevant_docs = retriever.get_relevant_documents(query)
        # .get() guards against documents that were indexed without a 'source'.
        context = "\n".join(
            f"Content: {doc.page_content}\nSource: {doc.metadata.get('source', 'unknown')}\n"
            for doc in relevant_docs
        )
    else:
        logging.debug("Using web search for context")
        search_results = duckduckgo_search(query)
        # Only 'body' is guaranteed by the filter below; read the other
        # keys defensively so one malformed result cannot raise KeyError.
        context = "\n".join(
            f"{result.get('title', '')}\n{result['body']}\nSource: {result.get('href', 'unknown')}\n"
            for result in search_results if 'body' in result
        )

    logging.debug(f"Context generated. Length: {len(context)} characters")

    prompt = f"""<s>[INST] Using the following context:
{context}
Write a detailed and complete research document that fulfills the following user request: '{query}'
After the main content, provide a list of sources used in your response, prefixed with 'Sources:'.
Do not include any part of these instructions in your response. [/INST]"""

    logging.debug("Sending prompt to model")
    response = model(prompt)
    logging.debug(f"Received response from model. Length: {len(response)} characters")

    main_content, sources = split_response(response)

    logging.debug(f"Split response. Main content length: {len(main_content)}, Sources length: {len(sources)}")
    return main_content, sources
140
 
141
def split_response(response):
    """Scrub leftover prompt markup from *response* and split it into
    ``(main_content, sources)`` around the first ``'Sources:'`` marker.
    ``sources`` is the empty string when no marker is present."""
    logging.debug("Splitting response")
    logging.debug(f"Original response: {response[:100]}...")  # Log first 100 characters

    # Strip instruction tags and struck-through spans left by the model.
    for pattern, flags in ((r'\[/?INST\]', 0), (r'~~.*?~~', re.DOTALL)):
        response = re.sub(pattern, '', response, flags=flags)

    logging.debug(f"After removing instructions: {response[:100]}...")  # Log first 100 characters

    # Body precedes the first 'Sources:' marker; citations follow it.
    main_content, _, sources = response.partition("Sources:")
    main_content = main_content.strip()
    sources = sources.strip()

    logging.debug(f"Main content starts with: {main_content[:100]}...")  # Log first 100 characters
    logging.debug(f"Sources: {sources[:100]}...")  # Log first 100 characters

    return main_content, sources
160
 
161
def chatbot_interface(message, history, temperature, top_p, repetition_penalty, use_pdf):
    """Gradio callback: answer *message* and append its source list.

    ``history`` is accepted for the Gradio chat signature but not used here.
    """
    logging.debug(f"Chatbot interface called with message: {message}")
    body, refs = get_response_with_search(
        message, temperature, top_p, repetition_penalty, use_pdf
    )
    reply = f"{body}\n\nSources:\n{refs}"
    logging.debug(f"Formatted response: {reply[:100]}...")  # Log first 100 characters
    return reply
167
 
168
  # Gradio interface