Shreyas094 committed
Commit
e4b2310
1 Parent(s): 4706059

Update app.py

Files changed (1)
  1. app.py +33 -27
app.py CHANGED
@@ -255,33 +255,26 @@ def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
         logger.error(f"Error scraping full content from {url}: {e}")
         return ""
 
-def llm_summarize(query, documents, llm_client, temperature=0.2):
-    system_prompt = """You are Sentinel, a world class Financial analysis AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
-
-    # Prepare the context from the documents
-    context = "\n\n".join([f"Document {i+1}:\nTitle: {doc['title']}\nURL: {doc['url']}\n(SCRAPED CONTENT)\n{doc['full_content']}\n(/SCRAPED CONTENT)" for i, doc in enumerate(documents)])
+def llm_summarize(json_input, llm_client, temperature=0.2):
+    system_prompt = """You are Sentinel, a world-class Financial analysis AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
 
     user_prompt = f"""
-Query: {query}
-
-Context: {context}
-Instructions: Write a detailed, long and complete research document that is informative and relevant to the user, who is a financial analyst, query based on provided context (the context consists of search results containing a brief description of the content of that page). You must use this context to answer the user's query in the best way possible.
-Use an unbiased and writer tone in your response. Do not repeat the text. You must provide the answer in the response itself. If the user asks for links you can provide them.
-If the user asks to summarize content from some links, you will be provided the entire content of the page inside the (SCRAPED CONTENT) block.
-You can then use this content to summarize the text.Your responses should be detailed in length be informative, accurate and relevant to the user's query.
-You can use markdowns to format your response. You should use bullet points to list the information.
-Make sure the answer is long and is informative in a research document style. You have to cite the answer using [number] notation along with the appropriate source URL embedded in the notation.
-You must cite the sentences with their relevant context number.
-You must cite each and every part of the answer so the user can know where the information is coming from. Place these citations at the end of that particular sentence.
-You can cite the same sentence multiple times if it is relevant to the user's query like [number1][number2].
-However you do not need to cite it using the same number. You can use different numbers to cite the same sentence multiple times.
-The number refers to the number of the search result (passed in the context) used to generate that part of the answer. Anything inside the following (SCRAPED CONTENT) block provided below is for your knowledge returned by the search engine and is not shared by the user.
-You have to answer question on the basis of it and cite the relevant information from it but you do not have to talk about the context in your response.
-If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
-You do not need to do this for summarization tasks. Anything between the (SCRAPED CONTENT) is retrieved from a search engine and is not a part of the conversation with the user.
-
-Please provide a comprehensive summary based on the above instructions:
-"""
+    Please provide a comprehensive summary based on the following JSON input:
+    {json_input}
+
+    Instructions:
+    1. Analyze the query and the provided documents.
+    2. Write a detailed, long, and complete research document that is informative and relevant to the user's query.
+    3. Use an unbiased and professional tone in your response.
+    4. Do not repeat text verbatim from the input.
+    5. Provide the answer in the response itself.
+    6. You can use markdown to format your response.
+    7. Use bullet points to list information where appropriate.
+    8. Cite the answer using [number] notation along with the appropriate source URL embedded in the notation.
+    9. Place these citations at the end of the relevant sentences.
+    10. You can cite the same sentence multiple times if it's relevant to different parts of your answer.
+
+    Your response should be detailed, informative, accurate, and directly relevant to the user's query."""
 
     messages = [
         {"role": "system", "content": system_prompt},
@@ -491,10 +484,23 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura",
         for doc in reranked_docs[:num_results]:
             full_content = scrape_full_content(doc['url'], scraper, max_chars)
             doc['full_content'] = full_content
-
+
+        # Prepare JSON for LLM
+        llm_input = {
+            "query": query,
+            "documents": [
+                {
+                    "title": doc['title'],
+                    "url": doc['url'],
+                    "summary": doc['summary'],
+                    "full_content": doc['full_content']
+                } for doc in reranked_docs[:num_results]
+            ]
+        }
+
         # Step 6: LLM Summarization
-        llm_summary = llm_summarize(query, reranked_docs[:num_results], client, temperature=llm_temperature)
-
+        llm_summary = llm_summarize(json.dumps(llm_input), client, temperature=llm_temperature)
+
         return llm_summary
 
     except Exception as e:
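
The hunks above only change how llm_summarize builds its prompt; the completion call that consumes the messages list sits outside this diff. A minimal sketch of how the updated function could look end to end, assuming an OpenAI-style chat-completions client and a placeholder model name (neither is shown in this commit, and the prompt text is abbreviated here):

def llm_summarize(json_input, llm_client, temperature=0.2):
    # System prompt shortened for illustration; the commit keeps the full Sentinel persona text.
    system_prompt = "You are Sentinel, a world-class financial analysis AI model."

    # The JSON string produced by the caller is interpolated directly into the user prompt.
    user_prompt = f"""
    Please provide a comprehensive summary based on the following JSON input:
    {json_input}

    Instructions: analyze the query and the provided documents, write a detailed
    research document, and cite sources with [number] notation.
    """

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Assumption: an OpenAI-compatible client; app.py's actual call is not part of this diff.
    response = llm_client.chat.completions.create(
        model="placeholder-model",  # hypothetical model name
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message.content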
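
On the caller side, the second hunk bundles the query and the reranked documents into a single JSON payload before handing it to llm_summarize. A self-contained illustration of that payload shape, using made-up document values; note that json.dumps requires an import json at the top of app.py, which this diff does not show:

import json

# Hypothetical reranked documents; in app.py these come from search, reranking, and scraping.
reranked_docs = [
    {
        "title": "Q2 earnings beat expectations",
        "url": "https://example.com/q2-earnings",
        "summary": "Short search-result snippet.",
        "full_content": "Full scraped article text goes here.",
    },
]
query = "Summarize the latest Q2 earnings commentary"

# Same payload shape as the new code in search_and_scrape.
llm_input = {
    "query": query,
    "documents": [
        {
            "title": doc["title"],
            "url": doc["url"],
            "summary": doc["summary"],
            "full_content": doc["full_content"],
        }
        for doc in reranked_docs
    ],
}

# llm_summarize now receives one serialized string instead of separate query/documents arguments.
print(json.dumps(llm_input, indent=2))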