Shreyas094
committed on
Commit
•
e4b2310
1
Parent(s):
4706059
Update app.py
Browse files
app.py
CHANGED
@@ -255,33 +255,26 @@ def scrape_full_content(url, scraper="trafilatura", max_chars=3000):
|
|
255 |
logger.error(f"Error scraping full content from {url}: {e}")
|
256 |
return ""
|
257 |
|
258 |
-
def llm_summarize(
|
259 |
-
system_prompt = """You are Sentinel, a world
|
260 |
-
|
261 |
-
# Prepare the context from the documents
|
262 |
-
context = "\n\n".join([f"Document {i+1}:\nTitle: {doc['title']}\nURL: {doc['url']}\n(SCRAPED CONTENT)\n{doc['full_content']}\n(/SCRAPED CONTENT)" for i, doc in enumerate(documents)])
|
263 |
|
264 |
user_prompt = f"""
|
265 |
-
|
|
|
266 |
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
If you think there's nothing relevant in the search results, you can say that 'Hmm, sorry I could not find any relevant information on this topic. Would you like me to search again or ask something else?'.
|
281 |
-
You do not need to do this for summarization tasks. Anything between the (SCRAPED CONTENT) is retrieved from a search engine and is not a part of the conversation with the user.
|
282 |
-
|
283 |
-
Please provide a comprehensive summary based on the above instructions:
|
284 |
-
"""
|
285 |
|
286 |
messages = [
|
287 |
{"role": "system", "content": system_prompt},
|
@@ -491,10 +484,23 @@ def search_and_scrape(query, chat_history, num_results=5, scraper="trafilatura",
|
|
491 |
for doc in reranked_docs[:num_results]:
|
492 |
full_content = scrape_full_content(doc['url'], scraper, max_chars)
|
493 |
doc['full_content'] = full_content
|
494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
495 |
# Step 6: LLM Summarization
|
496 |
-
llm_summary = llm_summarize(
|
497 |
-
|
498 |
return llm_summary
|
499 |
|
500 |
except Exception as e:
|
|
|
255 |
logger.error(f"Error scraping full content from {url}: {e}")
|
256 |
return ""
|
257 |
|
258 |
+
def llm_summarize(json_input, llm_client, temperature=0.2):
|
259 |
+
system_prompt = """You are Sentinel, a world-class Financial analysis AI model who is expert at searching the web and answering user's queries. You are also an expert at summarizing web pages or documents and searching for content in them."""
|
|
|
|
|
|
|
260 |
|
261 |
user_prompt = f"""
|
262 |
+
Please provide a comprehensive summary based on the following JSON input:
|
263 |
+
{json_input}
|
264 |
|
265 |
+
Instructions:
|
266 |
+
1. Analyze the query and the provided documents.
|
267 |
+
2. Write a detailed, long, and complete research document that is informative and relevant to the user's query.
|
268 |
+
3. Use an unbiased and professional tone in your response.
|
269 |
+
4. Do not repeat text verbatim from the input.
|
270 |
+
5. Provide the answer in the response itself.
|
271 |
+
6. You can use markdown to format your response.
|
272 |
+
7. Use bullet points to list information where appropriate.
|
273 |
+
8. Cite the answer using [number] notation along with the appropriate source URL embedded in the notation.
|
274 |
+
9. Place these citations at the end of the relevant sentences.
|
275 |
+
10. You can cite the same sentence multiple times if it's relevant to different parts of your answer.
|
276 |
+
|
277 |
+
Your response should be detailed, informative, accurate, and directly relevant to the user's query."""
|
|
|
|
|
|
|
|
|
|
|
278 |
|
279 |
messages = [
|
280 |
{"role": "system", "content": system_prompt},
|
|
|
484 |
for doc in reranked_docs[:num_results]:
|
485 |
full_content = scrape_full_content(doc['url'], scraper, max_chars)
|
486 |
doc['full_content'] = full_content
|
487 |
+
|
488 |
+
# Prepare JSON for LLM
|
489 |
+
llm_input = {
|
490 |
+
"query": query,
|
491 |
+
"documents": [
|
492 |
+
{
|
493 |
+
"title": doc['title'],
|
494 |
+
"url": doc['url'],
|
495 |
+
"summary": doc['summary'],
|
496 |
+
"full_content": doc['full_content']
|
497 |
+
} for doc in reranked_docs[:num_results]
|
498 |
+
]
|
499 |
+
}
|
500 |
+
|
501 |
# Step 6: LLM Summarization
|
502 |
+
llm_summary = llm_summarize(json.dumps(llm_input), client, temperature=llm_temperature)
|
503 |
+
|
504 |
return llm_summary
|
505 |
|
506 |
except Exception as e:
|