Shreyas094 committed
Commit
eef32e4
1 Parent(s): de32af2

Update app.py

Files changed (1): app.py (+16, -1)
app.py CHANGED
@@ -471,6 +471,12 @@ def get_response_from_excel(query, model, context, num_calls=3, temperature=0.2)
 
     logging.info("Finished generating response for Excel data")
 
+def truncate_context(context, max_chars=24000):
+    """Truncate context to a maximum number of characters."""
+    if len(context) <= max_chars:
+        return context
+    return context[:max_chars] + "..."
+
 def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1, temperature=0.2):
     logging.info(f"Getting response from Llama using model: {model}")
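Note: truncate_context caps the retrieved context by character count rather than by tokens. A quick standalone check of its behavior, using the function exactly as added above:

    def truncate_context(context, max_chars=24000):
        """Truncate context to a maximum number of characters."""
        if len(context) <= max_chars:
            return context
        return context[:max_chars] + "..."

    print(len(truncate_context("x" * 30000)))   # 24003: 24000 kept characters plus "..."
    print(truncate_context("short"))            # short inputs pass through unchanged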
 
@@ -492,6 +498,9 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
         relevant_docs = retriever.get_relevant_documents(query)
         context = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
 
+        # Truncate context
+        context = truncate_context(context)
+
         prompt = f"{system_instruction}\n\nBased on the following data extracted from Excel spreadsheets:\n{context}\n\nPlease provide the Python code needed to execute the following task: '{query}'. Ensure that the code is derived directly from the dataset. If a chart is requested, use the matplotlib library to generate the appropriate visualization."
 
     elif file_type == "pdf":
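Note: before truncation, the Excel branch keeps only the retrieved chunks whose source is among the user-selected files. A minimal sketch of that filter-and-join step with stand-in documents (SimpleDoc is hypothetical; in app.py the objects come from the LangChain retriever):

    from dataclasses import dataclass, field

    @dataclass
    class SimpleDoc:                             # stand-in for a LangChain Document
        page_content: str
        metadata: dict = field(default_factory=dict)

    relevant_docs = [
        SimpleDoc("Q1 revenue: 1.2M", {"source": "q1.xlsx"}),
        SimpleDoc("Q2 revenue: 1.4M", {"source": "q2.xlsx"}),
    ]
    selected_docs = ["q1.xlsx"]

    context = "\n".join(doc.page_content for doc in relevant_docs
                        if doc.metadata["source"] in selected_docs)
    context = truncate_context(context)          # helper added in this commit
    print(context)                               # Q1 revenue: 1.2M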
@@ -504,6 +513,9 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
 
         context_str = "\n".join([doc.page_content for doc in relevant_docs if doc.metadata["source"] in selected_docs])
 
+        # Truncate context
+        context_str = truncate_context(context_str)
+
         system_instruction = """You are a highly specialized financial analyst assistant with expertise in analyzing and summarizing financial documents.
         Your goal is to provide accurate, detailed, and precise summaries based on the context provided.
         Avoid making assumptions or adding information that is not explicitly supported by the context from the PDF documents."""
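Side note, not part of this commit: a triple-quoted string written at this indentation keeps the leading spaces of its continuation lines inside the prompt text. If that whitespace matters, textwrap.dedent is the usual fix:

    import textwrap

    system_instruction = textwrap.dedent("""\
        You are a highly specialized financial analyst assistant with expertise in analyzing and summarizing financial documents.
        Your goal is to provide accurate, detailed, and precise summaries based on the context provided.
        Avoid making assumptions or adding information that is not explicitly supported by the context from the PDF documents.""")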
@@ -519,9 +531,12 @@ def get_response_from_llama(query, model, selected_docs, file_type, num_calls=1,
             # Generate content with streaming enabled
             for response in client.text_generation(
                 prompt=prompt,
-                max_new_tokens=2000,
+                max_new_tokens=1000,  # Reduced to ensure we stay within token limits
                 temperature=temperature,
                 stream=True,
+                repetition_penalty=1.1,
+                top_k=50,
+                top_p=0.9,
             ):
                 if response.token.text:
                     chunk = response.token.text
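For reference, the streaming call with the new sampling parameters looks like this in isolation. A minimal sketch: the model id and token are placeholders, since the commit does not show how client is constructed, and with stream=True (and no details=True) huggingface_hub yields plain string chunks rather than the response.token.text objects used above:

    from huggingface_hub import InferenceClient

    client = InferenceClient("meta-llama/Llama-2-7b-chat-hf", token="hf_...")  # placeholders

    output = ""
    for chunk in client.text_generation(
        prompt="Summarize: revenue grew 12% year over year.",
        max_new_tokens=1000,      # the commit's reduced cap
        temperature=0.2,
        stream=True,              # yield the reply incrementally
        repetition_penalty=1.1,   # values > 1 discourage repeated tokens
        top_k=50,                 # sample only from the 50 most likely tokens
        top_p=0.9,                # nucleus sampling over the top 90% of probability mass
    ):
        output += chunk           # each chunk is a str in this mode
    print(output)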
 
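Taken together, the 24,000-character truncation cap and the reduced max_new_tokens aim to keep prompt plus completion inside the model's context window. A back-of-the-envelope check (the ~4 characters per token ratio and the 8,192-token window are assumptions, not stated in the commit):

    chars_per_token = 4                        # rough average for English text; assumption
    context_tokens = 24000 // chars_per_token  # about 6000 tokens of truncated context
    total = context_tokens + 1000              # plus max_new_tokens for the reply
    print(total)                               # 7000, inside an assumed 8192-token window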