Update app.py
app.py CHANGED

@@ -3,10 +3,11 @@ import gc
 import tempfile
 import uuid
 import pandas as pd
-import openai
+import openai

 from gitingest import ingest
 from llama_index.core import Settings
+from llama_index.llms.sambanova import SambaNovaCloud
 from llama_index.core import PromptTemplate
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
@@ -14,6 +15,9 @@ from llama_index.core.node_parser import MarkdownNodeParser

 import streamlit as st

+# Set up SambaNova API key
+os.environ["SAMBANOVA_API_KEY"] = "your_sambanova_api_key"  # Replace with your actual SambaNova API key
+
 if "id" not in st.session_state:
     st.session_state.id = uuid.uuid4()
     st.session_state.file_cache = {}
@@ -21,15 +25,18 @@ if "id" not in st.session_state:
 session_id = st.session_state.id
 client = None

-# Update the load_llm function to use Sambanova's API
 @st.cache_resource
 def load_llm():
-    #
-
-
-
+    # Instantiate the SambaNova model
+    llm = SambaNovaCloud(
+        model="Meta-Llama-3.1-405B-Instruct",  # Use the correct model name
+        context_window=100000,
+        max_tokens=1024,
+        temperature=0.7,
+        top_k=1,
+        top_p=0.01,
     )
-    return
+    return llm

 def reset_chat():
     st.session_state.messages = []
@@ -77,26 +84,26 @@ with st.sidebar:
                 docs = loader.load_data()

                 # setup llm & embedding model
-                llm = load_llm()
+                llm = load_llm()
                 embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5", trust_remote_code=True)
                 # Creating an index over loaded data
                 Settings.embed_model = embed_model
                 node_parser = MarkdownNodeParser()
                 index = VectorStoreIndex.from_documents(documents=docs, transformations=[node_parser], show_progress=True)

-                # Create the query engine
+                # Create the query engine
                 Settings.llm = llm
                 query_engine = index.as_query_engine(streaming=True)

                 # ====== Customise prompt template ======
                 qa_prompt_tmpl_str = (
-
-
-
-
-
-
-
+                    "Context information is below.\n"
+                    "---------------------\n"
+                    "{context_str}\n"
+                    "---------------------\n"
+                    "Given the context information above I want you to think step by step to answer the query in a highly precise and crisp manner focused on the final answer, in case you don't know the answer say 'I don't know!'.\n"
+                    "Query: {query_str}\n"
+                    "Answer: "
                 )
                 qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

@@ -108,12 +115,13 @@ with st.sidebar:
             else:
                 query_engine = st.session_state.file_cache[file_key]

-            # Inform the user that the file is processed and
+            # Inform the user that the file is processed and display the PDF uploaded
             st.success("Ready to Chat!")
         except Exception as e:
             st.error(f"An error occurred: {e}")
             st.stop()

+
 col1, col2 = st.columns([6, 1])

 with col1:
@@ -126,11 +134,13 @@ with col2:
 if "messages" not in st.session_state:
     reset_chat()

+
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.markdown(message["content"])

+
 # Accept user input
 if prompt := st.chat_input("What's up?"):
     # Add user message to chat history
@@ -156,9 +166,9 @@ if prompt := st.chat_input("What's up?"):
                 st.error("Please load a repository first!")
                 st.stop()

-            # Use the query engine
+            # Use the query engine
             response = query_engine.query(prompt)
-
+
             # Handle streaming response
             if hasattr(response, 'response_gen'):
                 for chunk in response.response_gen:
@@ -171,11 +181,10 @@ if prompt := st.chat_input("What's up?"):
                 message_placeholder.markdown(full_response)

             message_placeholder.markdown(full_response)
-
         except Exception as e:
             st.error(f"An error occurred while processing your query: {str(e)}")
             full_response = "Sorry, I encountered an error while processing your request."
             message_placeholder.markdown(full_response)

         # Add assistant response to chat history
-        st.session_state.messages.append({"role": "assistant", "content": full_response})
+        st.session_state.messages.append({"role": "assistant", "content": full_response})
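
A note on the key handling added at new lines 18-19: the commit writes a placeholder key straight into os.environ at import time, and it relies on os already being imported in the first two lines of app.py, which this diff does not show. Below is a minimal sketch of a drop-in alternative that keeps the key out of the source, assuming only the standard os module and Streamlit's st.secrets, st.error and st.stop; the helper name get_sambanova_api_key is hypothetical and not part of the commit.

import os

import streamlit as st


def get_sambanova_api_key() -> str:
    # Hypothetical helper: prefer a key set in the environment, then fall back
    # to Streamlit's secrets file (.streamlit/secrets.toml).
    key = os.environ.get("SAMBANOVA_API_KEY")
    if not key:
        try:
            key = st.secrets["SAMBANOVA_API_KEY"]
        except Exception:
            key = None
    if not key:
        st.error("SAMBANOVA_API_KEY is not set; add it to the environment or .streamlit/secrets.toml.")
        st.stop()
    return key


# Export the key the same way the commit does, so SambaNovaCloud can pick it up.
os.environ["SAMBANOVA_API_KEY"] = get_sambanova_api_key()

With something like this in place, the hardcoded "your_sambanova_api_key" placeholder on new line 19 becomes unnecessary.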
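
The hunk at new lines 99-108 builds qa_prompt_tmpl but is cut off before the template is attached to the query engine; that wiring lives in the unchanged part of app.py and is not visible in this diff. For reference, here is a sketch of how such a template is typically attached in LlamaIndex, assuming the default response synthesizer's prompt key; the function name apply_qa_template is hypothetical.

from llama_index.core import PromptTemplate
from llama_index.core.query_engine import BaseQueryEngine


def apply_qa_template(query_engine: BaseQueryEngine, qa_prompt_tmpl: PromptTemplate) -> None:
    # Sketch only: the prompt key targets LlamaIndex's default response
    # synthesizer; the unchanged code in app.py presumably does the equivalent
    # right after qa_prompt_tmpl is built.
    query_engine.update_prompts(
        {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
    )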