muhammadsalmanalfaridzi committed
Commit ac6025d · verified · 1 Parent(s): ed58dc6

Update app.py

Files changed (1):
  1. app.py +44 -44
app.py CHANGED
@@ -3,51 +3,35 @@ import gc
 import tempfile
 import uuid
 import pandas as pd
-
-from llama_index.llms.cerebras import Cerebras
+import streamlit as st
 from llama_index.core import Settings
+from llama_index.llms.cerebras import Cerebras
+from llama_index.core import PromptTemplate
 from llama_index.embeddings.huggingface import HuggingFaceEmbedding
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.readers.docling import DoclingReader
 from llama_index.core.node_parser import MarkdownNodeParser
-
-import streamlit as st
+from llama_index.core.llms import ChatMessage
 
 if "id" not in st.session_state:
     st.session_state.id = uuid.uuid4()
     st.session_state.file_cache = {}
 
 session_id = st.session_state.id
+client = None
 
-# Load Cerebras API key
-api_key = os.getenv("CEREBRAS_API_KEY")
-if not api_key:
-    raise ValueError("CEREBRAS_API_KEY is not set in Hugging Face Secrets.")
-else:
-    print("Cerebras API key loaded successfully.")
-
-@st.cache_resource
+# Initialize Cerebras LLM
 def load_llm():
-    # Initialize Cerebras client
-    os.environ["CEREBRAS_API_KEY"] = api_key
-    client = Cerebras(api_key=os.environ["CEREBRAS_API_KEY"])
-    Settings.client = client
-    return client
-
-def query_cerebras(client, prompt, max_tokens=1024, temperature=0.2, top_p=1):
-    # Query Cerebras model
-    stream = client.chat.completions.create(
-        messages=[{"role": "user", "content": prompt}],
-        model="llama-3.3-70b",
-        stream=True,
-        max_completion_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-    )
-    return stream
+    # Replace with your API Key and model
+    api_key = os.getenv("CEREBRAS_API_KEY")
+    if not api_key:
+        api_key = st.text_input("Enter your Cerebras API key:")
+    llm = Cerebras(model="llama-3.3-70b", api_key=api_key)
+    return llm
 
 def reset_chat():
     st.session_state.messages = []
+    st.session_state.context = None
     gc.collect()
 
 def display_excel(file):
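The first hunk swaps the raw Cerebras SDK client (and the hand-rolled query_cerebras helper) for LlamaIndex's Cerebras LLM wrapper, but it also drops the @st.cache_resource decorator, so load_llm() now builds a fresh wrapper on every call. A minimal sketch of a cached variant, assuming the same CEREBRAS_API_KEY environment variable; load_llm_cached is a hypothetical name, not part of this commit:

import os
import streamlit as st
from llama_index.llms.cerebras import Cerebras

@st.cache_resource  # construct once per process, reuse across Streamlit reruns
def load_llm_cached():
    # Hypothetical cached variant of the commit's load_llm(); assumes the
    # API key is available in the environment before the app starts.
    return Cerebras(model="llama-3.3-70b", api_key=os.getenv("CEREBRAS_API_KEY"))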
@@ -87,24 +71,42 @@ with st.sidebar:
 
                     docs = loader.load_data()
 
-                    # Setup embedding model
+                    # setup llm & embedding model
+                    llm = load_llm() # Load the Cerebras model
                     embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5", trust_remote_code=True)
-                    Settings.embed_model = embed_model
-
                     # Creating an index over loaded data
+                    Settings.embed_model = embed_model
                     node_parser = MarkdownNodeParser()
                     index = VectorStoreIndex.from_documents(documents=docs, transformations=[node_parser], show_progress=True)
 
-                    # Store index as query engine
+                    # Create the query engine
+                    Settings.llm = llm
                     query_engine = index.as_query_engine(streaming=True)
 
+                    # Customise prompt template
+                    qa_prompt_tmpl_str = (
+                        "Context information is below.\n"
+                        "---------------------\n"
+                        "{context_str}\n"
+                        "---------------------\n"
+                        "Given the context information above I want you to think step by step to answer the query in a highly precise and crisp manner focused on the final answer, in case you don't know the answer say 'I don't know!'.\n"
+                        "Query: {query_str}\n"
+                        "Answer: "
+                    )
+                    qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
+
+                    query_engine.update_prompts(
+                        {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
+                    )
+
                     st.session_state.file_cache[file_key] = query_engine
                 else:
                     query_engine = st.session_state.file_cache[file_key]
 
-                # Inform the user that the file is processed and display the Excel uploaded
+                # Inform the user that the file is processed and Display the PDF uploaded
                 st.success("Ready to Chat!")
                 display_excel(uploaded_file)
+
         except Exception as e:
             st.error(f"An error occurred: {e}")
             st.stop()
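The second hunk wires the LLM and embedding model into Settings and then overrides the engine's QA prompt via update_prompts. If the exact prompt key is ever in doubt, a query engine's registered templates can be listed with get_prompts(); a short sketch, assuming a query_engine built as above:

# List the prompt templates the query engine exposes; the key overridden
# above ("response_synthesizer:text_qa_template") should appear among them.
for key in query_engine.get_prompts():
    print(key)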
@@ -112,7 +114,7 @@ with st.sidebar:
 col1, col2 = st.columns([6, 1])
 
 with col1:
-    st.header(f"RAG over Excel using DuckLink 🐥 & Llama-3.3-70B")
+    st.header(f"RAG over Excel using Dockling 🐥 & Llama-3.3 70B")
 
 with col2:
     st.button("Clear ↺", on_click=reset_chat)
@@ -121,9 +123,6 @@ with col2:
 if "messages" not in st.session_state:
     reset_chat()
 
-# Initialize LLM client
-client = load_llm()
-
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
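With the module-level client = load_llm() removed, the chat handler below leans on the llm name bound inside the sidebar's indexing branch; on a rerun that hits file_cache, that branch never runs and the name is unbound. One hedged workaround, not part of this commit, is to resolve the model from the global Settings populated during indexing:

from llama_index.core import Settings

# Assumption: Settings.llm was assigned while building the index, so the
# chat handler can recover the model without sidebar-scoped names.
llm = Settings.llm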
@@ -142,13 +141,14 @@ if prompt := st.chat_input("What's up?"):
         message_placeholder = st.empty()
         full_response = ""
 
-        # Query Cerebras model
-        stream = query_cerebras(client, prompt)
+        # Using Cerebras stream_chat for streaming response
+        messages = [
+            ChatMessage(role="user", content=prompt)
+        ]
 
-        # Handle streaming response
-        for chunk in stream:
-            content = chunk.choices[0].delta.get("content", "")
-            full_response += content
+        response = llm.stream_chat(messages)
+        for r in response:
+            full_response += r.delta
             message_placeholder.markdown(full_response + "▌")
 
         message_placeholder.markdown(full_response)
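Note that llm.stream_chat(messages) sends only the raw prompt to the model, so the retrieved Excel context never reaches it on this path; routing the request through the cached query_engine keeps the RAG step. Since the engine was built with streaming=True, query() returns a streaming response whose response_gen yields text deltas; a sketch of that alternative, reusing the names from the hunk above:

# Stream through the RAG pipeline instead of the bare LLM: on a streaming
# query engine, query() returns a response whose response_gen yields tokens.
streaming_response = query_engine.query(prompt)
for token in streaming_response.response_gen:
    full_response += token
    message_placeholder.markdown(full_response + "▌")
message_placeholder.markdown(full_response)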
 