Sbnos committed on
Commit 64e42d8 · verified · 1 Parent(s): 24337c4

Update prompt and streaming

Files changed (1): app.py (+47 -23)
app.py CHANGED
@@ -2,7 +2,8 @@ import os
 import streamlit as st
 from together import Together
 from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
+# Use the updated HuggingFace Embeddings class
+from langchain_huggingface import HuggingFaceEmbeddings
 
 # --- Configuration ---
 # TogetherAI API key (env var name pilotikval)
@@ -14,9 +15,9 @@ if not TOGETHER_API_KEY:
 # Initialize TogetherAI client
 client = Together(api_key=TOGETHER_API_KEY)
 
-# Embeddings setup
+# Embeddings setup (new huggingface integration)
 EMBED_MODEL_NAME = "BAAI/bge-base-en"
-embeddings = HuggingFaceBgeEmbeddings(
+embeddings = HuggingFaceEmbeddings(
     model_name=EMBED_MODEL_NAME,
     encode_kwargs={"normalize_embeddings": True},
 )
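
The swap above replaces the deprecated `langchain_community.embeddings.HuggingFaceBgeEmbeddings` with the `langchain-huggingface` integration, which keeps the same constructor arguments. A minimal sketch of the new setup in isolation (the sample query is illustrative):

    # pip install -U langchain-huggingface sentence-transformers
    from langchain_huggingface import HuggingFaceEmbeddings

    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-base-en",
        encode_kwargs={"normalize_embeddings": True},  # unit-length vectors for cosine similarity
    )

    # Sanity check: bge-base-en produces 768-dimensional vectors
    vec = embeddings.embed_query("What are common symptoms of anemia?")
    print(len(vec))  # -> 768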
@@ -54,17 +55,33 @@ vectorstore = Chroma(
 )
 retriever = vectorstore.as_retriever(search_kwargs={"k": 20})  # k=20
 
-# System prompt template with instruction for detailed long answers
+# System prompt template for long, detailed answers
 def build_system(context: str) -> dict:
-    return {
-        "role": "system",
-        "content": (
-            "You are an expert medical assistant. Provide a thorough, detailed, and complete answer. "
-            "If you don't know, say you don't know.\n"
-            "Use the following context from medical docs to answer.\n\n"
-            "Context:\n" + context
-        )
-    }
+    """
+    Build the system prompt that instructs the model to:
+    - Act as an expert medical assistant and attentive listener.
+    - Leverage all retrieved context to craft detailed, accurate, and empathetic responses.
+    - Ask clarifying follow-up questions if the user’s query is ambiguous.
+    - Structure answers clearly, using headings, bullet points, and step-by-step explanations.
+    - Cite relevant context sections when appropriate.
+    - Maintain conversational memory so follow-up queries build upon prior discussion.
+    """
+    prompt = (
+        "You are a world-class medical assistant and conversational partner. "
+        "Listen carefully to the user’s questions, reference the context below, and provide a thorough, evidence-based response. "
+        "If any part of the question is unclear, ask a clarifying question before proceeding. "
+        "Organize your answer with clear headings or bullet points, and refer back to specific context snippets as needed. "
+        "Always be empathetic, concise, and precise in your medical explanations. "
+        "Retain memory of previous user messages to support follow-up interactions.\n\n"
+        "=== Retrieved Context Start ===\n"
+        + context +
+        "\n=== Retrieved Context End ==="
+    )
+    return {"role": "system", "content": prompt}
 
 st.title("🩺 DocChatter RAG (Streaming & Memory)")
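
Since `build_system` now returns a plain chat-message dict, its output can be checked in isolation. A quick sketch (the sample context string is made up for illustration):

    msg = build_system("Aspirin irreversibly inhibits COX-1.\n---\nIbuprofen is a non-selective NSAID.")
    assert msg["role"] == "system"
    assert "=== Retrieved Context Start ===" in msg["content"]
    assert msg["content"].rstrip().endswith("=== Retrieved Context End ===")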
@@ -83,27 +100,29 @@ with chat_tab:
     for msg in st.session_state.chat_history:
         st.chat_message(msg['role']).write(msg['content'])
 
-    # On new input
+    # Handle new user input
     if user_prompt:
         # Echo user
         st.chat_message("user").write(user_prompt)
         st.session_state.chat_history.append({"role": "user", "content": user_prompt})
 
-        # Retrieve top-k docs
-        docs = retriever.get_relevant_documents(user_prompt)
+        # Retrieve top-k documents
+        try:
+            docs = retriever.invoke(user_prompt)
+        except Exception:
+            docs = retriever.get_relevant_documents(user_prompt)
         context = "\n---\n".join([d.page_content for d in docs])
 
-        # Build message sequence: system + full history
+        # Build TogetherAI message sequence
         messages = [build_system(context)]
         for m in st.session_state.chat_history:
             messages.append(m)
 
-        # Prepare streaming response
+        # Stream assistant response
         response_container = st.chat_message("assistant")
         stream_placeholder = response_container.empty()
         answer = ""
 
-        # Stream tokens
         for token in client.chat.completions.create(
             model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
             messages=messages,
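
`retriever.invoke` is the current LangChain entry point and takes the query string directly; `get_relevant_documents` is kept only as a fallback for older langchain-core versions. The retrieval step on its own, as a sketch (the query text is illustrative):

    docs = retriever.invoke("first-line treatment for hypertension")  # list of Document objects
    context = "\n---\n".join(d.page_content for d in docs)
    print(f"retrieved {len(docs)} chunks, {len(context)} characters of context")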
@@ -111,10 +130,15 @@ with chat_tab:
             temperature=0.1,
             stream=True
         ):
-            if hasattr(token, 'choices') and token.choices[0].delta.content:
-                delta = token.choices[0].delta.content
-                answer += delta
-                stream_placeholder.write(answer)
+            try:
+                choice = token.choices[0]
+                delta = getattr(choice.delta, 'content', '')
+                if delta:
+                    answer += delta
+                    stream_placeholder.write(answer)
+            except (IndexError, AttributeError):
+                # Skip empty or malformed token
+                continue
 
         # Save assistant response
         st.session_state.chat_history.append({"role": "assistant", "content": answer})
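
With `stream=True` the Together SDK yields OpenAI-style chunks, so each chunk carries the next text fragment in `choices[0].delta.content`. A self-contained sketch of the same accumulation loop outside Streamlit (the `max_tokens` value is an assumption, since the app's actual value falls outside this hunk; model name and env var follow the app):

    import os
    from together import Together

    client = Together(api_key=os.environ["pilotikval"])

    answer = ""
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-4-Scout-17B-16E-Instruct",
        messages=[{"role": "user", "content": "Briefly, what is tachycardia?"}],
        max_tokens=512,  # assumed for this sketch
        temperature=0.1,
        stream=True,
    ):
        try:
            delta = chunk.choices[0].delta.content or ""
        except (IndexError, AttributeError):
            continue  # skip keep-alive or malformed chunks
        answer += delta
        print(delta, end="", flush=True)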
 