jedick committed on
Commit
7565b92
·
1 Parent(s): bf82de1

Update langchain-chroma to fix ChromaDB ValueError

Browse files
Files changed (4) hide show
  1. app.py +2 -2
  2. prompts.py +1 -1
  3. requirements.txt +23 -11
  4. retriever.py +0 -3
app.py CHANGED
@@ -72,7 +72,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
72
  print_exception=False,
73
  )
74
 
75
- # Get graph for compute mode
76
  graph = graph_instances[compute_mode].get(session_hash)
77
  if graph is not None:
78
  print(f"Get {compute_mode} graph for session {session_hash}")
@@ -274,7 +274,7 @@ with gr.Blocks(
274
  )
275
 
276
  loading_data = gr.Textbox(
277
- "Please wait for data loading to complete.",
278
  max_lines=0,
279
  label="Loading Data",
280
  visible=False,
 
72
  print_exception=False,
73
  )
74
 
75
+ # Get graph instance
76
  graph = graph_instances[compute_mode].get(session_hash)
77
  if graph is not None:
78
  print(f"Get {compute_mode} graph for session {session_hash}")
 
274
  )
275
 
276
  loading_data = gr.Textbox(
277
+ "Please wait for the email database to be downloaded and extracted.",
278
  max_lines=0,
279
  label="Loading Data",
280
  visible=False,
prompts.py CHANGED
@@ -61,7 +61,7 @@ def generate_prompt(chat_model, think=False, with_tools=False):
61
  "You must include inline citations (email senders and dates) in each part of your response. "
62
  "Only answer general questions about R if the answer is in the retrieved emails. "
63
  "Your response can include URLs, but make sure they are unchanged from the retrieved emails. " # Qwen
64
- "Respond with 300 words maximum and 30 lines of code maximum. "
65
  )
66
  if with_tools:
67
  prompt = (
 
61
  "You must include inline citations (email senders and dates) in each part of your response. "
62
  "Only answer general questions about R if the answer is in the retrieved emails. "
63
  "Your response can include URLs, but make sure they are unchanged from the retrieved emails. " # Qwen
64
+ "Respond with 500 words maximum and 50 lines of code maximum. "
65
  )
66
  if with_tools:
67
  prompt = (
requirements.txt CHANGED
@@ -1,30 +1,42 @@
1
- langchain>=0.3.25,<1.0
2
- langchain-community>=0.3.24,<1.0
3
- langchain-chroma==0.2.4
4
- langchain-huggingface==0.3.0
5
- langgraph>=0.4.7,<0.6
6
- # Required by langchain-huggingface
7
- sentence-transformers==5.0.0
8
- # Required by Nomic embeddings
9
- einops==0.8.1
10
  torch==2.5.1
 
 
11
  # Stated requirements:
12
- # SmolLM3: transformers>=4.53
13
  # Gemma 3: transformers>=4.50
 
 
14
  # NOTE: Gemma 3 with transformers==4.54.0 gives:
15
  # ValueError: Max cache length is not consistent across layers
16
  transformers==4.51.3
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  # Commented because we have local modifications
18
  #tool-calling-llm==0.1.2
19
  bm25s==0.2.12
20
  ragas==0.2.15
 
21
  # posthog<6.0.0 is temporary fix for ChromaDB telemetry error log messages
22
  # https://github.com/vanna-ai/vanna/issues/917
23
  posthog==5.4.0
 
24
  # Gradio for the web interface
25
  gradio==5.38.2
26
  spaces==0.37.1
 
27
  # For downloading data from S3
28
  boto3==1.39.14
 
29
  # Others
30
- python-dotenv
 
1
+ # Pin torch and chroma versions
 
 
 
 
 
 
 
 
2
  torch==2.5.1
3
+ chromadb==1.0.13
4
+
5
  # Stated requirements:
 
6
  # Gemma 3: transformers>=4.50
7
+ # Qwen3: transformers>=4.51
8
+ # SmolLM3: transformers>=4.53
9
  # NOTE: Gemma 3 with transformers==4.54.0 gives:
10
  # ValueError: Max cache length is not consistent across layers
11
  transformers==4.51.3
12
+
13
+ # Langchain packages
14
+ langchain==0.3.25
15
+ langchain-community==0.3.24
16
+ langchain-chroma==0.2.5
17
+ langchain-huggingface==0.3.0
18
+ langgraph==0.4.7
19
+
20
+ # Required by langchain-huggingface
21
+ sentence-transformers==5.0.0
22
+ # Required by Nomic embeddings
23
+ einops==0.8.1
24
+
25
  # Commented because we have local modifications
26
  #tool-calling-llm==0.1.2
27
  bm25s==0.2.12
28
  ragas==0.2.15
29
+
30
  # posthog<6.0.0 is temporary fix for ChromaDB telemetry error log messages
31
  # https://github.com/vanna-ai/vanna/issues/917
32
  posthog==5.4.0
33
+
34
  # Gradio for the web interface
35
  gradio==5.38.2
36
  spaces==0.37.1
37
+
38
  # For downloading data from S3
39
  boto3==1.39.14
40
+
41
  # Others
42
+ python-dotenv==1.1.1
retriever.py CHANGED
@@ -174,9 +174,6 @@ def BuildRetrieverDense(compute_mode: str, top_k=6):
174
  # Get top k documents
175
  search_kwargs={"k": top_k},
176
  )
177
- # Fix for ValueError('Could not connect to tenant default_tenant. Are you sure it exists?')
178
- # https://github.com/langchain-ai/langchain/issues/26884
179
- chromadb.api.client.SharedSystemClient.clear_system_cache()
180
  return retriever
181
 
182
 
 
174
  # Get top k documents
175
  search_kwargs={"k": top_k},
176
  )
 
 
 
177
  return retriever
178
 
179