Spaces:
Sleeping
Sleeping
Daniel Foley
committed on
Commit
·
0716283
1
Parent(s):
706b16d
Alternate between 1 and 2 workers per call (1.5 on average) for parallel document processing, to avoid rate limits
Browse files
RAG.py
CHANGED
@@ -15,6 +15,7 @@ from typing import Dict, Any, Optional, List, Tuple
|
|
15 |
import logging
|
16 |
import concurrent.futures
|
17 |
import json
|
|
|
18 |
|
19 |
def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 100) -> Tuple[List[Document], List[float]]:
|
20 |
start = time.time()
|
@@ -85,8 +86,20 @@ def process_single_document(doc: Document) -> Optional[Document]:
|
|
85 |
)
|
86 |
return None
|
87 |
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
start = time.time()
|
91 |
if not documents:
|
92 |
return []
|
@@ -94,8 +107,12 @@ def rerank(documents: List[Document], query: str, max_workers: int = 3) -> List[
|
|
94 |
meta_start = time.time()
|
95 |
full_docs = []
|
96 |
|
|
|
|
|
|
|
|
|
97 |
# Process documents in parallel using ThreadPoolExecutor
|
98 |
-
with concurrent.futures.ThreadPoolExecutor(max_workers=
|
99 |
# Submit all document processing tasks
|
100 |
future_to_doc = {
|
101 |
executor.submit(process_single_document, doc): doc
|
@@ -205,7 +222,7 @@ def RAG(llm: Any, query: str,vectorstore:PineconeVectorStore, top: int = 10, k:
|
|
205 |
First, reason about the answer between <REASONING></REASONING> headers,
|
206 |
based on the context determine if there is sufficient material for answering the exact question,
|
207 |
return either <VALID>YES</VALID> or <VALID>NO</VALID>
|
208 |
-
then return a response between <RESPONSE></RESPONSE> headers
|
209 |
Here is an example
|
210 |
<EXAMPLE>
|
211 |
<QUERY>Are pineapples a good fuel for cars?</QUERY>
|
|
|
15 |
import logging
|
16 |
import concurrent.futures
|
17 |
import json
|
18 |
+
from threading import Lock
|
19 |
|
20 |
def retrieve(query: str,vectorstore:PineconeVectorStore, k: int = 100) -> Tuple[List[Document], List[float]]:
|
21 |
start = time.time()
|
|
|
86 |
)
|
87 |
return None
|
88 |
|
89 |
+
# Global state to track alternating behavior
|
90 |
+
_use_two_workers = False
|
91 |
+
_worker_lock = Lock()
|
92 |
+
|
93 |
+
def get_current_worker_count() -> int:
|
94 |
+
"""Thread-safe way to get and toggle the worker count between 1 and 2."""
|
95 |
+
global _use_two_workers
|
96 |
+
with _worker_lock:
|
97 |
+
current_workers = 2 if _use_two_workers else 1
|
98 |
+
_use_two_workers = not _use_two_workers # Toggle for next time
|
99 |
+
return current_workers
|
100 |
+
|
101 |
+
def rerank(documents: List[Document], query: str) -> List[Document]:
|
102 |
+
"""Ingest more metadata and rerank documents using BM25 with alternating worker counts."""
|
103 |
start = time.time()
|
104 |
if not documents:
|
105 |
return []
|
|
|
107 |
meta_start = time.time()
|
108 |
full_docs = []
|
109 |
|
110 |
+
# Get the worker count for this specific call
|
111 |
+
worker_count = get_current_worker_count()
|
112 |
+
logging.info(f"Processing with {worker_count} worker{'s' if worker_count > 1 else ''}")
|
113 |
+
|
114 |
# Process documents in parallel using ThreadPoolExecutor
|
115 |
+
with concurrent.futures.ThreadPoolExecutor(max_workers=worker_count) as executor:
|
116 |
# Submit all document processing tasks
|
117 |
future_to_doc = {
|
118 |
executor.submit(process_single_document, doc): doc
|
|
|
222 |
First, reason about the answer between <REASONING></REASONING> headers,
|
223 |
based on the context determine if there is sufficient material for answering the exact question,
|
224 |
return either <VALID>YES</VALID> or <VALID>NO</VALID>
|
225 |
+
then return a response between <RESPONSE></RESPONSE> headers:
|
226 |
Here is an example
|
227 |
<EXAMPLE>
|
228 |
<QUERY>Are pineapples a good fuel for cars?</QUERY>
|