Spaces:
Running
Running
uploading after new pre-commit rules
Browse files
lynxkite-lynxscribe/src/lynxkite_lynxscribe/lynxscribe_ops.py
CHANGED
@@ -80,9 +80,7 @@ def cloud_file_loader(
|
|
80 |
|
81 |
bucket = client.bucket(bucket_name)
|
82 |
blobs = bucket.list_blobs(prefix=prefix)
|
83 |
-
file_urls = [
|
84 |
-
blob.public_url for blob in blobs if blob.name.endswith(accepted_file_types)
|
85 |
-
]
|
86 |
return {"file_urls": file_urls}
|
87 |
else:
|
88 |
raise ValueError(f"Cloud provider '{cloud_provider}' is not supported.")
|
@@ -172,25 +170,20 @@ async def ls_image_describer(
|
|
172 |
|
173 |
# creating the prompt objects
|
174 |
ch_prompt_list = [
|
175 |
-
ChatCompletionPrompt(model=llm_visual_model, messages=prompt)
|
176 |
-
for prompt in prompt_list
|
177 |
]
|
178 |
|
179 |
# get the image descriptions
|
180 |
-
tasks = [
|
181 |
-
llm.acreate_completion(completion_prompt=_prompt) for _prompt in ch_prompt_list
|
182 |
-
]
|
183 |
out_completions = await asyncio.gather(*tasks)
|
184 |
results = [
|
185 |
-
dictionary_corrector(result.choices[0].message.content)
|
186 |
-
for result in out_completions
|
187 |
]
|
188 |
|
189 |
# getting the image descriptions (list of dictionaries {image_url: URL, description: description})
|
190 |
# TODO: some result class could be a better idea (will be developed in LynxScribe)
|
191 |
image_descriptions = [
|
192 |
-
{"image_url": image_urls[i], "description": results[i]}
|
193 |
-
for i in range(len(image_urls))
|
194 |
]
|
195 |
|
196 |
return {"image_descriptions": image_descriptions}
|
@@ -232,13 +225,9 @@ async def ls_image_rag_builder(
|
|
232 |
# b) getting the vector store
|
233 |
# TODO: vdb_provider_name should be ENUM, and other parameters should appear accordingly
|
234 |
if vdb_provider_name == "chromadb":
|
235 |
-
vector_store = get_vector_store(
|
236 |
-
name=vdb_provider_name, collection_name=vdb_collection_name
|
237 |
-
)
|
238 |
elif vdb_provider_name == "faiss":
|
239 |
-
vector_store = get_vector_store(
|
240 |
-
name=vdb_provider_name, num_dimensions=vdb_num_dimensions
|
241 |
-
)
|
242 |
else:
|
243 |
raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
|
244 |
|
@@ -334,9 +323,7 @@ async def search_context(rag_graph, text, *, top_k=3):
|
|
334 |
image_url = emb_sim.embedding.metadata["image_url"]
|
335 |
score = emb_sim.score
|
336 |
description = emb_sim.embedding.document
|
337 |
-
result_list.append(
|
338 |
-
{"image_url": image_url, "score": score, "description": description}
|
339 |
-
)
|
340 |
|
341 |
return {"embedding_similarities": result_list}
|
342 |
|
@@ -381,13 +368,9 @@ def ls_text_rag_loader(
|
|
381 |
|
382 |
# getting the vector store
|
383 |
if vdb_provider_name == "chromadb":
|
384 |
-
vector_store = get_vector_store(
|
385 |
-
name=vdb_provider_name, collection_name=vdb_collection_name
|
386 |
-
)
|
387 |
elif vdb_provider_name == "faiss":
|
388 |
-
vector_store = get_vector_store(
|
389 |
-
name=vdb_provider_name, num_dimensions=vdb_num_dimensions
|
390 |
-
)
|
391 |
else:
|
392 |
raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
|
393 |
|
|
|
80 |
|
81 |
bucket = client.bucket(bucket_name)
|
82 |
blobs = bucket.list_blobs(prefix=prefix)
|
83 |
+
file_urls = [blob.public_url for blob in blobs if blob.name.endswith(accepted_file_types)]
|
|
|
|
|
84 |
return {"file_urls": file_urls}
|
85 |
else:
|
86 |
raise ValueError(f"Cloud provider '{cloud_provider}' is not supported.")
|
|
|
170 |
|
171 |
# creating the prompt objects
|
172 |
ch_prompt_list = [
|
173 |
+
ChatCompletionPrompt(model=llm_visual_model, messages=prompt) for prompt in prompt_list
|
|
|
174 |
]
|
175 |
|
176 |
# get the image descriptions
|
177 |
+
tasks = [llm.acreate_completion(completion_prompt=_prompt) for _prompt in ch_prompt_list]
|
|
|
|
|
178 |
out_completions = await asyncio.gather(*tasks)
|
179 |
results = [
|
180 |
+
dictionary_corrector(result.choices[0].message.content) for result in out_completions
|
|
|
181 |
]
|
182 |
|
183 |
# getting the image descriptions (list of dictionaries {image_url: URL, description: description})
|
184 |
# TODO: some result class could be a better idea (will be developed in LynxScribe)
|
185 |
image_descriptions = [
|
186 |
+
{"image_url": image_urls[i], "description": results[i]} for i in range(len(image_urls))
|
|
|
187 |
]
|
188 |
|
189 |
return {"image_descriptions": image_descriptions}
|
|
|
225 |
# b) getting the vector store
|
226 |
# TODO: vdb_provider_name should be ENUM, and other parameters should appear accordingly
|
227 |
if vdb_provider_name == "chromadb":
|
228 |
+
vector_store = get_vector_store(name=vdb_provider_name, collection_name=vdb_collection_name)
|
|
|
|
|
229 |
elif vdb_provider_name == "faiss":
|
230 |
+
vector_store = get_vector_store(name=vdb_provider_name, num_dimensions=vdb_num_dimensions)
|
|
|
|
|
231 |
else:
|
232 |
raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
|
233 |
|
|
|
323 |
image_url = emb_sim.embedding.metadata["image_url"]
|
324 |
score = emb_sim.score
|
325 |
description = emb_sim.embedding.document
|
326 |
+
result_list.append({"image_url": image_url, "score": score, "description": description})
|
|
|
|
|
327 |
|
328 |
return {"embedding_similarities": result_list}
|
329 |
|
|
|
368 |
|
369 |
# getting the vector store
|
370 |
if vdb_provider_name == "chromadb":
|
371 |
+
vector_store = get_vector_store(name=vdb_provider_name, collection_name=vdb_collection_name)
|
|
|
|
|
372 |
elif vdb_provider_name == "faiss":
|
373 |
+
vector_store = get_vector_store(name=vdb_provider_name, num_dimensions=vdb_num_dimensions)
|
|
|
|
|
374 |
else:
|
375 |
raise ValueError(f"Vector store name '{vdb_provider_name}' is not supported.")
|
376 |
|