Spaces:
Sleeping
Sleeping
File size: 2,254 Bytes
6998bc6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_core.documents import Document
embed = OllamaEmbeddings(
model="jina/jina-embeddings-v2-base-es") # Initialize embeddings
def save_handbook_to_chroma(handbook_data: list) -> bool:
"""
Saves the entire handbook data to Chroma with embeddings.
Args:
handbook_data (list): List of dictionaries containing title, URL, and text content of each section.
Returns:
bool: True if the handbook is saved correctly, False otherwise.
"""
embeddings = OllamaEmbeddings(
model="llama3.1",
)
documents = []
for chapter in handbook_data:
for section in chapter:
document = Document(
page_content=section.get('text', ''),
metadata={
'title': section.get('title', ''),
'url': section.get('url', '')
}
)
documents.append(document)
print("Saving handbook to Chroma. This process can take a long time.")
try:
ids = [str(i) for i in range(1, len(documents) + 1)]
Chroma.from_documents(
documents=documents, embedding=embed, persist_directory="./chroma_data", ids=ids)
return True
except Exception as e:
print(f"Error saving handbook to Chroma: {e}")
return False
def ask_chroma(question: str, k: int = 3) -> dict:
"""
Asks Chroma a question and returns the top k most similar results.
Args:
question (str): The question to ask Chroma.
k (int): The number of most similar results to return. Default is 3.
Returns:
dict: A dictionary containing the top k most similar results.
"""
try:
vectorstore = Chroma(
embedding_function=embed, # Provide the embedding function
persist_directory="./chroma_data"
)
results = vectorstore.similarity_search(question, k)
return results
except Exception as e:
print(f"Error asking Chroma: {e}")
return {}
# similars = ask_chroma(
# "¿Quienes asisten al consejo de barrio?", 2)
# for similar in similars:
# print(similar.page_content+"\n"*3)
|