Spaces:
Sleeping
Sleeping
import pandas as pd | |
import numpy as np | |
import os | |
from langchain_core.documents import Document | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_chroma import Chroma | |
def create_Doc(data):
    """Convert each row of a lyrics DataFrame into a LangChain ``Document``.

    Args:
        data: pandas DataFrame with a ``lyric`` column (used as the document
            text) and a ``title`` column (stored in the metadata).

    Returns:
        A list with one ``Document`` per row, in row order. Each document's
        metadata is ``{"name": <title>, "id": <row index label>}``.

    Raises:
        KeyError: if the ``lyric`` or ``title`` column is missing.
    """
    # Index the columns explicitly instead of using attribute access on the
    # row Series from iterrows(): attribute access is shadowed by any Series
    # attribute with the same name, and iterrows() builds a Series per row.
    return [
        Document(page_content=lyric, metadata={"name": title, "id": row_id})
        for row_id, lyric, title in zip(data.index, data["lyric"], data["title"])
    ]
def load_embedding(model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'):
    """Create a HuggingFace embedding model wrapper.

    Args:
        model_name: Hugging Face model identifier passed to
            ``HuggingFaceEmbeddings``. Defaults to the multilingual
            MiniLM paraphrase model.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.
    """
    # Forward the argument: previously the model id was hard-coded here, so
    # the ``model_name`` parameter had no effect.
    return HuggingFaceEmbeddings(model_name=model_name)
def load_vectorstore(documents, embeddings):
    """Build a Chroma vector store from the given documents.

    Args:
        documents: documents to index, passed positionally to
            ``Chroma.from_documents``.
        embeddings: embedding object passed as the ``embedding`` keyword.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(documents, embedding=embeddings)
def process(list_text, vectorstore, search_type = 'mmr'):
    """Run every query in ``list_text`` through a retriever on ``vectorstore``.

    Args:
        list_text: iterable of query strings.
        vectorstore: object exposing ``as_retriever(search_type=...)``
            (e.g. the store returned by ``load_vectorstore``).
        search_type: retrieval strategy forwarded to ``as_retriever``;
            defaults to ``'mmr'``.

    Returns:
        A list with one entry per query, in input order. Each entry is
        whatever the retriever's ``invoke`` returns for that query
        (presumably a list of matching documents; confirm against the
        retriever in use). An empty ``list_text`` yields ``[]``.
    """
    # Keep the retriever: the original discarded it and called ``invoke`` on
    # the vector store itself, which has no such method, so ``search_type``
    # was never applied.
    retriever = vectorstore.as_retriever(search_type=search_type)
    return [retriever.invoke(query) for query in list_text]