Spaces:
Sleeping
Sleeping
File size: 1,089 Bytes
308da16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import pandas as pd
import numpy as np
import os
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
def create_Doc(data):
    """Convert every row of ``data`` into a LangChain ``Document``.

    Args:
        data: Table-like object exposing ``iterrows()`` (e.g. a pandas
            DataFrame) whose rows provide ``lyric`` and ``title`` fields.

    Returns:
        A list with one ``Document`` per row, in iteration order. Each
        document's ``page_content`` is the row's ``lyric``; its metadata
        holds the row's ``title`` under ``"name"`` and the row's index
        label under ``"id"``.
    """
    return [
        Document(
            page_content=row.lyric,
            metadata={"name": row.title, "id": row_label},
        )
        for row_label, row in data.iterrows()
    ]
def load_embedding(model_name: str = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'):
    """Create a ``HuggingFaceEmbeddings`` instance for the given model.

    Args:
        model_name: Identifier of the embedding model to load. Defaults to
            ``sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2``.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` object.
    """
    # Forward the caller's choice; previously the argument was ignored and
    # the default model string was always used.
    return HuggingFaceEmbeddings(model_name=model_name)
def load_vectorstore(documents, embeddings):
    """Build a Chroma vector store from ``documents``.

    Args:
        documents: Documents handed to ``Chroma.from_documents``.
        embeddings: Embedding object passed as the ``embedding`` argument.

    Returns:
        The store returned by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(documents, embedding=embeddings)
def process(list_text, vectorstore, search_type: str = 'mmr'):
    """Run every query in ``list_text`` through a retriever over ``vectorstore``.

    Args:
        list_text: Iterable of query inputs; each is passed to the
            retriever's ``invoke``.
        vectorstore: Object exposing ``as_retriever(search_type=...)``.
        search_type: Search strategy forwarded to ``as_retriever``.
            Defaults to ``'mmr'``.

    Returns:
        A list holding the retriever's result for each query, in the same
        order as ``list_text``. Empty when ``list_text`` is empty.
    """
    # Keep the retriever: it is the object that gets invoked. The original
    # discarded it and called ``invoke`` on the vector store itself, so
    # ``search_type`` never took effect.
    retriever = vectorstore.as_retriever(search_type=search_type)
    return [retriever.invoke(query) for query in list_text]