# Spaces: Runtime error
# pip install langchain chromadb openai unstructured langchain-community langchain-openai langchain_chroma
"""Index local text documents in Chroma and run a sample retrieval query.

The script performs these steps, in order, at import/run time:

1. Load every ``*.txt`` file from the ``wiki_docs`` directory that sits next
   to this file.
2. Split the loaded documents with ``CharacterTextSplitter``
   (``chunk_size=500``, ``chunk_overlap=50``).
3. Embed the chunks with ``OpenAIEmbeddings`` and store them in the
   ``china_history`` collection of a Chroma server reached over HTTP at
   ``localhost:8000``.
4. Query the resulting vector store through a retriever and print the
   documents returned for a sample question.
"""
import os

import chromadb
import openai
from langchain.text_splitter import CharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader
from langchain_openai import OpenAIEmbeddings

# Put your OpenAI api key here,
# or leave it empty and run the script with the env variable OPENAI_API_KEY.
OPENAI_API_KEY = ""
# A blank placeholder is normalised to the environment value (or None when the
# variable is unset) so an empty string is never passed on as an explicit key.
openai.api_key = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY")

# load documents
# Resolve the data directory relative to this file rather than the current
# working directory, so the script can be launched from anywhere.
current_file_path = os.path.abspath(__file__)
current_directory_path = os.path.dirname(current_file_path)
wiki_docs_path = os.path.join(current_directory_path, "./wiki_docs")
loader = DirectoryLoader(wiki_docs_path, glob="*.txt")
documents = loader.load()

# split documents
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(documents)

# setup OpenAI
embedding_function = OpenAIEmbeddings(openai_api_key=openai.api_key)

# setup Chroma database
# NOTE(review): a Chroma server must already be listening on this host/port;
# the script only connects to it and does not start one.
host = "localhost"
port = "8000"
chroma_client = chromadb.HttpClient(host=host, port=port)

# loading docs into database
print("Loading documents with embeddings into database...")
collection_name = "china_history"
db = Chroma.from_documents(
    documents=texts,
    embedding=embedding_function,
    client=chroma_client,
    collection_name=collection_name,
)
print("Done")

# RAG openai
retriever = db.as_retriever()
# `invoke` is the supported retriever entry point; it replaces the deprecated
# `get_relevant_documents` and returns the matching documents for the query.
docs = retriever.invoke("Who is Wu Zetian?")
print(docs)