from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import OnlinePDFLoader
def get_context(arxiv_link: str, prompt: str) -> str:
    # Load the PDF behind the arXiv link
    loader = OnlinePDFLoader(arxiv_link)
    docs = loader.load()

    # Split the document into smaller chunks
    splitter = RecursiveCharacterTextSplitter()
    chunks = splitter.split_documents(docs)

    # Embed the chunks and index them in a Chroma vector store
    embeddings = OpenAIEmbeddings()
    store = Chroma.from_documents(chunks, embeddings)

    # Create a language model
    llm = OpenAI()

    # Build a QA chain that retrieves context from the vector store
    chain = VectorDBQA.from_chain_type(llm=llm, vectorstore=store)

    # Ask the QA chain the question and return its answer
    return chain.run(prompt)
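

# Example usage (a minimal sketch: the arXiv URL and question below are
# placeholder values, and running this requires the OPENAI_API_KEY
# environment variable to be set for the OpenAI calls above).
if __name__ == "__main__":
    answer = get_context(
        "https://arxiv.org/pdf/1706.03762.pdf",
        "What problem does this paper address?",
    )
    print(answer)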