from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import OnlinePDFLoader

def get_context(arxiv_link: str, prompt: str) -> str:

    # Load the PDF behind the arXiv link as a list of Documents
    loader = OnlinePDFLoader(arxiv_link)
    docs = loader.load()

    # Split the documents into overlapping text chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    # Embed the chunks and index them in a Chroma vector store
    embeddings = OpenAIEmbeddings()
    store = Chroma.from_documents(chunks, embeddings)

    # Create the language model
    llm = OpenAI()

    # Build a QA chain that retrieves relevant chunks from the vector store
    chain = VectorDBQA.from_chain_type(llm=llm, chain_type="stuff", vectorstore=store)

    # Answer the prompt using the retrieved context
    return chain.run(prompt)
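

# Minimal usage sketch (an assumption, not part of the original file): the
# arXiv URL below is just an example PDF link, and the OPENAI_API_KEY
# environment variable is assumed to be set before running.
if __name__ == "__main__":
    example_link = "https://arxiv.org/pdf/1706.03762.pdf"  # example arXiv PDF; swap in any paper
    answer = get_context(example_link, "What problem does this paper address?")
    print(answer)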