File size: 2,044 Bytes
a7c1ef2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from langchain.embeddings import OpenAIEmbeddings # for creating embeddings
from langchain.vectorstores import Chroma # for the vectorization part
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import OpenAI # the LLM model we'll use (CHatGPT)
import gradio as gr
from gradio import inputs, outputs
from gradio.mix import Parallel

# Fixed number of source-document boxes shown in the UI; chat_pdf pads its
# return value to match this count.
max_sources = 4

# Embed queries with OpenAI, retrieve from a persisted Chroma store, and
# answer with a ConversationalRetrievalChain that also returns the documents
# it retrieved (needed to populate the "Source Document" boxes below).
# NOTE(review): requires OPENAI_API_KEY in the environment and an existing
# Chroma store at /chroma — verify both before deploying.
embedding = OpenAIEmbeddings()
vectordb = Chroma(persist_directory="/chroma", embedding_function=embedding)
pdf_qa = ConversationalRetrievalChain.from_llm(OpenAI(temperature=0.9, model_name="gpt-3.5-turbo"),
                                    vectordb.as_retriever(), return_source_documents=True)


def chat_pdf(query, chat_history=""):
    """Answer *query* against the PDF vector store.

    Args:
        query: The user's question.
        chat_history: Prior conversation turns, passed straight through to
            the retrieval chain (empty string for a fresh conversation).

    Returns:
        A list of exactly ``1 + max_sources`` strings: the answer first,
        then one entry per retrieved source document, padded with empty
        strings so the length always matches the Gradio output components.
    """
    result = pdf_qa({"question": query, "chat_history": chat_history})
    answer = result["answer"]
    source_docs = result["source_documents"]

    # Format each source as a metadata header followed by its page content.
    # Truncate to max_sources: without the slice, an over-long result list
    # would make the padding multiplier negative and return more values
    # than the interface has output boxes.
    cleaned_docs = [
        f"Metadata: {doc.metadata}\n{doc.page_content}"
        for doc in source_docs[:max_sources]
    ]

    # Pad with empty strings so the return length is always 1 + max_sources.
    return [answer] + cleaned_docs + [""] * (max_sources - len(cleaned_docs))


def create_outputs(num_sources):
    """Build the Gradio output components for the interface.

    Args:
        num_sources: Number of "Source Document" text boxes to create,
            in addition to the single "Answer" box.

    Returns:
        A list of ``gr.outputs.Textbox`` components: the answer box first,
        then one labeled box per source document.
    """
    # Named `components` (not `outputs`) so the local does not shadow the
    # `outputs` module imported from gradio at the top of the file.
    components = [gr.outputs.Textbox(label="Answer")]
    components.extend(
        gr.outputs.Textbox(label=f"Source Document {i}")
        for i in range(1, num_sources + 1)
    )
    return components


# Wire the QA function to a Gradio interface: one query textbox in, an
# answer box plus max_sources source-document boxes out (see create_outputs).
# NOTE(review): gr.inputs / gr.outputs / layout= are the pre-3.x gradio API —
# confirm the pinned gradio version still supports them.
iface = gr.Interface(
    fn=chat_pdf,
    inputs=[gr.inputs.Textbox(label="Query")],
    outputs=create_outputs(max_sources),
    layout="vertical",
    examples=[
        ["Give 2 species of fulgoroidea"],
        ["What colors are found among fulgoroidea?"],
        ["Why are fulgoroidea so cute?"]
        # Add more example queries if desired
    ],
    css=".answer, .source_documents {width: 45%; float: left; margin-right: 20px;}"
)

# debug=True surfaces tracebacks in the browser during development.
iface.launch(debug=True)