File size: 3,679 Bytes
74748ba
2d2e179
 
 
74748ba
 
2d2e179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74748ba
 
2d2e179
 
 
 
74748ba
 
2d2e179
74748ba
 
2d2e179
 
 
 
 
 
 
 
 
 
 
 
 
 
14b60e8
2d2e179
 
 
 
 
 
 
 
 
 
74748ba
2d2e179
 
 
 
 
 
 
74748ba
 
2d2e179
 
 
 
 
 
 
 
 
 
 
74748ba
2d2e179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74748ba
2d2e179
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
from PyPDF2 import PdfReader
import tqdm
import os 
import openai
import time
import gradio as gr
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import AzureOpenAI
from chromadb.utils import embedding_functions
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain import VectorDBQA
from langchain.llms import AzureOpenAI
import openai


# Azure OpenAI configuration.
#
# Every setting is written to both the process environment and the matching
# attribute on the `openai` module. A value that is already present in the
# environment wins, so a deployment can inject its own settings without
# editing this file; the literals are fallbacks that keep the previous
# behaviour when nothing is configured.
os.environ["OPENAI_API_TYPE"] = openai.api_type = os.environ.get(
    "OPENAI_API_TYPE", "azure"
)
os.environ["OPENAI_API_VERSION"] = openai.api_version = os.environ.get(
    "OPENAI_API_VERSION", "2022-12-01"
)
os.environ["OPENAI_API_BASE"] = openai.api_base = os.environ.get(
    "OPENAI_API_BASE", "https://openai-endpoint.openai.azure.com/"
)
# SECURITY: a key committed to source control must be treated as leaked.
# Rotate it, provide the replacement through the OPENAI_API_KEY environment
# variable (or the host's secret store), and then delete this fallback literal.
os.environ["OPENAI_API_KEY"] = openai.api_key = os.environ.get(
    "OPENAI_API_KEY", "f056ead909e54ea0a2fb570e2febad2b"
)


# Process-wide list; it starts empty when the module is imported.
embeddings = []


def _embed_text(text, attempts=2, retry_delay=5):
    """Return the embedding vector for *text* from the Azure OpenAI deployment.

    The request is tried up to ``attempts`` times (must be >= 1). After a
    failed attempt the error is printed and the call sleeps ``retry_delay``
    seconds before trying again; the error from the final attempt propagates
    to the caller.
    """
    for attempt in range(1, attempts + 1):
        try:
            response = openai.Embedding.create(
                input=text,
                engine="text-embedding-ada-002",
            )
            return response['data'][0]['embedding']
        except Exception as e:
            # Deliberately broad: any failure (presumably mostly rate limits
            # or transient network errors) gets the same pause-and-retry.
            if attempt == attempts:
                raise
            print(e)
            time.sleep(retry_delay)


def pdf_to_text(file_obj, pdf_text, vectorstore, progress = gr.Progress(track_tqdm=True)):
    """Extract the text of an uploaded PDF and index it in a Chroma collection.

    Args:
        file_obj: The uploaded PDF; handed directly to ``PdfReader``.
        pdf_text: Previous value of the text state. Ignored; it is replaced
            by the newly extracted text.
        vectorstore: Previous value of the vector-store state. Ignored; a new
            ``Chroma`` wrapper is created on every call.
        progress: Gradio progress tracker created with ``track_tqdm=True`` so
            the tqdm loop over chunks is reported in the UI.

    Returns:
        A ``(pdf_text, vectorstore)`` tuple: the concatenated text of all
        pages and the ``Chroma`` store holding the embedded chunks.
    """
    reader = PdfReader(file_obj)
    # `or ""` guards against a page that yields no text at all.
    pdf_text = "".join(page.extract_text() or "" for page in reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(pdf_text)

    # Collect the vectors in a list local to this call. Appending to the
    # module-level `embeddings` list made it grow across uploads, so from the
    # second upload on the vectors no longer lined up one-to-one with `texts`.
    chunk_embeddings = [_embed_text(text) for text in tqdm.tqdm(texts)]

    azure_embeddings = OpenAIEmbeddings(
        document_model_name="text-embedding-ada-002",
        query_model_name="text-embedding-ada-002",
    )
    vectorstore = Chroma("collection", embedding_function=azure_embeddings)
    # The vectors are already computed, so they are written straight into the
    # underlying collection instead of being re-embedded by the wrapper.
    vectorstore._collection.add(
        ids=[f"doc_{i}" for i in range(len(texts))],
        documents=texts,
        embeddings=chunk_embeddings,
        metadatas=[{"source": "source"} for _ in texts],
    )

    return pdf_text, vectorstore

def add_text(state, query, vectorstore):
    """Answer *query* against the indexed PDF and append the turn to the chat.

    Args:
        state: Chat history as a list of ``(question, answer)`` tuples. It is
            not mutated; a new list is returned.
        query: The question typed by the user.
        vectorstore: The vector store produced by ``pdf_to_text``, or the
            initial empty-list state value when nothing has been uploaded yet.

    Returns:
        ``(history, history, vectorstore)`` - the updated history twice (once
        for the chatbot display, once for the history state) followed by the
        unchanged vector store.
    """
    # Until a PDF has been indexed the vector-store state still holds its
    # initial placeholder (an empty list), which the QA chain cannot use.
    # Reply with a hint instead of raising inside the event handler.
    if vectorstore is None or isinstance(vectorstore, list):
        answer = "Please upload a PDF before asking a question."
    else:
        llm = AzureOpenAI(deployment_name="davinci003", model_name="text-davinci-003")
        qa_chain = VectorDBQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            vectorstore=vectorstore,
        )
        answer = qa_chain.run(query)

    state = state + [(query, answer)]
    return state, state, vectorstore


with gr.Blocks(title="AOAI") as demo:
    # State holders for the extracted PDF text and the vector store; both
    # start as empty lists and are overwritten by pdf_to_text after an upload.
    pdf_text = gr.State([])
    vectorstore = gr.State([])
    text_box = gr.TextArea()
    # A file-type filter has to be a category name ("image", "text", ...) or
    # a dotted extension; the bare string "pdf" is neither, so ".pdf" is used
    # to restrict the picker to PDF files.
    upload_button = gr.UploadButton("Click to Upload a File", file_types=[".pdf"])
    upload_button.upload(
        pdf_to_text,
        inputs=[upload_button, pdf_text, vectorstore],
        outputs=[pdf_text, vectorstore],
    )

    with gr.Row():
        chatbot = gr.Chatbot()
        # Chat history as a list of (question, answer) tuples.
        state = gr.State([])

        text = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)

        # Pressing enter runs add_text; its three return values feed the
        # chatbot display, the history state and the vector-store state.
        text.submit(add_text, [state, text, vectorstore], [chatbot, state, vectorstore])