cloud-sean committed on
Commit
2d2e179
·
1 Parent(s): c704005

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -89
app.py CHANGED
@@ -1,104 +1,109 @@
1
  import gradio as gr
 
 
 
2
  import openai
3
- import os
4
  import time
5
- import shutil
6
- from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader
7
- from threading import Lock
8
- from typing import Optional, Tuple
9
- from azure.ai.formrecognizer import DocumentAnalysisClient
10
- from azure.core.credentials import AzureKeyCredential
 
 
 
 
 
 
 
 
 
 
11
 
12
# Credentials are read from the environment instead of being committed to
# source control. Any key that was previously hard-coded here has been
# published and must be rotated.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before starting the app."
    )

# Form Recognizer endpoint; the default keeps the previously used region.
endpoint = os.environ.get(
    "AZURE_FORM_RECOGNIZER_ENDPOINT",
    "https://eastus.api.cognitive.microsoft.com/",
)
credential = AzureKeyCredential(os.environ["AZURE_FORM_RECOGNIZER_KEY"])

# Shared client used by pdf_to_text to run document analysis.
form_recognizer_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)
 
 
 
17
 
18
 
 
19
 
20
class ChatWrapper:
    """Callable chat handler that answers a message from the files in ``data``.

    A lock serializes calls, so only one request at a time loads the
    documents and queries the index.
    """

    def __init__(self):
        self.lock = Lock()

    def __call__(self, input, history: Optional[list]):
        """Answer ``input`` and append the exchange to ``history``.

        Args:
            input: The user's message.
            history: Prior ``(message, reply)`` pairs, or ``None`` for a new
                conversation. A list passed in is extended in place.

        Returns:
            ``(history, history)`` -- the same list twice, one value per
            output component wired to this handler.
        """
        # ``with`` releases the lock on every exit path.
        with self.lock:
            history = history or []
            try:
                # The index is rebuilt from the ``data`` directory on each call.
                documents = SimpleDirectoryReader('data').load_data()
                index = GPTSimpleVectorIndex(documents)
                response = index.query(input, verbose=True)
                answer = str(response)
            except Exception as e:
                # Report the failure as the reply so the handler still
                # returns two values; returning a single component here
                # would not match the two outputs it is wired to.
                answer = f"Error: {e}"
            history.append((input, answer))
        return history, history
39
-
40
def make_status_box_visible():
    """Return two Gradio updates: the first shows its target, the second hides its target."""
    reveal = gr.update(visible=True)
    conceal = gr.update(visible=False)
    return reveal, conceal
42
-
43
def create_index():
    """Build a vector index over the documents in the ``data`` directory.

    Returns:
        The newly built ``GPTSimpleVectorIndex``. It was previously built
        and then discarded, which made the call useless to its caller.
    """
    documents = SimpleDirectoryReader('data').load_data()
    return GPTSimpleVectorIndex(documents)
46
-
47
-
48
def pdf_to_text(file_obj, progress=gr.Progress()):
    """Analyze an uploaded file and save the extracted text under ``data/``.

    Args:
        file_obj: Uploaded file object; only its ``name`` attribute (a path
            that can be opened) is used.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        A 3-tuple: the extracted text, an update with ``visible=True`` and an
        update with ``visible=False``.
    """
    import os
    import random
    import string

    progress(0.2, desc="Uploading file...")

    # The ``with`` block closes the upload; no explicit close() is needed.
    with open(file_obj.name, "rb") as f:
        progress(0.5, desc="Analyzing file...")
        poller = form_recognizer_client.begin_analyze_document("prebuilt-document", f)
        progress(0.8, desc="Applying OCR...")
        result = poller.result()
    progress(0.9, desc="Azure OpenAI Magic...")

    content = str(result.content)

    # Store the text under a random 10-character name so uploads do not
    # overwrite each other; create the folder on first use.
    os.makedirs("data", exist_ok=True)
    stem = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
    with open(os.path.join("data", stem + ".txt"), "w", encoding="utf-8") as out:
        out.write(content)

    progress(1.0, desc="Done!")
    # Short pause before returning, kept from the original flow.
    time.sleep(1.5)
    return content, gr.update(visible=True), gr.update(visible=False)
68
-
69
# Single shared handler; it serializes its own calls internally.
chat = ChatWrapper()

with gr.Blocks(css="footer {visibility: hidden;}", theme="grass") as demo:
    chat_history_state = gr.State()
    pdf_content = gr.State()

    gr.Markdown("""
<sub><sup>created by [@shamill](https://whoplus.microsoft.com/?_vwp=true&_vwpAlias=SHAMILL)</sup></sub>
# Customized GPT-3 Chatbot

GPT-3.5 is a powerful language model, it can be used to create a chatbot that can have a conversation with you. This demo allows you to customize the context of the conversation, and the chatbot will stick to the confines of the context you provide, avoiding made up answers. The chatbot is powered by Azure's OpenAI GPT-3 API.""")

    # Chat view: hidden until a context file has been processed.
    with gr.Column(visible=False) as chat_interface:
        with gr.Row():
            chatbot = gr.Chatbot()
        with gr.Row():
            # ``value`` is the initial-text parameter; ``default`` is its
            # deprecated name.
            message_box = gr.Textbox(lines=2, placeholder="Type a message...", value="Hi there!")
            submit_button = gr.Button("Submit").style(full_width=False)
        submit_button.click(chat, inputs=[message_box, chat_history_state], outputs=[chatbot, chat_history_state])

    # Upload view: shown first, where the user provides the context PDF.
    with gr.Column(visible=True) as upload_interface:
        with gr.Row():
            # Processing is triggered by the button below; ``gr.File`` takes
            # no ``fn`` argument, so the stray one was dropped.
            upload = gr.File(label="Upload a context pdf file", type="file")
        with gr.Row():
            button = gr.Button("Upload").style(full_width=False)
        with gr.Row():
            loadingbox = gr.Textbox("Status", visible=False)
        # First handler shows the status box and hides the button; the second
        # runs the analysis and then swaps the upload view for the chat view.
        button.click(make_status_box_visible, outputs=[loadingbox, button])
        button.click(pdf_to_text, inputs=[upload], outputs=[loadingbox, chat_interface, upload_interface])

demo.queue(concurrency_count=20).launch()
 
 
 
 
 
 
1
  import gradio as gr
2
+ from PyPDF2 import PdfReader
3
+ import tqdm
4
+ import os
5
  import openai
 
6
  import time
7
+ import gradio as gr
8
+ from langchain.embeddings.openai import OpenAIEmbeddings
9
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
10
+ from langchain.vectorstores import Chroma
11
+ from langchain.docstore.document import Document
12
+ from langchain.prompts import PromptTemplate
13
+ from langchain.document_loaders import TextLoader
14
+ from langchain.chains.question_answering import load_qa_chain
15
+ from langchain.llms import AzureOpenAI
16
+ from chromadb.utils import embedding_functions
17
+ from langchain.text_splitter import CharacterTextSplitter
18
+ from langchain.embeddings.openai import OpenAIEmbeddings
19
+ from langchain.vectorstores import Chroma
20
+ from langchain import VectorDBQA
21
+ from langchain.llms import AzureOpenAI
22
+ import openai
23
 
 
 
 
24
 
25
# Azure OpenAI connection settings, written both to the environment and to
# the ``openai`` module attributes.
os.environ["OPENAI_API_TYPE"] = openai.api_type = "azure"
os.environ["OPENAI_API_VERSION"] = openai.api_version = "2022-12-01"
os.environ["OPENAI_API_BASE"] = openai.api_base = os.environ.get(
    "OPENAI_API_BASE", "https://openai-endpoint.openai.azure.com/"
)

# The API key is a secret and must be supplied through the environment, not
# committed to source control. Any key previously hard-coded here has been
# published and must be rotated.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before starting the app."
    )
openai.api_key = os.environ["OPENAI_API_KEY"]

# Module-level list of embedding vectors. NOTE(review): it is shared by every
# request in the process -- confirm that is intended.
embeddings = []
32
 
 
 
 
33
 
34
def _embed_with_retry(text, retry_delay=5):
    """Return the embedding vector for ``text``, retrying once after a pause."""
    try:
        response = openai.Embedding.create(
            input=text,
            engine="text-embedding-ada-002")
    except Exception as e:
        # Best-effort single retry; a second failure propagates to the caller.
        print(e)
        time.sleep(retry_delay)
        response = openai.Embedding.create(
            input=text,
            engine="text-embedding-ada-002")
    return response['data'][0]['embedding']


def pdf_to_text(file_obj, pdf_text, vectorstore, progress=gr.Progress(track_tqdm=True)):
    """Extract a PDF's text, embed it in chunks and load it into a Chroma store.

    Args:
        file_obj: The uploaded PDF, passed straight to ``PdfReader``.
        pdf_text: Previous text state; ignored and replaced.
        vectorstore: Previous store state; ignored and replaced.
        progress: Gradio progress tracker created with ``track_tqdm=True``.

    Returns:
        ``(pdf_text, vectorstore)``: the full extracted text and a new
        ``Chroma`` store holding one entry per text chunk.
    """
    reader = PdfReader(file_obj)
    # Treat a missing page text as empty so one bad page cannot break the
    # concatenation.
    pdf_text = "".join(page.extract_text() or "" for page in reader.pages)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    texts = text_splitter.split_text(pdf_text)

    # Embeddings are collected per call. Appending to a module-level list
    # would carry vectors over from earlier uploads and leave the list longer
    # than ``texts`` when both are handed to the collection below.
    chunk_embeddings = [_embed_with_retry(text) for text in tqdm.tqdm(texts)]

    azure_embeddings = OpenAIEmbeddings(document_model_name="text-embedding-ada-002", query_model_name="text-embedding-ada-002")
    vectorstore = Chroma("collection", embedding_function=azure_embeddings)
    if texts:  # nothing to add when no text could be extracted
        vectorstore._collection.add(
            ids=[f"doc_{i}" for i in range(len(texts))],
            documents=texts,
            embeddings=chunk_embeddings,
            metadatas=[{"source": "source"} for _ in texts],
        )

    return pdf_text, vectorstore
82
+
83
def add_text(state, query, vectorstore):
    """Answer ``query`` against the vector store and extend the chat history.

    Args:
        state: List of ``(question, answer)`` pairs so far. It is not
            mutated; a new list is returned when an entry is added.
        query: The user's question.
        vectorstore: The store produced by an upload, or the initial empty
            list (or ``None``) when nothing has been uploaded yet.

    Returns:
        ``(history, history, vectorstore)`` -- the history twice (one value
        per history output) followed by the unchanged store.
    """
    # Nothing to ask: leave the conversation untouched.
    if not query or not query.strip():
        return state, state, vectorstore

    # The store state starts out as an empty list, which cannot be queried.
    if vectorstore is None or isinstance(vectorstore, list):
        state = state + [(query, "Please upload a PDF before asking a question.")]
        return state, state, vectorstore

    chain = VectorDBQA.from_chain_type(
        llm=AzureOpenAI(deployment_name="davinci003", model_name="text-davinci-003"),
        chain_type="stuff",
        vectorstore=vectorstore,
    )
    answer = chain.run(query)
    state = state + [(query, answer)]
    return state, state, vectorstore
91
+
92
+
93
with gr.Blocks(title="AOAI") as demo:
    # Per-session state: extracted PDF text and the vector store built from it.
    pdf_text = gr.State([])
    vectorstore = gr.State([])
    text_box = gr.TextArea()
    # Specific file types are given as extensions with a leading dot.
    upload_button = gr.UploadButton("Click to Upload a File", file_types=[".pdf"])
    upload_button.upload(pdf_to_text, inputs=[upload_button, pdf_text, vectorstore], outputs=[pdf_text, vectorstore])

    with gr.Row():
        chatbot = gr.Chatbot()
        state = gr.State([])

    text = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)

    # Submitting the textbox asks the question and refreshes chat and history.
    text.submit(add_text, [state, text, vectorstore], [chatbot, state, vectorstore])

# NOTE(review): no ``demo.launch()`` call is visible here -- confirm the app
# is started elsewhere.
108
+
109
+