Spaces:

cloud-sean
/

AOAI-Form-Recognizer

Running

App Files Files Community

cloud-sean committed on Feb 20, 2023

Commit

2d2e179

1 Parent(s): c704005

Update app.py

Browse files

Files changed (1) hide show

app.py +94 -89

app.py CHANGED Viewed

@@ -1,104 +1,109 @@
 import gradio as gr
 import openai
-import os
 import time
-import shutil
-from gpt_index import GPTSimpleVectorIndex, SimpleDirectoryReader
-from threading import Lock
-from typing import Optional, Tuple
-from azure.ai.formrecognizer import DocumentAnalysisClient
-from azure.core.credentials import AzureKeyCredential
-os.environ['OPENAI_API_KEY'] = "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"
-endpoint = "https://eastus.api.cognitive.microsoft.com/"
-credential = AzureKeyCredential("844948341c6d4596b77b770cf12e386b")
-form_recognizer_client = DocumentAnalysisClient(endpoint=endpoint, credential=credential)
-class ChatWrapper:
-    def __init__(self):
-        self.lock = Lock()
-    def __call__(self, input,  history: Optional[Tuple[str, str]]):
-        self.lock.acquire()
-        try:
-            history = history or []
-            documents = SimpleDirectoryReader('data').load_data()
-            index = GPTSimpleVectorIndex(documents)
-            response = index.query(input, verbose=True)
-            history.append((input, str(response)))
         except Exception as e:
-            return gr.HTML(f"Error: {e}")
-        finally:
-            self.lock.release()
-        return history, history
-def make_status_box_visible():
-    return gr.update(visible=True), gr.update(visible=False)
-def create_index():
-    documents = SimpleDirectoryReader('data').load_data()
-    index = GPTSimpleVectorIndex(documents)
-def pdf_to_text(file_obj, progress=gr.Progress()):
-    progress(0.2, desc="Uploading file...")
-    with open(file_obj.name, "rb") as f:
-        progress(0.5, desc="Analyzing file...")
-        poller = form_recognizer_client.begin_analyze_document("prebuilt-document", f)
-        progress(0.8, desc="Applying OCR...")
-        result = poller.result()
-        f.close()
-    progress(0.9, desc="Azure OpenAI Magic...")
-    #save the result.content in a text file
-    # generate random stringsdsd  dawhdidsd  nvjhv dwdwdiwhd
-    import random, string
-    with open("data/" + ''.join(random.choices(string.ascii_uppercase + string.digits, k = 10)) + ".txt", "w") as f:
-        f.write(str(result.content))
-        f.close()
-    # create_index()
-    progress(1.0, desc="Done!")
-    time.sleep(1.5)
-    return str(result.content), gr.update(visible=True), gr.update(visible=False)
-chat = ChatWrapper()
-# rabbndi dawdwda wadawd dwad aidiodsdawhd hjsssbjhjbhjb ddw
-with gr.Blocks(css="footer {visibility: hidden;}", theme="grass") as demo:
-    chat_history_state = gr.State()
-    pdf_content = gr.State()
-    gr.Markdown("""
-    <sub><sup>created by [@shamill](https://whoplus.microsoft.com/?_vwp=true&_vwpAlias=SHAMILL)</sup></sub>
-    # Customized GPT-3 Chatbot
-    GPT-3.5 is a powerful language model, it can be used to create a chatbot that can have a conversation with you. This demo allows you to customize the context of the conversation, and the chatbot will stick to the confines of the context you provide, avoiding made up answers. The chatbot is powered by Azure's OpenAI GPT-3 API.""")
-    ### this is where they will upload the pdf
-    with gr.Column(visible=False) as chat_interface:
-        with gr.Row():
-            chatbot = gr.Chatbot()
-        with gr.Row():
-            message_box = gr.Textbox(lines=2, placeholder="Type a message...", default="Hi there!")
-            submit_button = gr.Button("Submit").style(full_width=False)
-            submit_button.click(chat, inputs=[message_box, chat_history_state], outputs=[chatbot, chat_history_state])
-    with gr.Column(visible=True) as upload_interface:
-        with gr.Row():
-            upload = gr.File(fn=pdf_to_text, label="Upload a context pdf file", type="file")
-        with gr.Row():
-            button = gr.Button("Upload").style(full_width=False)
-        with gr.Row():
-            loadingbox = gr.Textbox("Status", visible=False)
-            button.click(make_status_box_visible, outputs=[loadingbox, button])
-            button.click(pdf_to_text, inputs=[upload], outputs=[loadingbox, chat_interface, upload_interface])
-demo.queue(concurrency_count=20).launch()

 import gradio as gr
+from PyPDF2 import PdfReader
+import tqdm
+import os
 import openai
 import time
+import gradio as gr
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.docstore.document import Document
+from langchain.prompts import PromptTemplate
+from langchain.document_loaders import TextLoader
+from langchain.chains.question_answering import load_qa_chain
+from langchain.llms import AzureOpenAI
+from chromadb.utils import embedding_functions
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.vectorstores import Chroma
+from langchain import VectorDBQA
+from langchain.llms import AzureOpenAI
+import openai
+# Azure OpenAI settings. Each value is written both to the environment and to
+# the openai module globals (presumably so langchain and the direct openai
+# calls below see the same configuration -- confirm against langchain's docs).
+os.environ["OPENAI_API_TYPE"] = openai.api_type = "azure"
+os.environ["OPENAI_API_VERSION"] = openai.api_version = "2022-12-01"
+os.environ["OPENAI_API_BASE"] = openai.api_base = "https://openai-endpoint.openai.azure.com/"
+# The API key is a secret and must not be committed to the repository: it is
+# read from the OPENAI_API_KEY environment variable (e.g. a Space secret).
+# A missing variable raises KeyError at start-up instead of failing later on
+# the first request.
+openai.api_key = os.environ["OPENAI_API_KEY"]
+# Kept for backward compatibility with any code that still references the
+# module-level list.
+embeddings = []
+def _embed_with_retry(text, retry_delay=5):
+    """Return the text-embedding-ada-002 embedding of *text*, retrying once.
+
+    The first failure is printed and followed by a pause of *retry_delay*
+    seconds; a second failure propagates to the caller.
+    """
+    for attempt in range(2):
+        try:
+            response = openai.Embedding.create(
+                input=text,
+                engine="text-embedding-ada-002")
+            return response['data'][0]['embedding']
+        # NOTE(review): broad catch kept from the original; presumably meant
+        # for rate limiting / transient errors -- narrow once confirmed.
+        except Exception as e:
+            if attempt:
+                raise
+            print(e)
+            time.sleep(retry_delay)
+
+
+def pdf_to_text(file_obj, pdf_text, vectorstore, progress = gr.Progress(track_tqdm=True)):
+    """Extract the text of a PDF, embed it in chunks and index it in Chroma.
+
+    Args:
+        file_obj: The PDF to read; passed straight to ``PdfReader``.
+        pdf_text: Ignored on input; the name is rebound to the extracted text.
+        vectorstore: Ignored on input; the name is rebound to a new store.
+        progress: Gradio progress tracker created with ``track_tqdm=True``;
+            not referenced directly in the body.
+
+    Returns:
+        A tuple ``(pdf_text, vectorstore)``: the concatenated text of all
+        pages and the Chroma store holding one entry per text chunk.
+    """
+    reader = PdfReader(file_obj)
+    pdf_text = "".join(page.extract_text() for page in reader.pages)
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size = 1000,
+        chunk_overlap  = 200,
+        length_function = len,)
+    texts = text_splitter.split_text(pdf_text)
+    # Collect embeddings in a list local to this call. Appending to a shared
+    # module-level list made every upload after the first pass more
+    # embeddings than documents to the collection.
+    chunk_embeddings = [_embed_with_retry(text) for text in tqdm.tqdm(texts)]
+    azure_embeddings = OpenAIEmbeddings(document_model_name="text-embedding-ada-002",query_model_name="text-embedding-ada-002")
+    vectorstore = Chroma("collection", embedding_function=azure_embeddings)
+    # The embeddings were already computed above, so they are handed to the
+    # underlying collection directly (note: this uses a private attribute).
+    vectorstore._collection.add(
+        ids=[f"doc_{i}" for i in range(len(texts))],
+        documents=texts,
+        embeddings=chunk_embeddings,
+        metadatas=[{"source": "source"} for _ in texts]
+    )
+    return pdf_text, vectorstore
+def add_text(state, query, vectorstore):
+    """Answer *query* against *vectorstore* and append the turn to the chat.
+
+    Builds a "stuff" VectorDBQA chain on the ``davinci003`` Azure deployment,
+    runs the query through it, and returns the extended history twice
+    followed by the vector store, which is passed through unchanged.
+    """
+    llm = AzureOpenAI(deployment_name="davinci003", model_name="text-davinci-003")
+    chain = VectorDBQA.from_chain_type(llm=llm, chain_type="stuff", vectorstore=vectorstore)
+    answer = chain.run(query)
+    # Build a new list rather than mutating the incoming history in place.
+    history = state + [(query, answer)]
+    return history, history, vectorstore
+# UI definition: an upload button that feeds pdf_to_text and a chat row that
+# feeds add_text.
+with gr.Blocks(title="AOAI") as demo:
+    # State holders, both initialised to an empty list; pdf_to_text's two
+    # return values are written into them by the upload event below.
+    pdf_text = gr.State([])
+    vectorstore = gr.State([])
+    # Not wired to any event in the lines shown here.
+    text_box = gr.TextArea()
+    upload_button = gr.UploadButton("Click to Upload a File", file_types=["pdf"])
+    # On upload: inputs are the uploaded file plus both states; outputs
+    # overwrite both states.
+    upload_button.upload(pdf_to_text, inputs=[upload_button, pdf_text, vectorstore], outputs=[pdf_text, vectorstore])
+    with gr.Row():
+        chatbot = gr.Chatbot()
+        # Chat history passed to and returned from add_text.
+        state = gr.State([])
+        text = gr.Textbox(show_label=False, placeholder="Enter text and press enter").style(container=False)
+        # On submit: add_text receives (history, query, vectorstore) and its
+        # three return values go to the chatbot, the history and the store.
+        text.submit(add_text, [state, text, vectorstore], [chatbot, state, vectorstore])