umairahmad89 commited on
Commit
cc83df3
·
0 Parent(s):

initial commit

Browse files
Files changed (7) hide show
  1. .gitignore +6 -0
  2. app.py +98 -0
  3. assistant.py +174 -0
  4. assistant_file_handler.py +26 -0
  5. assistant_utils.py +72 -0
  6. requirements.txt +4 -0
  7. utils.py +12 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ data/
3
+ raw_data/
4
+ submission/
5
+ *.zip
6
+ __pycache__/*
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Tuple
3
+ import gradio as gr
4
+ from assistant import OAIAssistant
5
+ import utils
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
class PPTChat:
    """Gradio chat application backed by an OpenAI Assistant.

    Owns a single OAIAssistant plus one conversation thread that is reused
    for the whole session; the Gradio callbacks below mutate the chatbot
    history list in place.
    """

    def __init__(self) -> None:
        # Both IDs must be present in the environment (.env is loaded at
        # module import via load_dotenv()).
        assistant_id = os.environ.get("ASSISTANT_ID")
        vector_store_id = os.environ.get("VECTORSTORE_ID")

        self.assistant = OAIAssistant(
            assistant_id=assistant_id, vectorstore_id=vector_store_id
        )
        # A single thread is created up front and reused for every message.
        self.thread_id = self.assistant.create_thread().id

    def chat(self, message: str, history: List[Tuple]) -> Tuple[str, List[Tuple]]:
        """Send `message` to the assistant and append the exchange to `history`.

        Returns ("", history) so the textbox is cleared after submit.
        """
        # BUG FIX: OAIAssistant.chat returns the answer string directly, not
        # a dict — indexing response["answer"] raised TypeError.
        response = self.assistant.chat(message, self.thread_id)
        history.append((message, response))
        return (
            "",
            history,
        )

    def create_thread(self) -> None:
        """Discard the current conversation thread and start a fresh one."""
        self.assistant.delete_thread(self.thread_id)
        self.thread_id = self.assistant.create_thread().id

    def add_file(self, file: gr.File) -> None:
        """Upload a file to the assistant's vector store."""
        self.assistant.add_file(file)

    def add_message(
        self, history: List[Tuple], message: dict
    ) -> Tuple[List[Tuple], gr.MultimodalTextbox]:
        """Record any uploaded files and the user's text in the chat history.

        Unsupported file types are reported in the history instead of being
        uploaded. Returns the updated history and a cleared input box.
        """
        for file in message["files"]:
            file_type = utils.file_type(file)
            if file_type:
                history.append((f"Uploaded {file_type.upper()} file: {file}", None))
                self.add_file(file)
            else:
                history.append((f"Unsupported file type: {file}", None))

        if message["text"]:
            history.append((message["text"], None))

        return history, gr.MultimodalTextbox(value=None, interactive=True)

    def bot_response(self, history: List[Tuple]) -> List[Tuple]:
        """Generate the assistant's reply to the last user message in history."""
        last_message = history[-1][0]
        response = self.assistant.chat(last_message, self.thread_id)
        # Fill in the reply slot of the last (user, None) pair.
        history[-1] = (history[-1][0], response)
        return history

    def create_interface(self):
        """Build and return the Gradio Blocks UI for the chat app."""
        with gr.Blocks(fill_height=True) as demo:
            chatbot = gr.Chatbot(
                elem_id="chatbot",
                bubble_full_width=False,
                scale=1,
            )

            chat_input = gr.MultimodalTextbox(
                interactive=True,
                file_count="multiple",
                placeholder="Enter message or upload file...",
                show_label=False,
            )

            # On submit: record the message, then produce the bot reply,
            # then re-enable the input box.
            chat_msg = chat_input.submit(
                self.add_message,
                [chatbot, chat_input],
                [chatbot, chat_input],
            )
            bot_msg = chat_msg.then(
                self.bot_response,
                chatbot,
                chatbot,
                api_name="bot_response",
            )
            bot_msg.then(
                lambda: gr.MultimodalTextbox(interactive=True),
                None,
                [chat_input],
            )

        return demo
91
+
92
+
93
if __name__ == "__main__":
    # Build the chat app and serve it with Gradio's default settings.
    app = PPTChat()
    app.create_interface().launch()
assistant.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict
3
+ import time
4
+ from openai import OpenAI
5
+ from assistant_file_handler import FileHandler
6
+ from openai.types.beta.thread import Thread
7
+ from openai.types.beta.threads.message import Message
8
+
9
+ import structlog
10
+ from openai.pagination import SyncCursorPage
11
+
12
+
13
+ class OAIAssistant:
14
+ def __init__(self, assistant_id, vectorstore_id) -> None:
15
+ self.file_handler = FileHandler()
16
+ self.assistant_id = assistant_id
17
+ self.vectorstore_id = vectorstore_id
18
+ self.client = OpenAI()
19
+ self.openai_assistant = self.client.beta.assistants.retrieve(
20
+ assistant_id=self.assistant_id
21
+ )
22
+ self.log = structlog.get_logger()
23
+
24
+ def create(self):
25
+ pass
26
+
27
+ def add_file(self, file_path: str):
28
+ file_id = self.file_handler.add(file_path=file_path).id
29
+ self.client.beta.vector_stores.files.create(
30
+ file_id=file_id, vector_store_id=self.vectorstore_id
31
+ )
32
+
33
+ def remove_file(self, file_id: str):
34
+ self.client.beta.vector_stores.files.delete(
35
+ file_id=file_id, vector_store_id=self.vectorstore_id
36
+ )
37
+ self.log.info(
38
+ f"OAIAssistant: Deleted file with id {file_id} from vector database"
39
+ )
40
+ self.file_handler.remove(file_id=file_id)
41
+ self.log.info(f"OAIAssistant: Deleted file with id {file_id} from file storage")
42
+
43
+ def chat(self, query: str, thread_id: str):
44
+ try:
45
+ if not thread_id:
46
+ thread = self.create_thread().id
47
+ thread_id = thread.id
48
+ # else:
49
+ # thread_id = self.client.beta.threads.retrieve(thread_id).id
50
+
51
+ self.client.beta.threads.messages.create(
52
+ thread_id=thread_id,
53
+ role="user",
54
+ content=query,
55
+ )
56
+ self.log.info(
57
+ "OAIAssistant: Message added to thread",
58
+ thread_id=thread_id,
59
+ query=query,
60
+ )
61
+
62
+ new_message, message_file_ids = self.__run_assistant(thread_id=thread_id)
63
+ file_paths = []
64
+ for msg_file_id in message_file_ids:
65
+ png_file_path = f"./tmp/{msg_file_id}.png"
66
+ self.__convert_file_to_png(
67
+ file_id=msg_file_id, write_path=png_file_path
68
+ )
69
+ file_paths.append(png_file_path)
70
+
71
+ file_ids = self.__add_files(file_paths=file_paths)
72
+
73
+ self.client.beta.threads.messages.create(
74
+ thread_id=thread_id,
75
+ role="assistant",
76
+ content=new_message,
77
+ attachments=[
78
+ {"file_id": file_id, "tools": [{"type": "file_search"}]}
79
+ for _, file_id in file_ids.items()
80
+ ]
81
+ if file_ids
82
+ else None,
83
+ )
84
+ self.log.info(
85
+ "OAIAssistant: Assistant response generated", response=new_message
86
+ )
87
+ return new_message
88
+ except Exception as e:
89
+ self.log.error("OAIAssistant: Error generating response", error=str(e))
90
+ import traceback
91
+
92
+ self.log.error(traceback.print_exc())
93
+ return "OAIAssistant: An error occurred while generating the response."
94
+
95
+ def create_thread(self) -> Thread:
96
+ thread: Thread = self.client.beta.threads.create(tool_resources={"file_search": {"vector_store_ids": [self.vectorstore_id]}})
97
+ return thread
98
+
99
+ def delete_thread(self, thread_id: str):
100
+ self.client.beta.threads.delete(thread_id=thread_id)
101
+ self.log.info(f"OAIAssistant: Deleted thread with id: {thread_id}")
102
+
103
+ def __convert_file_to_png(self, file_id, write_path):
104
+ try:
105
+ data = self.client.files.content(file_id)
106
+ data_bytes = data.read()
107
+ with open(write_path, "wb") as file:
108
+ file.write(data_bytes)
109
+ self.log.info("OAIAssistant: File converted to PNG", file_path=write_path)
110
+ except Exception as e:
111
+ self.log.error("OAIAssistant: Error converting file to PNG", error=str(e))
112
+ raise
113
+
114
+ def __add_files(self, file_paths: List[str]) -> Dict[str, str]:
115
+ try:
116
+ files = {}
117
+ for file in file_paths:
118
+ filename = os.path.basename(file)
119
+ file = self.file_handler.add(file)
120
+ files[filename] = file.id
121
+ self.log.info("OAIAssistant: Files added", files=files)
122
+ return files
123
+ except Exception as e:
124
+ self.log.error("OAIAssistant: Error adding files", error=str(e))
125
+ raise
126
+
127
+ def __run_assistant(self, thread_id: str):
128
+ try:
129
+ run = self.client.beta.threads.runs.create(
130
+ thread_id=thread_id,
131
+ assistant_id=self.assistant_id,
132
+ )
133
+ self.log.info("OAIAssistant: Assistant run started", run_id=run.id)
134
+
135
+ while run.status != "completed":
136
+ time.sleep(1)
137
+ run = self.client.beta.threads.runs.retrieve(
138
+ thread_id=thread_id, run_id=run.id
139
+ )
140
+ if run.status == "failed":
141
+ self.log.error(
142
+ "OAIAssistant: Assistant run failed",
143
+ run_id=run.id,
144
+ )
145
+ self.log.error(run)
146
+ return "OAIAssistant: Error in generating response", []
147
+
148
+ messages: SyncCursorPage[Message] = self.client.beta.threads.messages.list(
149
+ thread_id=thread_id, run_id=run.id
150
+ )
151
+ new_message, file_ids = self.__extract_messages(messages)
152
+
153
+ return new_message, file_ids
154
+ except Exception as e:
155
+ self.log.error("OAIAssistant: Error running assistant", error=str(e))
156
+ raise
157
+
158
+ def __extract_messages(self, messages: SyncCursorPage[Message]):
159
+ try:
160
+ new_message = ""
161
+ file_ids = []
162
+ for message in messages.data:
163
+ if message.content[0].type == "text":
164
+ new_message += message.content[0].text.value
165
+ elif message.content[0].type == "image_file":
166
+ new_message += "Image File:\n"
167
+ new_message += message.content[0].image_file.file_id
168
+ new_message += "\n\n"
169
+ file_ids.append(message.content[0].image_file.file_id)
170
+ self.log.info("OAIAssistant: Messages extracted", message=new_message)
171
+ return new_message, file_ids
172
+ except Exception as e:
173
+ self.log.error("OAIAssistant: Error extracting messages", error=str(e))
174
+ raise
assistant_file_handler.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from openai.types.file_object import FileObject
3
+ import structlog
4
+
5
+
6
class FileHandler:
    """Thin wrapper around the OpenAI Files API for upload and deletion."""

    def __init__(self) -> None:
        self.client = OpenAI()
        self.log = structlog.get_logger()

    def add(self, file_path: str) -> FileObject:
        """
        Adds the file to vectorstore and returns a file_id
        """
        # read file
        self.log.info(f"File Handler: Reading File with {file_path}")
        # BUG FIX: the original opened the file and never closed it; the
        # context manager guarantees the handle is released even on error.
        with open(file_path, "rb") as file_obj:
            self.log.info("File Handler: Adding file")
            file: FileObject = self.client.files.create(
                file=file_obj, purpose="assistants"
            )
        self.log.info(f"File Handler: Created file object with id: {file.id}")

        return file

    def remove(self, file_id: str):
        """Delete a previously uploaded file object by id."""
        self.client.files.delete(file_id=file_id)
        self.log.info(f"File Handler: Deleted file object with id: {file_id}")
assistant_utils.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""One-off setup script: uploads the source documents, builds a vector
store, and creates the "Dairyfarm Assistant" OpenAI assistant."""
from openai import OpenAI
from openai.types.beta.vector_store import VectorStore
from dotenv import load_dotenv

load_dotenv()

client = OpenAI()

vector_store: VectorStore = client.beta.vector_stores.create(name="dairyfarm-data")

# Documents to index for file search.
DATA_FILES = [
    "./data/China Dairy Imports Template AB Copy (Alyssa Badger's conflicted copy 2024-08-23).pptx",
    "./data/US Dairy Exports-Imports Template.BW.2.pptx",
    "./data/Cold Storage Publisher.pdf",
    "./data/GDT Auction Analysis Template Publisher.pdf",
    "./data/Publisher US Milk Production 06-21-21.2.BW.pdf",
]

# Upload each document and attach it to the vector store. BUG FIX: the
# original opened five file handles and never closed them; the loop with a
# context manager also removes the 5x copy-pasted upload/attach code.
for path in DATA_FILES:
    with open(path, "rb") as fh:
        uploaded = client.files.create(file=fh, purpose="assistants")
    client.beta.vector_stores.files.create(
        vector_store_id=vector_store.id, file_id=uploaded.id
    )


instructions = """
You are an advanced document analysis AI specialized in extracting product insights from PDFs and PowerPoint (PPTX) files. Your primary tasks are:

1. Analyze the content of uploaded PDF and PPTX files.
2. Identify and extract key information about products mentioned in these documents.
3. Provide concise yet comprehensive insights about the products, including features, benefits, specifications, and market positioning.
4. Offer comparative analysis if multiple products are mentioned.
5. Highlight any unique selling points or innovative aspects of the products.

Greeting Instructions:
- Always start your interaction with a polite and professional greeting.
- Use the user's name if provided, otherwise use a general greeting.
- Tailor your greeting based on the time of day (morning, afternoon, evening) if that information is available.
- Keep your initial greeting brief and get to the point quickly.

Example greetings:
- "Hello [Name], welcome to the Product Insight Assistant. How can I help you today?"
- "Good [morning/afternoon/evening]. I'm ready to analyze your product documents. What would you like to know?"

After greeting, promptly ask what specific document the user would like you to analyze or what product information they're seeking.

Remember:
- Maintain a professional and neutral tone throughout the interaction.
- If you encounter any ambiguities or need more information, don't hesitate to ask for clarification.
- Respect confidentiality and do not share or retain any sensitive information from the documents.
- If asked about topics outside your specialization, politely redirect the conversation back to product analysis or refer the user to appropriate resources.

Your goal is to provide accurate, insightful, and actionable information about products based on the document analysis, helping users make informed decisions or gain a deeper understanding of the products described in their files.
"""

descriptions = """
Product Insight Analyzer: An AI-powered assistant that examines PDF and PowerPoint files to extract key product information. It provides detailed insights on features, benefits, and market positioning, offering comparative analysis when multiple products are involved. This tool helps users quickly understand and evaluate products described in complex documents, supporting informed decision-making and in-depth product comprehension.
"""

assistant = client.beta.assistants.create(
    name="Dairyfarm Assistant",
    instructions=instructions,
    description=descriptions,
    model="gpt-4o-mini",
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    tools=[{"type": "file_search"}],
)
print(assistant)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ python-dotenv
4
+ structlog
utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def file_type(file_path, allowed_extensions=('pdf', 'pptx', 'docx')):
    """Return the lowercased extension of `file_path` if it is allowed.

    Args:
        file_path: Path or filename to classify.
        allowed_extensions: Extensions (without the dot) that are accepted.
            BUG FIX: the default is now an immutable tuple instead of a
            mutable list shared across calls.

    Returns:
        The extension string (e.g. "pdf") when allowed, otherwise None.
    """
    _, extension = os.path.splitext(file_path)
    extension = extension.lower()[1:]  # Remove the dot and convert to lowercase

    if extension in allowed_extensions:
        return extension

    # Extension not in the allowed set.
    return None