umairahmad89 commited on
Commit
cc83df3
·
0 Parent(s):

initial commit

Browse files
Files changed (7) hide show
  1. .gitignore +6 -0
  2. app.py +98 -0
  3. assistant.py +174 -0
  4. assistant_file_handler.py +26 -0
  5. assistant_utils.py +72 -0
  6. requirements.txt +4 -0
  7. utils.py +12 -0
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .env
2
+ data/
3
+ raw_data/
4
+ submission/
5
+ *.zip
6
+ __pycache__/*
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Tuple
3
+ import gradio as gr
4
+ from assistant import OAIAssistant
5
+ import utils
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
class PPTChat:
    """Gradio chat application backed by an OpenAI Assistant.

    Owns a single OAIAssistant plus one conversation thread that is reused
    for the whole session; the Gradio callbacks below mutate the chatbot
    history list in place.
    """

    def __init__(self) -> None:
        # Both IDs must be present in the environment (.env is loaded at
        # module import via load_dotenv()).
        assistant_id = os.environ.get("ASSISTANT_ID")
        vector_store_id = os.environ.get("VECTORSTORE_ID")

        self.assistant = OAIAssistant(
            assistant_id=assistant_id, vectorstore_id=vector_store_id
        )
        # A single thread is created up front and reused for every message.
        self.thread_id = self.assistant.create_thread().id

    def chat(self, message: str, history: List[Tuple]) -> Tuple[str, List[Tuple]]:
        """Send `message` to the assistant and append the exchange to `history`.

        Returns ("", history) so the textbox is cleared after submit.
        """
        # BUG FIX: OAIAssistant.chat returns the answer string directly, not
        # a dict — indexing response["answer"] raised TypeError.
        response = self.assistant.chat(message, self.thread_id)
        history.append((message, response))
        return (
            "",
            history,
        )

    def create_thread(self) -> None:
        """Discard the current conversation thread and start a fresh one."""
        self.assistant.delete_thread(self.thread_id)
        self.thread_id = self.assistant.create_thread().id

    def add_file(self, file: gr.File) -> None:
        """Upload a file to the assistant's vector store."""
        self.assistant.add_file(file)

    def add_message(
        self, history: List[Tuple], message: dict
    ) -> Tuple[List[Tuple], gr.MultimodalTextbox]:
        """Record any uploaded files and the user's text in the chat history.

        Unsupported file types are reported in the history instead of being
        uploaded. Returns the updated history and a cleared input box.
        """
        for file in message["files"]:
            file_type = utils.file_type(file)
            if file_type:
                history.append((f"Uploaded {file_type.upper()} file: {file}", None))
                self.add_file(file)
            else:
                history.append((f"Unsupported file type: {file}", None))

        if message["text"]:
            history.append((message["text"], None))

        return history, gr.MultimodalTextbox(value=None, interactive=True)

    def bot_response(self, history: List[Tuple]) -> List[Tuple]:
        """Generate the assistant's reply to the last user message in history."""
        last_message = history[-1][0]
        response = self.assistant.chat(last_message, self.thread_id)
        # Fill in the reply slot of the last (user, None) pair.
        history[-1] = (history[-1][0], response)
        return history

    def create_interface(self):
        """Build and return the Gradio Blocks UI for the chat app."""
        with gr.Blocks(fill_height=True) as demo:
            chatbot = gr.Chatbot(
                elem_id="chatbot",
                bubble_full_width=False,
                scale=1,
            )

            chat_input = gr.MultimodalTextbox(
                interactive=True,
                file_count="multiple",
                placeholder="Enter message or upload file...",
                show_label=False,
            )

            # On submit: record the message, then produce the bot reply,
            # then re-enable the input box.
            chat_msg = chat_input.submit(
                self.add_message,
                [chatbot, chat_input],
                [chatbot, chat_input],
            )
            bot_msg = chat_msg.then(
                self.bot_response,
                chatbot,
                chatbot,
                api_name="bot_response",
            )
            bot_msg.then(
                lambda: gr.MultimodalTextbox(interactive=True),
                None,
                [chat_input],
            )

        return demo
91
+
92
+
93
if __name__ == "__main__":
    # Build the chat app and serve it with Gradio's default settings.
    app = PPTChat()
    app.create_interface().launch()
assistant.py ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict
3
+ import time
4
+ from openai import OpenAI
5
+ from assistant_file_handler import FileHandler
6
+ from openai.types.beta.thread import Thread
7
+ from openai.types.beta.threads.message import Message
8
+
9
+ import structlog
10
+ from openai.pagination import SyncCursorPage
11
+
12
+
13
+ class OAIAssistant:
14
+ def __init__(self, assistant_id, vectorstore_id) -> None:
15
+ self.file_handler = FileHandler()
16
+ self.assistant_id = assistant_id
17
+ self.vectorstore_id = vectorstore_id
18
+ self.client = OpenAI()
19
+ self.openai_assistant = self.client.beta.assistants.retrieve(
20
+ assistant_id=self.assistant_id
21
+ )
22
+ self.log = structlog.get_logger()
23
+
24
+ def create(self):
25
+ pass
26
+
27
+ def add_file(self, file_path: str):
28
+ file_id = self.file_handler.add(file_path=file_path).id
29
+ self.client.beta.vector_stores.files.create(
30
+ file_id=file_id, vector_store_id=self.vectorstore_id
31
+ )
32
+
33
+ def remove_file(self, file_id: str):
34
+ self.client.beta.vector_stores.files.delete(
35
+ file_id=file_id, vector_store_id=self.vectorstore_id
36
+ )
37
+ self.log.info(
38
+ f"OAIAssistant: Deleted file with id {file_id} from vector database"
39
+ )
40
+ self.file_handler.remove(file_id=file_id)
41
+ self.log.info(f"OAIAssistant: Deleted file with id {file_id} from file storage")
42
+
43
+ def chat(self, query: str, thread_id: str):
44
+ try:
45
+ if not thread_id:
46
+ thread = self.create_thread().id
47
+ thread_id = thread.id
48
+ # else:
49
+ # thread_id = self.client.beta.threads.retrieve(thread_id).id
50
+
51
+ self.client.beta.threads.messages.create(
52
+ thread_id=thread_id,
53
+ role="user",
54
+ content=query,
55
+ )
56
+ self.log.info(
57
+ "OAIAssistant: Message added to thread",
58
+ thread_id=thread_id,
59
+ query=query,
60
+ )
61
+
62
+ new_message, message_file_ids = self.__run_assistant(thread_id=thread_id)
63
+ file_paths = []
64
+ for msg_file_id in message_file_ids:
65
+ png_file_path = f"./tmp/{msg_file_id}.png"
66
+ self.__convert_file_to_png(
67
+ file_id=msg_file_id, write_path=png_file_path
68
+ )
69
+ file_paths.append(png_file_path)
70
+
71
+ file_ids = self.__add_files(file_paths=file_paths)
72
+
73
+ self.client.beta.threads.messages.create(
74
+ thread_id=thread_id,
75
+ role="assistant",
76
+ content=new_message,
77
+ attachments=[
78
+ {"file_id": file_id, "tools": [{"type": "file_search"}]}
79
+ for _, file_id in file_ids.items()
80
+ ]
81
+ if file_ids
82
+ else None,
83
+ )
84
+ self.log.info(
85
+ "OAIAssistant: Assistant response generated", response=new_message
86
+ )
87
+ return new_message
88
+ except Exception as e:
89
+ self.log.error("OAIAssistant: Error generating response", error=str(e))
90
+ import traceback
91
+
92
+ self.log.error(traceback.print_exc())
93
+ return "OAIAssistant: An error occurred while generating the response."
94
+
95
+ def create_thread(self) -> Thread:
96
+ thread: Thread = self.client.beta.threads.create(tool_resources={"file_search": {"vector_store_ids": [self.vectorstore_id]}})
97
+ return thread
98
+
99
+ def delete_thread(self, thread_id: str):
100
+ self.client.beta.threads.delete(thread_id=thread_id)
101
+ self.log.info(f"OAIAssistant: Deleted thread with id: {thread_id}")
102
+
103
+ def __convert_file_to_png(self, file_id, write_path):
104
+ try:
105
+ data = self.client.files.content(file_id)
106
+ data_bytes = data.read()
107
+ with open(write_path, "wb") as file:
108
+ file.write(data_bytes)
109
+ self.log.info("OAIAssistant: File converted to PNG", file_path=write_path)
110
+ except Exception as e:
111
+ self.log.error("OAIAssistant: Error converting file to PNG", error=str(e))
112
+ raise
113
+
114
+ def __add_files(self, file_paths: List[str]) -> Dict[str, str]:
115
+ try:
116
+ files = {}
117
+ for file in file_paths:
118
+ filename = os.path.basename(file)
119
+ file = self.file_handler.add(file)
120
+ files[filename] = file.id
121
+ self.log.info("OAIAssistant: Files added", files=files)
122
+ return files
123
+ except Exception as e:
124
+ self.log.error("OAIAssistant: Error adding files", error=str(e))
125
+ raise
126
+
127
+ def __run_assistant(self, thread_id: str):
128
+ try:
129
+ run = self.client.beta.threads.runs.create(
130
+ thread_id=thread_id,
131
+ assistant_id=self.assistant_id,
132
+ )
133
+ self.log.info("OAIAssistant: Assistant run started", run_id=run.id)
134
+
135
+ while run.status != "completed":
136
+ time.sleep(1)
137
+ run = self.client.beta.threads.runs.retrieve(
138
+ thread_id=thread_id, run_id=run.id
139
+ )
140
+ if run.status == "failed":
141
+ self.log.error(
142
+ "OAIAssistant: Assistant run failed",
143
+ run_id=run.id,
144
+ )
145
+ self.log.error(run)
146
+ return "OAIAssistant: Error in generating response", []
147
+
148
+ messages: SyncCursorPage[Message] = self.client.beta.threads.messages.list(
149
+ thread_id=thread_id, run_id=run.id
150
+ )
151
+ new_message, file_ids = self.__extract_messages(messages)
152
+
153
+ return new_message, file_ids
154
+ except Exception as e:
155
+ self.log.error("OAIAssistant: Error running assistant", error=str(e))
156
+ raise
157
+
158
+ def __extract_messages(self, messages: SyncCursorPage[Message]):
159
+ try:
160
+ new_message = ""
161
+ file_ids = []
162
+ for message in messages.data:
163
+ if message.content[0].type == "text":
164
+ new_message += message.content[0].text.value
165
+ elif message.content[0].type == "image_file":
166
+ new_message += "Image File:\n"
167
+ new_message += message.content[0].image_file.file_id
168
+ new_message += "\n\n"
169
+ file_ids.append(message.content[0].image_file.file_id)
170
+ self.log.info("OAIAssistant: Messages extracted", message=new_message)
171
+ return new_message, file_ids
172
+ except Exception as e:
173
+ self.log.error("OAIAssistant: Error extracting messages", error=str(e))
174
+ raise
assistant_file_handler.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from openai.types.file_object import FileObject
3
+ import structlog
4
+
5
+
6
class FileHandler:
    """Thin wrapper around the OpenAI Files API for upload and deletion."""

    def __init__(self) -> None:
        self.client = OpenAI()
        self.log = structlog.get_logger()

    def add(self, file_path: str) -> FileObject:
        """
        Adds the file to vectorstore and returns a file_id
        """
        # read file
        self.log.info(f"File Handler: Reading File with {file_path}")
        # BUG FIX: the original opened the file and never closed it; the
        # context manager guarantees the handle is released even on error.
        with open(file_path, "rb") as file_obj:
            self.log.info("File Handler: Adding file")
            file: FileObject = self.client.files.create(
                file=file_obj, purpose="assistants"
            )
        self.log.info(f"File Handler: Created file object with id: {file.id}")

        return file

    def remove(self, file_id: str):
        """Delete a previously uploaded file object by id."""
        self.client.files.delete(file_id=file_id)
        self.log.info(f"File Handler: Deleted file object with id: {file_id}")
assistant_utils.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""One-off setup script: uploads the source documents, builds a vector
store, and creates the "Dairyfarm Assistant" OpenAI assistant."""
from openai import OpenAI
from openai.types.beta.vector_store import VectorStore
from dotenv import load_dotenv

load_dotenv()

client = OpenAI()

vector_store: VectorStore = client.beta.vector_stores.create(name="dairyfarm-data")

# Documents to index for file search.
DATA_FILES = [
    "./data/China Dairy Imports Template AB Copy (Alyssa Badger's conflicted copy 2024-08-23).pptx",
    "./data/US Dairy Exports-Imports Template.BW.2.pptx",
    "./data/Cold Storage Publisher.pdf",
    "./data/GDT Auction Analysis Template Publisher.pdf",
    "./data/Publisher US Milk Production 06-21-21.2.BW.pdf",
]

# Upload each document and attach it to the vector store. BUG FIX: the
# original opened five file handles and never closed them; the loop with a
# context manager also removes the 5x copy-pasted upload/attach code.
for path in DATA_FILES:
    with open(path, "rb") as fh:
        uploaded = client.files.create(file=fh, purpose="assistants")
    client.beta.vector_stores.files.create(
        vector_store_id=vector_store.id, file_id=uploaded.id
    )


instructions = """
You are an advanced document analysis AI specialized in extracting product insights from PDFs and PowerPoint (PPTX) files. Your primary tasks are:

1. Analyze the content of uploaded PDF and PPTX files.
2. Identify and extract key information about products mentioned in these documents.
3. Provide concise yet comprehensive insights about the products, including features, benefits, specifications, and market positioning.
4. Offer comparative analysis if multiple products are mentioned.
5. Highlight any unique selling points or innovative aspects of the products.

Greeting Instructions:
- Always start your interaction with a polite and professional greeting.
- Use the user's name if provided, otherwise use a general greeting.
- Tailor your greeting based on the time of day (morning, afternoon, evening) if that information is available.
- Keep your initial greeting brief and get to the point quickly.

Example greetings:
- "Hello [Name], welcome to the Product Insight Assistant. How can I help you today?"
- "Good [morning/afternoon/evening]. I'm ready to analyze your product documents. What would you like to know?"

After greeting, promptly ask what specific document the user would like you to analyze or what product information they're seeking.

Remember:
- Maintain a professional and neutral tone throughout the interaction.
- If you encounter any ambiguities or need more information, don't hesitate to ask for clarification.
- Respect confidentiality and do not share or retain any sensitive information from the documents.
- If asked about topics outside your specialization, politely redirect the conversation back to product analysis or refer the user to appropriate resources.

Your goal is to provide accurate, insightful, and actionable information about products based on the document analysis, helping users make informed decisions or gain a deeper understanding of the products described in their files.
"""

descriptions = """
Product Insight Analyzer: An AI-powered assistant that examines PDF and PowerPoint files to extract key product information. It provides detailed insights on features, benefits, and market positioning, offering comparative analysis when multiple products are involved. This tool helps users quickly understand and evaluate products described in complex documents, supporting informed decision-making and in-depth product comprehension.
"""

assistant = client.beta.assistants.create(
    name="Dairyfarm Assistant",
    instructions=instructions,
    description=descriptions,
    model="gpt-4o-mini",
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    tools=[{"type": "file_search"}],
)
print(assistant)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ python-dotenv
4
+ structlog
utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def file_type(file_path, allowed_extensions=('pdf', 'pptx', 'docx')):
    """Return the lowercased extension of `file_path` if it is allowed.

    Args:
        file_path: Path or filename to classify.
        allowed_extensions: Extensions (without the dot) that are accepted.
            BUG FIX: the default is now an immutable tuple instead of a
            mutable list shared across calls.

    Returns:
        The extension string (e.g. "pdf") when allowed, otherwise None.
    """
    _, extension = os.path.splitext(file_path)
    extension = extension.lower()[1:]  # Remove the dot and convert to lowercase

    if extension in allowed_extensions:
        return extension

    # Extension not in the allowed set.
    return None