umairahmad1789 committed on
Commit
c71f981
·
verified ·
1 Parent(s): 3c79971

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +95 -0
  2. assistant.py +184 -0
  3. assistant_file_handler.py +26 -0
  4. assistant_utils.py +72 -0
  5. requirements.txt +4 -0
  6. utils.py +12 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Tuple
3
+ import gradio as gr
4
+ from assistant import OAIAssistant
5
+ import utils
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv()
9
+
10
+
11
class VisaPolicyAgent:
    """Gradio chat front end backed by a single ``OAIAssistant`` thread.

    The id of the active assistant thread is stored in ``self.thread_id`` and
    is replaced whenever a new thread is requested.
    """

    def __init__(self) -> None:
        """Create the assistant wrapper and open the first thread.

        The assistant and vector store ids are read from the ``ASSISTANT_ID``
        and ``VECTORSTORE_ID`` environment variables.
        """
        assistant_id = os.environ.get("ASSISTANT_ID")
        vector_store_id = os.environ.get("VECTORSTORE_ID")

        self.assistant = OAIAssistant(
            assistant_id=assistant_id, vectorstore_id=vector_store_id
        )
        self.thread_id = self.assistant.create_thread().id

    def chat(self, message: str, history: List[Tuple]):
        """Send ``message`` to the assistant and record the exchange.

        Returns:
            ``("", history)`` - an empty string to clear the input box and the
            history list with the new ``(message, response)`` pair appended.
        """
        # The assistant's chat() returns the reply as a plain string, so it
        # is stored as-is; indexing it with ["answer"] raised a TypeError.
        return self.bot_response(message, history)

    def create_thread(self):
        """Delete the current thread and start a fresh one.

        Failures are reported to the user and otherwise swallowed, so callers
        keep running with the previous ``thread_id``.
        """
        try:
            self.assistant.delete_thread(self.thread_id)
            self.thread_id = self.assistant.create_thread().id
            gr.Info(message="New thread created. Start as New!")
        except Exception as e:
            # gr.Error is an exception class: constructing it without
            # ``raise`` shows nothing. gr.Warning displays when called and
            # keeps this method non-raising, as before.
            gr.Warning(message=f"Unable to create new thread. Error: {e}")

    def clear_all(self):
        """Reset the session: new thread, remove stored files, reset the chat.

        Returns:
            The replacement chatbot history, a single confirmation exchange.
        """
        self.create_thread()
        gr.Info(message="Created new thread")
        try:
            assistant_file_ids = self.assistant.get_files_list()
            for file_id in assistant_file_ids:
                self.assistant.remove_file(file_id=file_id)

            gr.Info(message="Deleted files in assistant")

        except Exception as e:
            # See create_thread(): a bare gr.Error(...) is never displayed.
            gr.Warning(message=f"Unable to delete files. Error: {e}")

        gr.Info("Chat is cleared.")

        return [("Clear Chatbot", "Chatbot cleared.")]

    def add_file(self, file: "gr.File"):
        """Forward ``file`` to the assistant so it is uploaded and indexed."""
        # NOTE(review): the assistant's add_file() takes a file path;
        # confirm what value callers actually pass here.
        self.assistant.add_file(file)

    def bot_response(self, message, history: List[Tuple]) -> Tuple[str, List[Tuple]]:
        """Submit handler for the message box.

        Asks the assistant for a reply on the current thread, appends the
        ``(message, response)`` pair to ``history`` in place, and returns
        ``("", history)`` so the input box is cleared.
        """
        response = self.assistant.chat(message, self.thread_id)
        history.append((message, response))
        return "", history

    def create_interface(self):
        """Build the Gradio Blocks UI, wire its events, and return it."""
        with gr.Blocks() as demo:
            chatbot = gr.Chatbot(height=400)
            message = gr.Textbox(
                placeholder="Ask me anything",
                scale=7,
                label="Input message",
            )
            new_thread_button = gr.Button(value="Create New Thread")
            clear_button = gr.Button(value="Clear All")

            # Resets the input box and chat display; the reference is unused.
            _ = gr.ClearButton([message, chatbot])

            message.submit(
                self.bot_response,
                inputs=[message, chatbot],
                outputs=[message, chatbot],
            )
            new_thread_button.click(self.create_thread)
            clear_button.click(self.clear_all, outputs=chatbot)

        return demo
89
+
90
+
91
if __name__ == "__main__":
    # Script entry point: build the agent, assemble its UI, start serving.
    VisaPolicyAgent().create_interface().launch()
assistant.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Dict
3
+ import time
4
+ from openai import OpenAI
5
+ from assistant_file_handler import FileHandler
6
+ from openai.types.beta.thread import Thread
7
+ from openai.types.beta.threads.message import Message
8
+ from openai.types.beta.vector_stores.vector_store_file import VectorStoreFile
9
+
10
+
11
+ import structlog
12
+ from openai.pagination import SyncCursorPage
13
+
14
+
15
class OAIAssistant:
    """Wrapper around one OpenAI assistant and the vector store it searches."""

    # Run states in which the run is still progressing and polling continues.
    # Any other state is terminal (or needs caller action) and ends the wait.
    _PENDING_RUN_STATUSES = frozenset({"queued", "in_progress", "cancelling"})

    def __init__(self, assistant_id, vectorstore_id) -> None:
        """Create the API client and retrieve the assistant ``assistant_id``."""
        self.file_handler = FileHandler()
        self.assistant_id = assistant_id
        self.vectorstore_id = vectorstore_id
        self.client = OpenAI()
        self.openai_assistant = self.client.beta.assistants.retrieve(
            assistant_id=self.assistant_id
        )
        self.log = structlog.get_logger()

    def create(self):
        """Placeholder; currently does nothing."""
        pass

    def add_file(self, file_path: str):
        """Upload the file at ``file_path`` and attach it to the vector store."""
        file_id = self.file_handler.add(file_path=file_path).id
        self.client.beta.vector_stores.files.create(
            file_id=file_id, vector_store_id=self.vectorstore_id
        )

    def remove_file(self, file_id: str):
        """Detach ``file_id`` from the vector store, then delete the file."""
        self.client.beta.vector_stores.files.delete(
            file_id=file_id, vector_store_id=self.vectorstore_id
        )
        self.log.info(
            f"OAIAssistant: Deleted file with id {file_id} from vector database"
        )
        self.file_handler.remove(file_id=file_id)
        self.log.info(f"OAIAssistant: Deleted file with id {file_id} from file storage")

    def chat(self, query: str, thread_id: str):
        """Post ``query`` to the thread, run the assistant, return its reply.

        Args:
            query: The user's message.
            thread_id: Thread to use; when falsy a new thread is created.

        Returns:
            The assistant's reply text, or a fixed error string if anything
            fails. Exceptions are logged and not propagated.
        """
        try:
            if not thread_id:
                # create_thread() returns the Thread object. Taking ``.id``
                # twice raised AttributeError on the resulting string.
                thread_id = self.create_thread().id

            self.client.beta.threads.messages.create(
                thread_id=thread_id,
                role="user",
                content=query,
            )
            self.log.info(
                "OAIAssistant: Message added to thread",
                thread_id=thread_id,
                query=query,
            )

            new_message, message_file_ids = self.__run_assistant(thread_id=thread_id)
            file_paths = []
            if message_file_ids:
                # The download target must exist before files are written.
                os.makedirs("./tmp", exist_ok=True)
            for msg_file_id in message_file_ids:
                png_file_path = f"./tmp/{msg_file_id}.png"
                self.__convert_file_to_png(
                    file_id=msg_file_id, write_path=png_file_path
                )
                file_paths.append(png_file_path)

            file_ids = self.__add_files(file_paths=file_paths)

            # NOTE(review): this posts the reply text to the thread again as
            # an assistant message, with any image files attached - confirm
            # this is intended.
            self.client.beta.threads.messages.create(
                thread_id=thread_id,
                role="assistant",
                content=new_message,
                attachments=[
                    {"file_id": file_id, "tools": [{"type": "file_search"}]}
                    for file_id in file_ids.values()
                ]
                if file_ids
                else None,
            )
            self.log.info(
                "OAIAssistant: Assistant response generated", response=new_message
            )
            return new_message
        except Exception as e:
            self.log.error("OAIAssistant: Error generating response", error=str(e))
            return "OAIAssistant: An error occurred while generating the response."

    def create_thread(self) -> "Thread":
        """Create and return a thread bound to this assistant's vector store."""
        thread: Thread = self.client.beta.threads.create(
            tool_resources={"file_search": {"vector_store_ids": [self.vectorstore_id]}}
        )
        return thread

    def delete_thread(self, thread_id: str):
        """Delete the thread identified by ``thread_id``."""
        self.client.beta.threads.delete(thread_id=thread_id)
        self.log.info(f"OAIAssistant: Deleted thread with id: {thread_id}")

    def __convert_file_to_png(self, file_id, write_path):
        """Download the content of ``file_id`` and write it to ``write_path``.

        The bytes are written unchanged; no image conversion is performed.
        Errors are logged and re-raised.
        """
        try:
            data = self.client.files.content(file_id)
            data_bytes = data.read()
            with open(write_path, "wb") as file:
                file.write(data_bytes)
            self.log.info("OAIAssistant: File converted to PNG", file_path=write_path)
        except Exception as e:
            self.log.error("OAIAssistant: Error converting file to PNG", error=str(e))
            raise

    def __add_files(self, file_paths: List[str]) -> Dict[str, str]:
        """Upload each path and return a ``{basename: file_id}`` mapping.

        Errors are logged and re-raised.
        """
        try:
            files = {}
            for path in file_paths:
                filename = os.path.basename(path)
                uploaded = self.file_handler.add(path)
                files[filename] = uploaded.id
            self.log.info("OAIAssistant: Files added", files=files)
            return files
        except Exception as e:
            self.log.error("OAIAssistant: Error adding files", error=str(e))
            raise

    def __run_assistant(self, thread_id: str):
        """Start a run on ``thread_id`` and wait for it to finish.

        Returns:
            ``(reply_text, image_file_ids)`` for a completed run. For a run
            that ends in any other state, a fixed error string and an empty
            list.

        Raises:
            Any exception from the API calls, after logging it.
        """
        try:
            run = self.client.beta.threads.runs.create(
                thread_id=thread_id,
                assistant_id=self.assistant_id,
            )
            self.log.info("OAIAssistant: Assistant run started", run_id=run.id)

            # Poll only while the run can still make progress. Waiting for
            # "completed" alone never returns when the run ends as
            # cancelled/expired/incomplete or stops in requires_action.
            while run.status in self._PENDING_RUN_STATUSES:
                time.sleep(1)
                run = self.client.beta.threads.runs.retrieve(
                    thread_id=thread_id, run_id=run.id
                )

            if run.status != "completed":
                self.log.error(
                    "OAIAssistant: Assistant run failed",
                    run_id=run.id,
                    status=run.status,
                )
                self.log.info(run)

                return "OAIAssistant: Error in generating response", []

            messages: SyncCursorPage[Message] = self.client.beta.threads.messages.list(
                thread_id=thread_id, run_id=run.id
            )
            new_message, file_ids = self.__extract_messages(messages)

            return new_message, file_ids
        except Exception as e:
            self.log.error("OAIAssistant: Error running assistant", error=str(e))
            raise

    def __extract_messages(self, messages: "SyncCursorPage[Message]"):
        """Flatten a page of messages into reply text plus image file ids.

        Every content part of every message is visited, not only the first,
        so a message carrying both text and images keeps all of them. Text
        parts contribute their value; image parts contribute an
        ``"Image File:"`` marker with the file id and are also collected.

        Returns:
            ``(text, image_file_ids)``.

        Raises:
            Any exception hit while reading the messages, after logging it.
        """
        try:
            new_message = ""
            file_ids = []
            for message in messages.data:
                for part in message.content:
                    if part.type == "text":
                        new_message += part.text.value
                    elif part.type == "image_file":
                        new_message += "Image File:\n"
                        new_message += part.image_file.file_id
                        new_message += "\n\n"
                        file_ids.append(part.image_file.file_id)
            self.log.info("OAIAssistant: Messages extracted", message=new_message)
            return new_message, file_ids
        except Exception as e:
            self.log.error("OAIAssistant: Error extracting messages", error=str(e))
            raise

    def get_files_list(self):
        """Return the ids of the files attached to the vector store."""
        files: SyncCursorPage[VectorStoreFile] = (
            self.client.beta.vector_stores.files.list(
                vector_store_id=self.vectorstore_id
            )
        )
        return [file.id for file in files]
assistant_file_handler.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from openai.types.file_object import FileObject
3
+ import structlog
4
+
5
+
6
class FileHandler:
    """Uploads files to, and deletes files from, the OpenAI Files API."""

    def __init__(self) -> None:
        """Create the API client and a structlog logger."""
        self.client = OpenAI()
        self.log = structlog.get_logger()

    def add(self, file_path: str) -> "FileObject":
        """Upload the file at ``file_path`` with purpose ``"assistants"``.

        Args:
            file_path: Path of the local file to upload.

        Returns:
            The ``FileObject`` returned by the API; callers read ``.id``.
        """
        self.log.info(f"File Handler: Reading File with {file_path}")
        # The context manager closes the handle once the upload returns (or
        # fails); it was previously left open.
        with open(file_path, "rb") as file_obj:
            self.log.info("File Handler: Adding file")
            file: FileObject = self.client.files.create(
                file=file_obj, purpose="assistants"
            )
        self.log.info(f"File Handler: Created file object with id: {file.id}")

        return file

    def remove(self, file_id: str):
        """Delete the uploaded file identified by ``file_id``."""
        self.client.files.delete(file_id=file_id)
        self.log.info(f"File Handler: Deleted file object with id: {file_id}")
assistant_utils.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ from openai.types.beta.vector_store import VectorStore
3
+ from dotenv import load_dotenv
4
+
5
# One-off provisioning script: creates a vector store, uploads the source
# documents into it, then creates the assistant that searches that store.
# Everything runs at import time, so run it as a script only.
load_dotenv()

client = OpenAI()

vector_store: VectorStore = client.beta.vector_stores.create(name="dairyfarm-data")

# Documents indexed for the assistant, in upload order.
DATA_FILES = (
    "./data/China Dairy Imports Template AB Copy (Alyssa Badger's conflicted copy 2024-08-23).pptx",
    "./data/US Dairy Exports-Imports Template.BW.2.pptx",
    "./data/Cold Storage Publisher.pdf",
    "./data/GDT Auction Analysis Template Publisher.pdf",
    "./data/Publisher US Milk Production 06-21-21.2.BW.pdf",
)


def _upload(path):
    """Upload one local file for assistant use and return its file object."""
    # ``with`` closes the handle after the upload; the handles were
    # previously left open for the life of the process.
    with open(path, "rb") as handle:
        return client.files.create(file=handle, purpose="assistants")


uploaded_files = [_upload(path) for path in DATA_FILES]

# Numbered names kept so anything that referred to them still resolves.
file_id1, file_id2, file_id3, file_id4, file_id5 = uploaded_files


instructions = """
You are an advanced document analysis AI specialized in extracting product insights from PDFs and PowerPoint (PPTX) files. Your primary tasks are:

1. Analyze the content of uploaded PDF and PPTX files.
2. Identify and extract key information about products mentioned in these documents.
3. Provide concise yet comprehensive insights about the products, including features, benefits, specifications, and market positioning.
4. Offer comparative analysis if multiple products are mentioned.
5. Highlight any unique selling points or innovative aspects of the products.

Greeting Instructions:
- Always start your interaction with a polite and professional greeting.
- Use the user's name if provided, otherwise use a general greeting.
- Tailor your greeting based on the time of day (morning, afternoon, evening) if that information is available.
- Keep your initial greeting brief and get to the point quickly.

Example greetings:
- "Hello [Name], welcome to the Product Insight Assistant. How can I help you today?"
- "Good [morning/afternoon/evening]. I'm ready to analyze your product documents. What would you like to know?"

After greeting, promptly ask what specific document the user would like you to analyze or what product information they're seeking.

Remember:
- Maintain a professional and neutral tone throughout the interaction.
- If you encounter any ambiguities or need more information, don't hesitate to ask for clarification.
- Respect confidentiality and do not share or retain any sensitive information from the documents.
- If asked about topics outside your specialization, politely redirect the conversation back to product analysis or refer the user to appropriate resources.

Your goal is to provide accurate, insightful, and actionable information about products based on the document analysis, helping users make informed decisions or gain a deeper understanding of the products described in their files.
"""

descriptions = """
Product Insight Analyzer: An AI-powered assistant that examines PDF and PowerPoint files to extract key product information. It provides detailed insights on features, benefits, and market positioning, offering comparative analysis when multiple products are involved. This tool helps users quickly understand and evaluate products described in complex documents, supporting informed decision-making and in-depth product comprehension.
"""

# Attach every uploaded file to the vector store so file_search can see it.
for uploaded in uploaded_files:
    client.beta.vector_stores.files.create(
        vector_store_id=vector_store.id, file_id=uploaded.id
    )


assistant = client.beta.assistants.create(
    name="Dairyfarm Assistant",
    instructions=instructions,
    description=descriptions,
    model="gpt-4o-mini",
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
    tools=[{"type": "file_search"}]
)
print(assistant)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ openai
2
+ gradio
3
+ python-dotenv
4
+ structlog
utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
def file_type(file_path, allowed_extensions=('pdf', 'pptx', 'docx')):
    """Return the lower-cased extension of ``file_path`` if it is allowed.

    Only the file name's extension is inspected; the file is never opened.

    Args:
        file_path: Path or file name to classify.
        allowed_extensions: Extensions (without the dot, lower case) that are
            accepted. The default is an immutable tuple so it cannot be
            altered between calls.

    Returns:
        The extension without its leading dot, or ``None`` when the path has
        no extension or the extension is not in ``allowed_extensions``.
    """
    _, extension = os.path.splitext(file_path)
    extension = extension.lower()[1:]  # Remove the dot and convert to lowercase

    if extension in allowed_extensions:
        return extension

    # Extension missing or not in the allowed set.
    return None