louiecerv committed on
Commit bf667e2 · 1 Parent(s): 0e012ba

Streaming response added
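
The change converts generate_ai_response from returning a finished message into returning the raw stream, which main() then iterates chunk by chunk into a Streamlit placeholder. A minimal standalone sketch of that OpenAI streaming pattern, with an illustrative prompt and print-based output standing in for the app's placeholder widget:

import os
from openai import OpenAI

client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Ask for a streamed completion instead of a single finished message
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Summarize the uploaded document."}],
    max_tokens=2048,
    stream=True,
)

# Each chunk carries an incremental delta; content may be None (e.g. in the final chunk)
response_text = ""
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        response_text += delta
        print(delta, end="", flush=True)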

Files changed (2)
  1. app.py +58 -69
  2. image_0.jpg +0 -0
app.py CHANGED
@@ -3,8 +3,7 @@ import base64
 import io
 from io import BytesIO
 import tempfile
-import time
-
+import shutil
 import streamlit as st
 from PIL import Image
 from PyPDF2 import PdfReader
@@ -12,27 +11,22 @@ import fitz # PyMuPDF
 from openai import OpenAI

 # OpenAI API Key
-try:
-    api_key = os.getenv("OPENAI_API_KEY")
-    client = OpenAI(api_key=api_key)
-except Exception as e:
-    st.error(f"An error occurred during OpenAI client initialization: {e}")
-    st.stop()
+api_key = os.getenv("OPENAI_API_KEY")
+client = OpenAI(api_key=api_key)
+

 def extract_text_and_images_from_pdf(pdf_file_path):
     try:
         text_content = ""
         image_urls = []

-        pdf_stream = BytesIO(pdf_file.read())
-
         # Extract text using PdfReader
-        pdf_reader = PdfReader(pdf_stream)
+        pdf_reader = PdfReader(pdf_file_path)
         for page in pdf_reader.pages:
-            text_content += page.extract_text()
+            text_content += page.extract_text() or ""

         # Extract images using PyMuPDF
-        doc = fitz.open(stream=pdf_stream)
+        doc = fitz.open(pdf_file_path)
         for page_index in range(len(doc)):
             page = doc.load_page(page_index)
             image_list = page.get_images()
@@ -44,14 +38,12 @@ def extract_text_and_images_from_pdf(pdf_file_path):
             # Resize image (optional)
             image.thumbnail((512, 512))  # Adjust size as needed

-            # Encode the image as base64 and create a data URL
-            with open(image_path, "rb") as f:
-                img_str = base64.b64encode(f.read()).decode("utf-8")
-            data_url = f"data:image/jpeg;base64,{img_str}"
-            image_urls.append(data_url)
-
-        # Close the fitz document
-        doc.close()
+            # Encode the image as base64 and create a data URL
+            buffered = io.BytesIO()
+            image.save(buffered, format="JPEG")
+            img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
+            data_url = f"data:image/jpeg;base64,{img_str}"
+            image_urls.append(data_url)

         return text_content, image_urls
     except Exception as e:
@@ -61,78 +53,75 @@ def extract_text_and_images_from_pdf(pdf_file_path):

 def generate_ai_response(text_content, image_urls, text_prompt):
     try:
-        # Construct the messages list with the prompt and base64-encoded image URLs
-        messages = [
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": text_prompt},
-                    *[{"type": "image_url", "image_url": {"url": url}} for url in image_urls]
-                ]
-            }
-        ]
-
+        # Construct the messages list
+        if image_urls:
+            messages = [
+                {"role": "user", "content": f"{text_prompt} (Analyze the following text and images)"}
+            ]
         else:
-            # Construct the prompt on the extracted text only
             messages = [
-                {
-                    "role": "user",
-                    "content": [
-                        {"type": "text", "text": f"Perform this task {text_prompt} on this text {text_content}"}
-                    ]
-                }
+                {"role": "user", "content": f"{text_prompt} Analyze the text: {text_content}"}
             ]

+        # Create a streaming response
         response = client.chat.completions.create(
             model="gpt-4o-mini",
             messages=messages,
             max_tokens=2048,
+            stream=True,
         )
+        return response

-        content_string = response.choices[0].message.content
-        return content_string
     except Exception as e:
         st.error(f"An error occurred during AI response generation: {e}")
-        return ""
-

 def main():
-    st.title("Multimodal PDF Processing using GPT-4 Turbo Model")
-
-    text = """Prof. Louie F. Cervantes, M. Eng. (Information Engineering)
-    CCS 229 - Intelligent Systems
-    Department of Computer Science
-    College of Information and Communications Technology
-    West Visayas State University
-    """
-    with st.expander("About"):
-        st.text(text)
+    text_content = ""
+    image_urls = []

-    st.write("Upload a PDF file for analysis.")
+    st.title("Multimodal PDF Processing using GPT-4 Turbo Model")

     uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])
     if uploaded_pdf is not None:
-        text_content, image_urls = extract_text_and_images_from_pdf(uploaded_pdf)
+        # Save the uploaded PDF to a temporary directory
+        temp_dir = tempfile.mkdtemp()
+        pdf_file_path = os.path.join(temp_dir, uploaded_pdf.name)
+        with open(pdf_file_path, "wb") as f:
+            f.write(uploaded_pdf.getvalue())
+
+        text_content, image_urls = extract_text_and_images_from_pdf(pdf_file_path)

         st.subheader("Extracted Text")
         st.text(text_content)

-        text_prompt = st.text_area("Enter a text prompt for the AI model:", "")
-
         if image_urls:
             st.subheader("Extracted Images")
             for img_url in image_urls:
-                st.image(
-                    img_url, caption="Extracted Image", use_container_width=True
-                )
-
-        if st.button("Generate Response"):
-            with st.spinner("Processing..."):
-                ai_response = generate_ai_response(
-                    text_content, image_urls, text_prompt
-                )
-                st.success("Response generated!")
-                st.markdown(f"AI Response: {ai_response}")
+                st.image(img_url, caption="Extracted Image", use_container_width=True)
+
+        # Clean up the temporary directory
+        shutil.rmtree(temp_dir)
+
+        text_prompt = st.text_area("Enter a text prompt for the AI model:", "")
+
+        if st.button("Generate Response"):
+
+            response_placeholder = st.empty()
+            response_text = ""
+
+            with st.spinner("Processing..."):
+                response = generate_ai_response(text_content, image_urls, text_prompt)
+                print(response)
+
+                # Process and stream the response chunks as they arrive
+                for chunk in response:
+                    if chunk.choices[0].delta.content:
+                        delta_content = chunk.choices[0].delta.content
+                        response_text += delta_content
+                        response_placeholder.write(response_text)
+
+                st.success("Response generated successfully!")
+

 if __name__ == "__main__":
-    main()
+    main()
image_0.jpg ADDED
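
On the app.py side, the commit also replaces the disk-based image handling with an in-memory re-encode: each extracted PIL image is resized, saved as JPEG into a BytesIO buffer, and wrapped in a base64 data URL for st.image. A sketch of that step as a standalone helper; the function name and the RGB-conversion guard are illustrative additions, not part of the committed code:

import base64
import io
from PIL import Image

def image_to_data_url(image: Image.Image, max_size=(512, 512)) -> str:
    # Resize in place, then re-encode as JPEG entirely in memory (no temporary image file)
    image.thumbnail(max_size)
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")  # RGB conversion is an added guard for RGBA/paletted inputs
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/jpeg;base64,{img_str}"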