louiecerv committed on
Commit
09902f4
·
1 Parent(s): 7dbcd82
Files changed (1) hide show
  1. app.py +69 -61
app.py CHANGED
@@ -6,7 +6,6 @@ import tempfile
6
  import shutil
7
  import streamlit as st
8
  from PIL import Image
9
- from PyPDF2 import PdfReader
10
  import fitz # PyMuPDF
11
  from openai import OpenAI
12
 
@@ -14,113 +13,122 @@ from openai import OpenAI
14
  api_key = os.getenv("OPENAI_API_KEY")
15
  client = OpenAI(api_key=api_key)
16
 
17
- def extract_text_and_images_from_pdf(pdf_file_path):
 
 
 
18
  try:
19
- text_content = ""
20
- image_urls = []
21
-
22
- # Extract text using PdfReader
23
- pdf_reader = PdfReader(pdf_file_path)
24
- for page in pdf_reader.pages:
25
- text_content += page.extract_text() or ""
26
-
27
- # Extract images using PyMuPDF
28
- doc = fitz.open(pdf_file_path)
29
- for page_index in range(len(doc)):
30
- page = doc.load_page(page_index)
31
- image_list = page.get_images()
32
- for img_index, img in enumerate(image_list):
33
- xref = img[0]
34
- base_image = doc.extract_image(xref)
35
- image_bytes = base_image["image"]
36
- image = Image.open(BytesIO(image_bytes))
37
- # Resize image (optional)
38
- image.thumbnail((512, 512)) # Adjust size as needed
39
-
40
- # Encode the image as base64 and create a data URL
41
- buffered = io.BytesIO()
42
- image.save(buffered, format="JPEG")
43
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
44
- data_url = f"data:image/jpeg;base64,{img_str}"
45
- image_urls.append(data_url)
46
-
47
- return text_content, image_urls
 
 
 
 
 
 
 
48
  except Exception as e:
49
- st.error(f"An error occurred during PDF processing: {e}")
50
- return "", []
51
 
 
52
 
53
  def generate_ai_response(text_content, image_urls, text_prompt):
54
  try:
55
- # Construct the messages list
56
  if image_urls:
57
  messages = [
58
- {"role": "user", "content": f"{text_prompt} (Analyze the following text and images)"}
59
- ]
 
 
60
  else:
61
- messages = [
62
- {"role": "user", "content": f"{text_prompt} Analyze the text: {text_content}"}
63
- ]
64
 
65
- # Create a streaming response
66
  response = client.chat.completions.create(
67
- model="gpt-4o-mini",
68
- messages=messages,
69
- max_tokens=2048,
70
- stream=True,
71
  )
72
  return response
73
 
74
  except Exception as e:
75
  st.error(f"An error occurred during AI response generation: {e}")
 
76
 
77
  def main():
78
  text_content = ""
79
  image_urls = []
80
 
81
- st.title("Multimodal PDF Processing using GPT-4 Turbo Model")
82
 
83
- uploaded_pdf = st.file_uploader("Upload a PDF", type=["pdf"])
84
- if uploaded_pdf is not None:
85
- # Save the uploaded PDF to a temporary directory
86
  temp_dir = tempfile.mkdtemp()
87
- pdf_file_path = os.path.join(temp_dir, uploaded_pdf.name)
88
- with open(pdf_file_path, "wb") as f:
89
- f.write(uploaded_pdf.getvalue())
90
 
91
- text_content, image_urls = extract_text_and_images_from_pdf(pdf_file_path)
92
 
93
- st.subheader("Extracted Text")
94
- st.text(text_content)
 
95
 
96
  if image_urls:
97
  st.subheader("Extracted Images")
98
  for img_url in image_urls:
99
  st.image(img_url, caption="Extracted Image", use_container_width=True)
100
 
101
- # Clean up the temporary directory
102
  shutil.rmtree(temp_dir)
103
 
104
  text_prompt = st.text_area("Enter a text prompt for the AI model:", "")
105
 
106
  if st.button("Generate Response"):
 
 
 
107
 
108
  response_placeholder = st.empty()
109
  response_text = ""
110
 
111
  with st.spinner("Processing..."):
112
  response = generate_ai_response(text_content, image_urls, text_prompt)
113
- print(response)
114
-
115
- # Process and stream the response chunks as they arrive
 
 
116
  for chunk in response:
117
  if chunk.choices[0].delta.content:
118
  delta_content = chunk.choices[0].delta.content
119
  response_text += delta_content
120
  response_placeholder.write(response_text)
121
 
122
- st.success("Response generated successfully!")
123
-
124
 
125
  if __name__ == "__main__":
126
- main()
 
6
  import shutil
7
  import streamlit as st
8
  from PIL import Image
 
9
  import fitz # PyMuPDF
10
  from openai import OpenAI
11
 
 
13
  api_key = os.getenv("OPENAI_API_KEY")
14
  client = OpenAI(api_key=api_key)
15
 
16
def extract_text_and_images(file_path):
    """Extract text and images from a PDF, or load a standalone image file.

    Args:
        file_path: Path to a .pdf, .jpg, .jpeg, or .png file on disk.

    Returns:
        A ``(text_content, image_urls)`` tuple: ``text_content`` is the
        concatenated page text (empty for image files) and ``image_urls``
        is a list of base64 data URLs suitable for ``st.image`` and the
        OpenAI vision API. Both are empty on error or unsupported types.
    """
    text_content = ""
    image_urls = []

    try:
        extension = os.path.splitext(file_path)[1].lower()

        if extension == ".pdf":
            doc = fitz.open(file_path)
            for page_index in range(len(doc)):
                page = doc.load_page(page_index)
                for img in page.get_images():
                    xref = img[0]  # xref number of the embedded image object
                    base_image = doc.extract_image(xref)
                    image = Image.open(BytesIO(base_image["image"]))
                    image.thumbnail((512, 512))  # cap size to keep payloads small

                    # JPEG cannot store alpha/palette modes; convert first so
                    # save() does not raise on RGBA/P/CMYK images.
                    if image.mode not in ("RGB", "L"):
                        image = image.convert("RGB")

                    buffered = io.BytesIO()
                    image.save(buffered, format="jpeg")  # Force JPEG for PDF images
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
                    image_urls.append(f"data:image/jpeg;base64,{img_str}")

                text_content += page.get_text("text") or ""

        elif extension in (".jpg", ".jpeg", ".png"):
            image = Image.open(file_path)
            image.thumbnail((512, 512))

            buffered = io.BytesIO()
            image_format = "jpeg" if extension in (".jpg", ".jpeg") else "png"
            # Same alpha/palette guard as above, needed only for JPEG output.
            if image_format == "jpeg" and image.mode not in ("RGB", "L"):
                image = image.convert("RGB")
            image.save(buffered, format=image_format)

            img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
            image_urls.append(f"data:image/{image_format};base64,{img_str}")

        else:
            st.error(f"Unsupported file type: {extension}")

    except Exception as e:
        st.error(f"An error occurred during file processing: {e}")

    return text_content, image_urls
61
 
62
def generate_ai_response(text_content, image_urls, text_prompt):
    """Stream a chat completion for the extracted text and/or images.

    Args:
        text_content: Text extracted from the uploaded file (may be empty).
        image_urls: Base64 data URLs of extracted images (may be empty).
        text_prompt: The user's instruction for the model.

    Returns:
        The streaming response iterator from the OpenAI client, or ``None``
        if the request failed (the error is surfaced via ``st.error``).
    """
    try:
        if image_urls:
            # Multimodal request: one text part followed by one image_url
            # part per extracted image, per the Chat Completions vision
            # format. (The previous literal `["type": "text": ...]` was a
            # syntax error.)
            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"Perform the task {text_prompt} on the provided images",
                        },
                        *[
                            {"type": "image_url", "image_url": {"url": url}}
                            for url in image_urls
                        ],
                    ],
                }
            ]
        else:
            messages = [
                {"role": "user", "content": f"{text_prompt} Analyze the text: {text_content}"}
            ]

        response = client.chat.completions.create(
            model="gpt-4o-mini", messages=messages, max_tokens=2048, stream=True
        )
        return response

    except Exception as e:
        st.error(f"An error occurred during AI response generation: {e}")
        return None
81
 
82
def main():
    """Streamlit entry point: upload a file, extract its content, and stream
    an AI response for a user-supplied prompt."""
    text_content = ""
    image_urls = []

    # Title now names the model actually used below (gpt-4o-mini).
    st.title("Multimodal File Processing using GPT-4o-mini Model")

    # Restrict the picker to the types extract_text_and_images supports;
    # `type=None` previously accepted any file despite the label.
    uploaded_file = st.file_uploader(
        "Upload a File (PDF, JPG, PNG, JPEG)", type=["pdf", "jpg", "jpeg", "png"]
    )
    if uploaded_file is not None:
        # Persist the upload to disk because fitz/PIL open files by path.
        temp_dir = tempfile.mkdtemp()
        try:
            file_path = os.path.join(temp_dir, uploaded_file.name)
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getvalue())

            text_content, image_urls = extract_text_and_images(file_path)

            if text_content:
                st.subheader("Extracted Text")
                st.text(text_content)

            if image_urls:
                st.subheader("Extracted Images")
                for img_url in image_urls:
                    st.image(img_url, caption="Extracted Image", use_container_width=True)
        finally:
            # Always remove the temp copy, even if extraction raised.
            shutil.rmtree(temp_dir, ignore_errors=True)

    text_prompt = st.text_area("Enter a text prompt for the AI model:", "")

    if st.button("Generate Response"):
        if not text_prompt:
            st.warning("Please enter a text prompt.")
            return

        response_placeholder = st.empty()
        response_text = ""

        with st.spinner("Processing..."):
            response = generate_ai_response(text_content, image_urls, text_prompt)

            if response is None:
                st.error("There was an issue contacting the OpenAI API. Please check your API key and try again.")
                return

            # Accumulate streamed deltas and re-render the growing answer.
            for chunk in response:
                if chunk.choices[0].delta.content:
                    delta_content = chunk.choices[0].delta.content
                    response_text += delta_content
                    response_placeholder.write(response_text)

            st.success("Response generated successfully!")


if __name__ == "__main__":
    main()