"""Streamlit app: upload a PDF or image, extract its text/images, and query an OpenAI model."""

import os
import base64
import io
from io import BytesIO
import tempfile
import shutil

import streamlit as st
from PIL import Image
import fitz  # PyMuPDF
from openai import OpenAI

# OpenAI API Key
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Pillow's JPEG writer rejects modes such as RGBA, LA and P; anything outside
# this set is converted to RGB before being saved as JPEG.
_JPEG_SAFE_MODES = ("RGB", "L")


def _image_to_data_url(image, image_format):
    """Shrink *image* to fit within 512x512 and return it as a base64 data URL.

    Args:
        image: An opened ``PIL.Image.Image``. It is resized in place by
            ``thumbnail``.
        image_format: Pillow format name, ``"jpeg"`` or ``"png"``; also used
            as the MIME subtype of the returned URL.

    Returns:
        A ``data:image/<format>;base64,...`` string.
    """
    image.thumbnail((512, 512))
    if image_format == "jpeg" and image.mode not in _JPEG_SAFE_MODES:
        # Without this, saving e.g. an RGBA or palette image as JPEG raises.
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format=image_format)
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/{image_format};base64,{img_str}"


def extract_text_and_images(file_path):
    """Extract text and embedded images from a PDF, or load a single image file.

    Supported extensions are ``.pdf``, ``.jpg``, ``.jpeg`` and ``.png``. Any
    other extension is reported through ``st.error``. Errors during processing
    are reported through ``st.error`` as well; whatever was extracted before
    the failure is still returned.

    Args:
        file_path: Path of the file on disk.

    Returns:
        A ``(text_content, image_urls)`` tuple: the concatenated page text
        (empty for image files) and a list of base64 data URLs, one per image.
    """
    page_texts = []
    image_urls = []
    try:
        extension = os.path.splitext(file_path)[1].lower()
        if extension == ".pdf":
            # Context manager closes the document even if a page fails.
            with fitz.open(file_path) as doc:
                for page in doc:
                    for img in page.get_images():
                        xref = img[0]
                        try:
                            base_image = doc.extract_image(xref)
                            with Image.open(BytesIO(base_image["image"])) as image:
                                # Force JPEG for PDF images
                                image_urls.append(_image_to_data_url(image, "jpeg"))
                        except Exception as e:
                            # Best effort: one unreadable image must not discard
                            # the rest of the document.
                            st.warning(f"Skipped an image that could not be processed: {e}")
                    page_texts.append(page.get_text("text") or "")
        elif extension in (".jpg", ".jpeg", ".png"):
            image_format = "jpeg" if extension in (".jpg", ".jpeg") else "png"
            # Close the file handle so the caller can delete the temp file.
            with Image.open(file_path) as image:
                image_urls.append(_image_to_data_url(image, image_format))
        else:
            st.error(f"Unsupported file type: {extension}")
    except Exception as e:
        st.error(f"An error occurred during file processing: {e}")
    return "".join(page_texts), image_urls


def _build_messages(text_content, image_urls, text_prompt):
    """Build the chat ``messages`` payload from the prompt, text and images."""
    # Include the extracted text whenever there is any, so a PDF containing
    # both text and images does not lose its text.
    if text_content:
        prompt = f"{text_prompt} Analyze the text: {text_content}"
    else:
        prompt = text_prompt

    if not image_urls:
        return [{"role": "user", "content": prompt}]

    parts = [{"type": "text", "text": prompt}]
    parts.extend({"type": "image_url", "image_url": {"url": url}} for url in image_urls)
    return [{"role": "user", "content": parts}]


def generate_ai_response(text_content, image_urls, text_prompt):
    """Start a streaming chat completion for the prompt and extracted content.

    Args:
        text_content: Text extracted from the uploaded file (may be empty).
        image_urls: Data URLs of extracted images (may be empty).
        text_prompt: The user's instruction for the model.

    Returns:
        The streaming response iterator returned by the OpenAI client, or
        ``None`` if the request could not be started (the error is shown via
        ``st.error``).
    """
    try:
        return client.chat.completions.create(
            model="gpt-4o",
            messages=_build_messages(text_content, image_urls, text_prompt),
            max_tokens=2048,
            stream=True,
        )
    except Exception as e:
        st.error(f"An error occurred during AI response generation: {e}")
        return None


def main():
    """Render the Streamlit page: upload, show extracted content, stream the reply."""
    text_content = ""
    image_urls = []

    # NOTE(review): the title mentions "GPT-4 Turbo" but the request above uses
    # model "gpt-4o" - confirm which one is intended.
    st.title("Multimodal File Processing using GPT-4 Turbo Model")

    uploaded_file = st.file_uploader(
        "Upload a File (PDF, JPG, PNG, JPEG)",
        type=["pdf", "jpg", "jpeg", "png"],
    )

    if uploaded_file is not None:
        temp_dir = tempfile.mkdtemp()
        try:
            # basename() keeps a crafted upload name from escaping temp_dir.
            file_path = os.path.join(temp_dir, os.path.basename(uploaded_file.name))
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getvalue())
            text_content, image_urls = extract_text_and_images(file_path)
        finally:
            # Extracted data lives in memory, so the temp copy can go right
            # away - and must go even if writing or extraction raised.
            shutil.rmtree(temp_dir, ignore_errors=True)

        if text_content:
            st.subheader("Extracted Text")
            st.text(text_content)

        if image_urls:
            st.subheader("Extracted Images")
            for img_url in image_urls:
                st.image(img_url, caption="Extracted Image", use_container_width=True)

    text_prompt = st.text_area("Enter a text prompt for the AI model:", "")

    if st.button("Generate Response"):
        if not text_prompt:
            st.warning("Please enter a text prompt.")
            return

        response_placeholder = st.empty()
        response_text = ""

        with st.spinner("Processing..."):
            response = generate_ai_response(text_content, image_urls, text_prompt)
            if response is None:
                st.error("There was an issue contacting the OpenAI API. Please check your API key and try again.")
                return

            try:
                for chunk in response:
                    # Some stream chunks carry no choices; skip them.
                    if not chunk.choices:
                        continue
                    delta_content = chunk.choices[0].delta.content
                    if delta_content:
                        response_text += delta_content
                        response_placeholder.write(response_text)
            except Exception as e:
                # Errors raised mid-stream surface here, not in
                # generate_ai_response.
                st.error(f"An error occurred while streaming the AI response: {e}")
                return

        st.success("Response generated successfully!")


if __name__ == "__main__":
    main()