Spaces:
Sleeping
Sleeping
File size: 4,650 Bytes
59e2122 3114a1e 59e2122 49c45c3 bf667e2 3114a1e 59e2122 4a47ee5 3114a1e 59e2122 bf667e2 09902f4 3114a1e 09902f4 3114a1e 09902f4 3114a1e 09902f4 73268c4 3114a1e bf667e2 970c52b 09902f4 970c52b 09902f4 49c45c3 09902f4 3114a1e 598b653 3114a1e bf667e2 3114a1e 09902f4 73268c4 59e2122 bf667e2 59e2122 09902f4 59e2122 53226ff 09902f4 bf667e2 09902f4 bf667e2 09902f4 59e2122 09902f4 59e2122 3114a1e 59e2122 3114a1e bf667e2 09902f4 bf667e2 09902f4 bf667e2 09902f4 59e2122 09902f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import os
import base64
import io
from io import BytesIO
import tempfile
import shutil
import streamlit as st
from PIL import Image
import fitz # PyMuPDF
from openai import OpenAI
# The OpenAI credential is read from the environment (None when unset) and
# used to build the single module-level client shared by the app.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
def _image_to_data_url(image, image_format):
    """Shrink *image* to fit 512x512 and return it as a base64 ``data:`` URL.

    Args:
        image: An opened PIL image.
        image_format: Target encoding, ``"jpeg"`` or ``"png"``.

    Returns:
        A ``data:image/<format>;base64,...`` string.
    """
    # The JPEG writer rejects alpha/palette modes (RGBA, LA, P, ...) with an
    # OSError, so normalise anything that is not plain RGB or greyscale.
    if image_format == "jpeg" and image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    image.thumbnail((512, 512))
    buffered = io.BytesIO()
    image.save(buffered, format=image_format)
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/{image_format};base64,{img_str}"


def extract_text_and_images(file_path):
    """Extract text and thumbnail images from a PDF or image file.

    For a ``.pdf`` every embedded image on every page is re-encoded as a
    JPEG data URL and the plain text of each page is concatenated. For
    ``.jpg``/``.jpeg``/``.png`` the file itself becomes a single data URL
    and no text is produced. Any other extension is reported through
    ``st.error``.

    Args:
        file_path: Path of the file on disk; its extension selects the mode.

    Returns:
        A ``(text_content, image_urls)`` tuple: the extracted text (possibly
        empty) and a list of base64 data URLs (possibly empty).

    Errors raised while processing are not propagated; they are shown with
    ``st.error`` and whatever was collected up to that point is returned.
    """
    text_parts = []
    image_urls = []
    try:
        extension = os.path.splitext(file_path)[1].lower()
        if extension == ".pdf":
            # Context manager releases the document handle even on failure.
            with fitz.open(file_path) as doc:
                for page in doc:
                    for img in page.get_images():
                        xref = img[0]  # first tuple item is the image xref
                        base_image = doc.extract_image(xref)
                        with Image.open(BytesIO(base_image["image"])) as image:
                            # Force JPEG for PDF images
                            image_urls.append(_image_to_data_url(image, "jpeg"))
                    text_parts.append(page.get_text("text") or "")
        elif extension in (".jpg", ".jpeg", ".png"):
            image_format = "jpeg" if extension in (".jpg", ".jpeg") else "png"
            with Image.open(file_path) as image:
                image_urls.append(_image_to_data_url(image, image_format))
        else:
            st.error(f"Unsupported file type: {extension}")
    except Exception as e:
        st.error(f"An error occurred during file processing: {e}")
    return "".join(text_parts), image_urls
def _build_messages(text_content, image_urls, text_prompt):
    """Build the chat ``messages`` payload for the given prompt and inputs."""
    if not image_urls:
        return [{"role": "user", "content": f"{text_prompt} Analyze the text: {text_content}"}]

    # With images present, still forward any extracted text so that a PDF
    # containing both text and pictures is not analysed from pictures alone.
    if text_content:
        prompt_text = f"{text_prompt} Analyze the text: {text_content}"
    else:
        prompt_text = text_prompt
    content = [{"type": "text", "text": prompt_text}]
    content.extend({"type": "image_url", "image_url": {"url": url}} for url in image_urls)
    return [{"role": "user", "content": content}]


def generate_ai_response(text_content, image_urls, text_prompt):
    """Request a streamed chat completion for the prompt, text and images.

    Args:
        text_content: Text extracted from the uploaded file (may be empty).
        image_urls: List of image data URLs (may be empty).
        text_prompt: The user's instruction for the model.

    Returns:
        The streaming response object returned by
        ``client.chat.completions.create(..., stream=True)``, or ``None`` if
        the request raised; in that case the error is shown via ``st.error``.
    """
    try:
        messages = _build_messages(text_content, image_urls, text_prompt)
        response = client.chat.completions.create(
            model="gpt-4o", messages=messages, max_tokens=2048, stream=True
        )
        return response
    except Exception as e:
        st.error(f"An error occurred during AI response generation: {e}")
        return None
def main():
    """Run the Streamlit page: upload a file, preview it, and query the model.

    The uploaded file is written to a temporary directory, passed to
    ``extract_text_and_images``, and its extracted text/images are shown.
    When the user submits a non-empty prompt, the streamed model reply is
    rendered incrementally into a placeholder.
    """
    text_content = ""
    image_urls = []
    # NOTE(review): the title says "GPT-4 Turbo" while generate_ai_response
    # requests model "gpt-4o" - confirm which wording is intended.
    st.title("Multimodal File Processing using GPT-4 Turbo Model")
    uploaded_file = st.file_uploader("Upload a File (PDF, JPG, PNG, JPEG)", type=["pdf", "jpg", "jpeg", "png"])
    if uploaded_file is not None:
        # basename() keeps a crafted upload name from escaping the temp dir.
        safe_name = os.path.basename(uploaded_file.name)
        # TemporaryDirectory removes the directory even if writing fails.
        with tempfile.TemporaryDirectory() as temp_dir:
            file_path = os.path.join(temp_dir, safe_name)
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getvalue())
            text_content, image_urls = extract_text_and_images(file_path)
        # Results are held in memory, so the file is no longer needed here.
        if text_content:
            st.subheader("Extracted Text")
            st.text(text_content)
        if image_urls:
            st.subheader("Extracted Images")
            for img_url in image_urls:
                st.image(img_url, caption="Extracted Image", use_container_width=True)
    text_prompt = st.text_area("Enter a text prompt for the AI model:", "")
    if st.button("Generate Response"):
        if not text_prompt:
            st.warning("Please enter a text prompt.")
            return
        response_placeholder = st.empty()
        response_text = ""
        with st.spinner("Processing..."):
            response = generate_ai_response(text_content, image_urls, text_prompt)
            if response is None:
                st.error("There was an issue contacting the OpenAI API. Please check your API key and try again.")
                return
            for chunk in response:
                # Some stream chunks carry no choices; skip them rather than
                # raising IndexError on choices[0].
                if not chunk.choices:
                    continue
                delta_content = chunk.choices[0].delta.content
                if delta_content:
                    response_text += delta_content
                    # Re-render the accumulated text so the reply grows live.
                    response_placeholder.write(response_text)
        st.success("Response generated successfully!")
# Script entry point: launch the app only when executed directly.
if __name__ == "__main__":
    main()