import os
import base64
from io import BytesIO
import tempfile
import shutil
import streamlit as st
from PIL import Image
import fitz  # PyMuPDF
from openai import OpenAI

# OpenAI API Key
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)
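
# Typical launch (assuming this file is saved as app.py): streamlit run app.py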

def extract_text_and_images(file_path):
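    """Extract text and base64-encoded image data URLs from a PDF or image file.

    Returns a (text_content, image_urls) tuple; image_urls contains data: URLs
    suitable for passing to the OpenAI vision API.
    """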
    text_content = ""
    image_urls = []

    try:
        extension = os.path.splitext(file_path)[1].lower()

        if extension == ".pdf":
            doc = fitz.open(file_path)
            for page_index in range(len(doc)):
                page = doc.load_page(page_index)
                image_list = page.get_images()
                for img_index, img in enumerate(image_list):
                    xref = img[0]
                    base_image = doc.extract_image(xref)
                    image_bytes = base_image["image"]
                    image = Image.open(BytesIO(image_bytes))
                    image.thumbnail((512, 512))

                    buffered = BytesIO()
                    # Force JPEG for PDF images; convert to RGB first since JPEG cannot store an alpha channel
                    image.convert("RGB").save(buffered, format="JPEG")
                    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
                    data_url = f"data:image/jpeg;base64,{img_str}"
                    image_urls.append(data_url)

                text_content += page.get_text("text") or ""

        elif extension in (".jpg", ".jpeg", ".png"):
            image = Image.open(file_path)
            image.thumbnail((512, 512))

            buffered = BytesIO()
            image_format = "jpeg" if extension in (".jpg", ".jpeg") else "png"
            image.save(buffered, format=image_format)

            img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
            image_urls.append(f"data:image/{image_format};base64,{img_str}")

        else:
            st.error(f"Unsupported file type: {extension}")

    except Exception as e:
        st.error(f"An error occurred during file processing: {e}")

    return text_content, image_urls

def generate_ai_response(text_content, image_urls, text_prompt):
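    """Send the prompt, any extracted text, and extracted images to the chat API and return a streaming response."""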
    try:
        if image_urls:
            # Combine the prompt, any extracted text, and the images into a single multimodal message
            content = [{"type": "text", "text": text_prompt}]
            if text_content:
                content.append({"type": "text", "text": f"Extracted text from the file:\n{text_content}"})
            content.extend({"type": "image_url", "image_url": {"url": url}} for url in image_urls)
            messages = [{"role": "user", "content": content}]
        else:
            messages = [{"role": "user", "content": f"{text_prompt}\n\nAnalyze the text: {text_content}"}]

        response = client.chat.completions.create(
            model="gpt-4o", messages=messages, max_tokens=2048, stream=True
        )
        return response

    except Exception as e:
        st.error(f"An error occurred during AI response generation: {e}")
        return None

def main():
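    """Streamlit entry point: handle file upload, extraction, preview, and streamed AI responses."""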
    text_content = ""
    image_urls = []

    st.title("Multimodal File Processing using GPT-4 Turbo Model")

    uploaded_file = st.file_uploader("Upload a File (PDF, JPG, PNG, JPEG)", type=["pdf", "jpg", "jpeg", "png"])
    if uploaded_file is not None:
        temp_dir = tempfile.mkdtemp()
        file_path = os.path.join(temp_dir, uploaded_file.name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getvalue())

        text_content, image_urls = extract_text_and_images(file_path)

        if text_content:
            st.subheader("Extracted Text")
            st.text(text_content)

        if image_urls:
            st.subheader("Extracted Images")
            for img_url in image_urls:
                st.image(img_url, caption="Extracted Image", use_container_width=True)

        shutil.rmtree(temp_dir)

    text_prompt = st.text_area("Enter a text prompt for the AI model:", "")

    if st.button("Generate Response"):
        if not text_prompt:
            st.warning("Please enter a text prompt.")
            return

        response_placeholder = st.empty()
        response_text = ""

        with st.spinner("Processing..."):
            response = generate_ai_response(text_content, image_urls, text_prompt)

            if response is None:
                st.error("There was an issue contacting the OpenAI API. Please check your API key and try again.")
                return

            for chunk in response:
                # Some streamed chunks (e.g. the final one) may carry no choices or an empty delta
                if chunk.choices and chunk.choices[0].delta.content:
                    response_text += chunk.choices[0].delta.content
                    response_placeholder.write(response_text)

        st.success("Response generated successfully!")

if __name__ == "__main__":
    main()