Spaces:

mupparajuk31
/

imagechatbot

Sleeping

File size: 7,557 Bytes

24c01fa

import os
import pickle
from flask import Flask, render_template, request, redirect, url_for, flash, send_file
from flask_bcrypt import Bcrypt
from PIL import Image
import numpy as np
import cv2
import onnxruntime
from transformers import BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering
from werkzeug.utils import secure_filename
import pandas as pd
from duckduckgo_search import DDGS

# Initialize Flask app and Bcrypt for password hashing
app = Flask(__name__)
app.secret_key = 'your_secret_key'
bcrypt = Bcrypt(app)

# Define paths to your models
caption_model_path = "models/trained_model(1).pkl"
vqa_model_path = "models/vqa_model.pkl"

# Set upload folder and allowed extensions
app.config['UPLOAD_FOLDER'] = 'static/uploads'
app.config['UPSCALED_FOLDER'] = 'static/upscaled'
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['UPSCALED_FOLDER'], exist_ok=True)
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}

# Preload models and processors for efficiency
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
vqa_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")

# Helper functions
def allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

def convert_pil_to_cv2(image):
    # pil_image = image.convert("RGB")
    open_cv_image = np.array(image)
    # RGB to BGR
    open_cv_image = open_cv_image[:, :, ::-1].copy()
    return open_cv_image


def pre_process(img: np.array) -> np.array:
    # H, W, C -> C, H, W
    img = np.transpose(img[:, :, 0:3], (2, 0, 1))
    # C, H, W -> 1, C, H, W
    img = np.expand_dims(img, axis=0).astype(np.float32)
    return img

def post_process(img: np.array) -> np.array:
    # 1, C, H, W -> C, H, W
    img = np.squeeze(img)
    # C, H, W -> H, W, C
    img = np.transpose(img, (1, 2, 0))[:, :, ::-1].astype(np.uint8)
    return img

def inference(model_path: str, img_array: np.array) -> np.array:
    options = onnxruntime.SessionOptions()
    options.intra_op_num_threads = 1
    options.inter_op_num_threads = 1
    ort_session = onnxruntime.InferenceSession(model_path, options)
    ort_inputs = {ort_session.get_inputs()[0].name: img_array}
    ort_outs = ort_session.run(None, ort_inputs)

    return ort_outs[0]

def upscale(image_path: str, model="modelx2"):
    model_path = f"models/{model}.ort"
    pil_image = Image.open(image_path)
    img = convert_pil_to_cv2(pil_image)
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    if img.shape[2] == 4:
        alpha = img[:, :, 3]  # GRAY
        alpha = cv2.cvtColor(alpha, cv2.COLOR_GRAY2BGR)  # BGR
        alpha_output = post_process(inference(model_path, pre_process(alpha)))  # BGR
        alpha_output = cv2.cvtColor(alpha_output, cv2.COLOR_BGR2GRAY)  # GRAY

        img = img[:, :, 0:3]  # BGR
        image_output = post_process(inference(model_path, pre_process(img)))  # BGR
        image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2BGRA)  # BGRA
        image_output[:, :, 3] = alpha_output
    elif img.shape[2] == 3:
        image_output = post_process(inference(model_path, pre_process(img)))
    image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2RGB) 
    return image_output

# Main route
@app.route('/')
def index():
    return render_template('index.html', models=["modelx2", "modelx4"])

@app.route('/upload', methods=['POST'])
def upload_file():
    if 'file' not in request.files:
        flash('Please upload an image.')
        return redirect(url_for('index'))

    file = request.files['file']
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)
        similar_images = []
        try:
            upscaled_img = upscale(filepath)
            upscaled_filename = f"upscaled_{filename}"
            upscaled_path = os.path.join(app.config['UPSCALED_FOLDER'], upscaled_filename)
            cv2.imwrite(upscaled_path, upscaled_img)

            image = Image.open(upscaled_path).convert("RGB")
            caption = generate_caption(image)

            results = DDGS().images(
            keywords=caption,
            region="wt-wt",
            safesearch="off",
            size=None,
            color="Monochrome",
            type_image=None,
            layout=None,
            license_image=None,
            max_results=100,
            )
            for i in results:
                similar_images.append(i['image'])

            
            image_url = url_for('serve_upscaled_file', filename=upscaled_filename)
            return render_template('index.html',input_image_url=filepath, image_url=upscaled_path ,similar_images=similar_images, show_buttons=True)
        except Exception as e:
            flash(f"Upscaling failed: {e}")
            return redirect(url_for('index'))
    else:
        flash('Invalid file format. Please upload a PNG, JPG, or JPEG file.')
        return redirect(url_for('index'))

@app.route('/process_image', methods=['POST'])
def process_image():
    image_url = os.path.basename(request.form.get('image_url'))
    filepath = os.path.join(app.config['UPSCALED_FOLDER'], image_url)
    
    print(filepath)
    image = Image.open(filepath).convert("RGB")
    if os.path.exists(filepath):
        
        if 'vqa' in request.form:
            question = request.form.get('question')
            if question:
                answer = answer_question(image, question)
                return render_template('index.html', image_url=filepath, answer=answer, show_buttons=True, question=question)
            else:
                flash("Please enter a question.")
        elif 'caption' in request.form:
            caption = generate_caption(image)
            return render_template('index.html', image_url=filepath, caption=caption, show_buttons=True)
    else:
        flash("File not found. Please re-upload the image.")
    return redirect(url_for('index'))

def generate_caption(image):
    # Process the image and prepare it for input to the model
    inputs = caption_processor(images=image, return_tensors="pt")
    
    # Generate caption (model's output is token IDs)
    out = caption_model.generate(**inputs)
    
    # Decode the generated tokens back into text (the output is a tensor of token IDs)
    caption = caption_processor.decode(out[0], skip_special_tokens=True)
    
    return caption

def answer_question(image, question):
    # Process the image and the question, prepare them for input to the model
    inputs = vqa_processor(images=image, text=question, return_tensors="pt")
    
    # Generate an answer (model's output is token IDs)
    out = vqa_model.generate(**inputs)
    
    # Decode the generated tokens back into the answer (again, output is token IDs)
    answer = vqa_processor.decode(out[0], skip_special_tokens=True)
    
    return answer

@app.route('/uploads/<filename>')
def serve_uploaded_file(filename):
    return send_file(os.path.join(app.config['UPLOAD_FOLDER'], filename))

@app.route('/upscaled/<filename>')
def serve_upscaled_file(filename):
    return send_file(os.path.join(app.config['UPSCALED_FOLDER'], filename))

# Run app
if __name__ == '__main__':
    app.run(debug=True)