import os
from flask import Flask, render_template, request, redirect, url_for, flash, send_from_directory
from PIL import Image
import numpy as np
import cv2
import onnxruntime
from transformers import BlipProcessor, BlipForConditionalGeneration, BlipForQuestionAnswering
from werkzeug.utils import secure_filename
from duckduckgo_search import DDGS
# Initialize the Flask app
app = Flask(__name__)
app.secret_key = 'your_secret_key'  # replace with a strong random value in production
# Set upload folder and allowed extensions
app.config['UPLOAD_FOLDER'] = 'static/uploads'
app.config['UPSCALED_FOLDER'] = 'static/upscaled'
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
os.makedirs(app.config['UPSCALED_FOLDER'], exist_ok=True)
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg'}
# Preload models and processors for efficiency
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
vqa_processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-capfilt-large")
vqa_model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-capfilt-large")
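# Note: from_pretrained() downloads the BLIP weights from the Hugging Face Hub on
# first run and caches them locally, so the first startup is slow but later ones
# load from the cache.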
# Helper functions
def allowed_file(filename):
    return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
def convert_pil_to_cv2(image):
    open_cv_image = np.array(image)
    if open_cv_image.ndim == 2:
        # Grayscale: no channel reordering needed
        return open_cv_image
    if open_cv_image.shape[2] == 4:
        # RGBA -> BGRA, keeping the alpha channel in place
        return cv2.cvtColor(open_cv_image, cv2.COLOR_RGBA2BGRA)
    # RGB -> BGR
    return cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
def pre_process(img: np.ndarray) -> np.ndarray:
    # H, W, C -> C, H, W
    img = np.transpose(img[:, :, 0:3], (2, 0, 1))
    # C, H, W -> 1, C, H, W
    img = np.expand_dims(img, axis=0).astype(np.float32)
    return img
def post_process(img: np.ndarray) -> np.ndarray:
    # 1, C, H, W -> C, H, W
    img = np.squeeze(img)
    # C, H, W -> H, W, C, staying in BGR channel order for cv2
    img = np.transpose(img, (1, 2, 0)).astype(np.uint8)
    return img
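# The pre_process/post_process pair adapts images to the ONNX models' expected
# 1 x 3 x H x W float32 layout. The "modelx2"/"modelx4" names suggest 2x and 4x
# super-resolution networks, so the output is assumed to be 2x/4x the input size.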
def inference(model_path: str, img_array: np.ndarray) -> np.ndarray:
    options = onnxruntime.SessionOptions()
    options.intra_op_num_threads = 1
    options.inter_op_num_threads = 1
    ort_session = onnxruntime.InferenceSession(model_path, options)
    ort_inputs = {ort_session.get_inputs()[0].name: img_array}
    ort_outs = ort_session.run(None, ort_inputs)
    return ort_outs[0]
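# Note: inference() builds a fresh InferenceSession on every call; for a busier
# deployment the sessions could be cached per model path to avoid reloading the
# .ort file on each request.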
def upscale(image_path: str, model="modelx2"):
    model_path = f"models/{model}.ort"
    pil_image = Image.open(image_path)
    img = convert_pil_to_cv2(pil_image)
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    if img.shape[2] == 4:
        # Upscale the alpha channel separately from the colour channels
        alpha = img[:, :, 3]  # GRAY
        alpha = cv2.cvtColor(alpha, cv2.COLOR_GRAY2BGR)  # BGR
        alpha_output = post_process(inference(model_path, pre_process(alpha)))  # BGR
        alpha_output = cv2.cvtColor(alpha_output, cv2.COLOR_BGR2GRAY)  # GRAY
        img = img[:, :, 0:3]  # BGR
        image_output = post_process(inference(model_path, pre_process(img)))  # BGR
        image_output = cv2.cvtColor(image_output, cv2.COLOR_BGR2BGRA)  # BGRA
        image_output[:, :, 3] = alpha_output
    else:
        image_output = post_process(inference(model_path, pre_process(img)))  # BGR
    # Keep BGR channel order: cv2.imwrite expects BGR
    return image_output
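# Example usage (hypothetical file paths):
#   out = upscale('static/uploads/cat.png', model='modelx4')  # BGR ndarray
#   cv2.imwrite('static/upscaled/upscaled_cat.png', out)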
# Main route
@app.route('/')
def index():
    return render_template('index.html', models=["modelx2", "modelx4"])
@app.route('/upload', methods=['POST'])
def upload_file():
    if 'file' not in request.files:
        flash('Please upload an image.')
        return redirect(url_for('index'))
    file = request.files['file']
    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
        file.save(filepath)
        similar_images = []
        try:
            upscaled_img = upscale(filepath)
            upscaled_filename = f"upscaled_{filename}"
            upscaled_path = os.path.join(app.config['UPSCALED_FOLDER'], upscaled_filename)
            cv2.imwrite(upscaled_path, upscaled_img)
            # Caption the upscaled image and use the caption as the reverse-search query
            image = Image.open(upscaled_path).convert("RGB")
            caption = generate_caption(image)
            results = DDGS().images(
                keywords=caption,
                region="wt-wt",
                safesearch="off",
                size=None,
                color="Monochrome",  # note: restricts results to monochrome images
                type_image=None,
                layout=None,
                license_image=None,
                max_results=100,
            )
            similar_images = [r['image'] for r in results]
            input_image_url = url_for('serve_uploaded_file', filename=filename)
            image_url = url_for('serve_upscaled_file', filename=upscaled_filename)
            return render_template('index.html', input_image_url=input_image_url,
                                   image_url=image_url, similar_images=similar_images,
                                   show_buttons=True)
        except Exception as e:
            flash(f"Upscaling failed: {e}")
            return redirect(url_for('index'))
    else:
        flash('Invalid file format. Please upload a PNG, JPG, or JPEG file.')
        return redirect(url_for('index'))
@app.route('/process_image', methods=['POST'])
def process_image():
    filename = os.path.basename(request.form.get('image_url', ''))
    filepath = os.path.join(app.config['UPSCALED_FOLDER'], filename)
    if os.path.exists(filepath):
        image = Image.open(filepath).convert("RGB")
        if 'vqa' in request.form:
            question = request.form.get('question')
            if question:
                answer = answer_question(image, question)
                return render_template('index.html', image_url=url_for('serve_upscaled_file', filename=filename),
                                       answer=answer, show_buttons=True, question=question)
            else:
                flash("Please enter a question.")
        elif 'caption' in request.form:
            caption = generate_caption(image)
            return render_template('index.html', image_url=url_for('serve_upscaled_file', filename=filename),
                                   caption=caption, show_buttons=True)
    else:
        flash("File not found. Please re-upload the image.")
    return redirect(url_for('index'))
def generate_caption(image):
    # Process the image and prepare it for input to the model
    inputs = caption_processor(images=image, return_tensors="pt")
    # Generate a caption (the model's output is a tensor of token IDs)
    out = caption_model.generate(**inputs)
    # Decode the generated tokens back into text
    caption = caption_processor.decode(out[0], skip_special_tokens=True)
    return caption
def answer_question(image, question):
    # Process the image and the question, prepare them for input to the model
    inputs = vqa_processor(images=image, text=question, return_tensors="pt")
    # Generate an answer (the model's output is token IDs)
    out = vqa_model.generate(**inputs)
    # Decode the generated tokens back into the answer text
    answer = vqa_processor.decode(out[0], skip_special_tokens=True)
    return answer
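# Example usage (hypothetical image and outputs):
#   img = Image.open('static/uploads/cat.png').convert('RGB')
#   generate_caption(img)                      # e.g. "a cat sitting on a windowsill"
#   answer_question(img, 'what color is it?')  # e.g. "black"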
@app.route('/uploads/<filename>')
def serve_uploaded_file(filename):
    # send_from_directory guards against path traversal in `filename`
    return send_from_directory(app.config['UPLOAD_FOLDER'], filename)
@app.route('/upscaled/<filename>')
def serve_upscaled_file(filename):
    return send_from_directory(app.config['UPSCALED_FOLDER'], filename)
# Run app
if __name__ == '__main__':
    app.run(debug=True)