Spaces:

sunbv56
/

generate_edit_pic_gemini

Running

App Files Files Community

sunbv56 committed on Mar 25

Commit

e79e752

verified ·

1 Parent(s): 73aaa23

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -10

app.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import gradio as gr
 import asyncio
 import os
 from google import genai
 from google.genai import types
 from PIL import Image
 from io import BytesIO
-from super_image import RcanModel, ImageLoader
 # Cấu hình API Key
 api_key = os.getenv("GEMINI_API_KEY")
@@ -14,20 +17,43 @@ if not api_key:
 client = genai.Client(api_key=api_key)
-# Load RCAN-BAM model
-model = RcanModel.from_pretrained('eugenesiow/rcan-bam', scale=2)
 def upscale_image(image):
-    """Nâng cấp độ phân giải ảnh bằng RCAN-BAM"""
-    inputs = ImageLoader.load_image(image)
-    preds = model(inputs)
-    return ImageLoader.to_pil_image(preds)
 def load_image_as_bytes(image_path):
     """Chuyển ảnh thành dữ liệu nhị phân"""
     with Image.open(image_path) as img:
         img = img.convert("RGB")  # Đảm bảo ảnh là RGB
-        img = upscale_image(img)  # RCAN-BAM xử lý
         img_bytes = BytesIO()
         img.save(img_bytes, format="JPEG")  # Lưu ảnh vào buffer
         return img_bytes.getvalue()  # Lấy dữ liệu nhị phân
@@ -49,7 +75,7 @@ async def generate_image(image_bytes, text_input):
     for part in response.candidates[0].content.parts:
         if part.inline_data is not None:
             img = Image.open(BytesIO(part.inline_data.data))
-            img = upscale_image(img)  # RCAN-BAM sau khi nhận ảnh từ Gemini
             images.append(img)
     return images
@@ -76,7 +102,7 @@ demo = gr.Interface(
         gr.Slider(minimum=1, maximum=4, step=1, value=4, label="Số lượng ảnh cần tạo")
     ],
     outputs=gr.Gallery(label="Kết quả chỉnh sửa", columns=4),
-    title="Chỉnh sửa ảnh bằng Gemini AI + RCAN-BAM",
     description="Upload ảnh và nhập yêu cầu chỉnh sửa. Ảnh được nâng cấp độ phân giải trước và sau khi xử lý.",
 )

 import gradio as gr
 import asyncio
 import os
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torchvision.transforms as transforms
 from google import genai
 from google.genai import types
 from PIL import Image
 from io import BytesIO
 # Cấu hình API Key
 api_key = os.getenv("GEMINI_API_KEY")
 client = genai.Client(api_key=api_key)
# SRCNN model definition
class SRCNN(nn.Module):
    """Three-layer convolutional network applied to 3-channel images.

    The layers use 9x9, 5x5 and 5x5 kernels with paddings of 4, 2 and 2
    respectively (default stride), so the spatial size of the output equals
    the spatial size of the input. The attribute names ``conv1``, ``conv2``
    and ``conv3`` determine the state-dict keys and must stay in sync with
    any checkpoint loaded into this module.
    """

    def __init__(self):
        super().__init__()
        # 3 input channels -> 64 feature maps
        self.conv1 = nn.Conv2d(3, 64, kernel_size=9, padding=4)
        # 64 -> 32 feature maps
        self.conv2 = nn.Conv2d(64, 32, kernel_size=5, padding=2)
        # 32 feature maps -> 3 output channels
        self.conv3 = nn.Conv2d(32, 3, kernel_size=5, padding=2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply conv1 -> ReLU -> conv2 -> ReLU -> conv3 to ``x``.

        The final convolution has no activation, so output values are not
        bounded to any particular range.
        """
        features = self.relu(self.conv1(x))
        mapped = self.relu(self.conv2(features))
        return self.conv3(mapped)
# Instantiate the SRCNN model and restore its weights from "srcnn.pth",
# mapping all tensors onto the CPU.
# NOTE(review): torch.load unpickles the checkpoint, so "srcnn.pth" must come
# from a trusted source.
model = SRCNN()
_weights = torch.load("srcnn.pth", map_location=torch.device('cpu'))
model.load_state_dict(_weights)
del _weights  # the module holds its own copy; do not keep a second one alive
# Switch the module to evaluation mode; it is only used for inference here.
model.eval()
 def upscale_image(image):
     """Enhance a PIL image with the module-level SRCNN ``model``.

     Args:
         image: A PIL image in any mode. It is converted to RGB when needed,
             because the model's first convolution takes exactly three input
             channels; an RGBA, palette or grayscale image would otherwise
             make the forward pass fail.

     Returns:
         A new RGB PIL image built from the model output.

     NOTE(review): this function performs no resizing of its own, so whether
     the result is larger than the input depends entirely on ``model``.
     """
     if image.mode != "RGB":
         image = image.convert("RGB")

     # ToTensor yields a (C, H, W) float tensor; the model expects a leading
     # batch dimension, hence the unsqueeze.
     input_tensor = transforms.ToTensor()(image).unsqueeze(0)

     with torch.no_grad():
         output_tensor = model(input_tensor)

     # The last layer of the model is unbounded. ToPILImage scales float
     # tensors by 255 and casts to uint8, so values outside [0, 1] would wrap
     # around and show up as speckle artifacts; clamp them first.
     output_tensor = output_tensor.squeeze(0).clamp(0.0, 1.0)
     return transforms.ToPILImage()(output_tensor)
 def load_image_as_bytes(image_path):
     """Read an image file and return it as JPEG-encoded bytes.

     The image is opened from ``image_path``, converted to RGB, passed through
     ``upscale_image`` and then serialised to JPEG in an in-memory buffer.
     """
     buffer = BytesIO()
     with Image.open(image_path) as source:
         # Force RGB before processing and JPEG encoding.
         enhanced = upscale_image(source.convert("RGB"))
         enhanced.save(buffer, format="JPEG")
     return buffer.getvalue()
     for part in response.candidates[0].content.parts:
         if part.inline_data is not None:
             img = Image.open(BytesIO(part.inline_data.data))
+            img = upscale_image(img)  # SRCNN sau khi nhận ảnh từ Gemini
             images.append(img)
     return images
         gr.Slider(minimum=1, maximum=4, step=1, value=4, label="Số lượng ảnh cần tạo")
     ],
     outputs=gr.Gallery(label="Kết quả chỉnh sửa", columns=4),
+    title="Chỉnh sửa ảnh bằng Gemini AI + SRCNN",
     description="Upload ảnh và nhập yêu cầu chỉnh sửa. Ảnh được nâng cấp độ phân giải trước và sau khi xử lý.",
 )