# Hugging Face Space (status when captured: Sleeping)
import io
import json
import os
import random
from datetime import datetime, timezone

import gradio as gr
import numpy as np
from datasets import load_dataset
from PIL import Image
# Hugging Face access token, read from the environment (None when unset).
access_token = os.getenv("HUGGINGFACE_TOKEN")
class DatasetViewer:
    """Serve random samples from the PhotoshopRequest dataset for display.

    The dataset is loaded once at construction time. Each sample is turned
    into a markdown description plus a source image and an edited image,
    both down-scaled to fit within ``max_display_size``.
    """

    def __init__(self):
        """Initialise bookkeeping fields and load the dataset immediately."""
        self.dataset = None
        self.dataset_size = 0
        self.last_refresh_time = None
        # Maximum (width, height) in pixels for displayed images.
        self.max_display_size = (800, 600)
        self.load_dataset()

    def resize_image(self, image):
        """Return ``image`` as a numpy array that fits ``max_display_size``.

        Args:
            image: A numpy array, encoded image bytes, or an object exposing
                ``width``/``height``/``resize`` (e.g. a PIL image).

        Returns:
            A numpy array of the image. Images larger than the maximum are
            scaled down with the aspect ratio preserved; smaller images are
            returned at their original size (never up-scaled).
        """
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        elif isinstance(image, bytes):
            image = Image.open(io.BytesIO(image))

        max_width, max_height = self.max_display_size
        scale_factor = min(max_width / image.width, max_height / image.height)

        if scale_factor < 1:
            # Clamp to 1px: with an extreme aspect ratio int() can truncate
            # the short side to 0, and Pillow's resize rejects a zero size.
            new_width = max(1, int(image.width * scale_factor))
            new_height = max(1, int(image.height * scale_factor))
            image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

        # Gradio image components accept numpy arrays.
        return np.array(image)

    def load_dataset(self):
        """Load the full (non-streaming) train split and record its size."""
        self.dataset = load_dataset(
            "taesiri/PhotoshopRequest-DailyDump-January-2025-RandomSample",
            split="train",
            token=access_token,
        )
        self.dataset_size = len(self.dataset)
        # Timezone-aware UTC, because get_info labels this timestamp "UTC".
        self.last_refresh_time = datetime.now(timezone.utc)

    def get_next_samples(self, num_samples=5):
        """Return random samples flattened for a fixed set of UI outputs.

        Args:
            num_samples: Number of sample slots to fill.

        Returns:
            A tuple of exactly ``3 * num_samples`` values, laid out as
            ``(markdown, source_image, edited_image)`` per slot. When the
            dataset holds fewer than ``num_samples`` items, the remaining
            slots are filled with ``("", None, None)`` so the number of
            returned values always matches the number of bound outputs.
        """
        count = min(num_samples, self.dataset_size)
        indices = random.sample(range(self.dataset_size), count)

        results = []
        for idx in indices:
            sample = self.dataset[idx]
            post_id = sample["post_id"]
            title = sample["title"]
            reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

            # Best effort: the selftext is optional, so a missing key,
            # non-string payload or malformed JSON just leaves it empty.
            selftext = ""
            try:
                selftext = json.loads(sample["json_data"])["post"]["selftext"]
            except (KeyError, TypeError, ValueError):
                print(f"No selftext found for post {post_id}")

            markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

            results.append(markdown_text)
            results.append(self.resize_image(sample["source_image"]))
            results.append(self.resize_image(sample["edited_image"]))

        # Pad unused slots so the output count stays constant.
        for _ in range(num_samples - count):
            results.extend(("", None, None))

        return tuple(results)

    def get_info(self):
        """Return an HTML snippet with the dataset size and last refresh time.

        Shows ``never`` as the refresh time if the dataset has not been
        loaded yet.
        """
        if self.last_refresh_time is None:
            refreshed = "never"
        else:
            refreshed = self.last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC')
        return (
            "\n"
            '<div style="text-align: center;">\n'
            "<hr>\n"
            f"Dataset Size: {self.dataset_size} items<br>\n"
            f"Last Refreshed: {refreshed}\n"
            "</div>\n"
        )
def create_interface():
    """Build and return the Gradio Blocks app for browsing the dataset.

    The page has a heading and an intro, five sample slots (a markdown
    description above a row holding the source and edited images), a button
    that fills the slots via ``DatasetViewer.get_next_samples``, and a
    footer that is updated from ``DatasetViewer.get_info`` afterwards.
    """
    viewer = DatasetViewer()

    intro_text = (
        "\n"
        "This is a viewer for the PhotoshopRequest dataset. Each sample shows a Photoshop editing request post.\n"
        "Click the 'Show New Samples' button to see **5 random samples** from the dataset.\n"
        "**Layout**: For each sample, you'll see:\n"
        "1. The post title and description\n"
        "2. The source image (left) and edited result (right)\n"
    )

    with gr.Blocks() as demo:
        gr.Markdown("# PhotoshopRequest Dataset Viewer")
        gr.Markdown(intro_text)

        # Flat list in (markdown, source, edited) order, one triple per
        # slot, matching the layout of the values get_next_samples returns.
        sample_components = []
        for slot in range(1, 6):
            sample_components.append(gr.Markdown())
            with gr.Row():
                sample_components.append(gr.Image(label=f"Source Image {slot}"))
                sample_components.append(gr.Image(label=f"Edited Image {slot}"))

        refresh_button = gr.Button("Show New Samples")
        dataset_info = gr.Markdown()

        # First fill the sample slots, then refresh the dataset footer.
        samples_shown = refresh_button.click(
            viewer.get_next_samples,
            outputs=sample_components,
        )
        samples_shown.then(viewer.get_info, outputs=[dataset_info])

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then start the Gradio server.
    demo = create_interface()
    demo.launch()