Spaces:
Sleeping
Sleeping
File size: 5,043 Bytes
a47b2ba 6c33596 632758a 798ee13 d03f6fb 632758a d03f6fb a47b2ba 632758a 798ee13 632758a 798ee13 632758a a47b2ba 632758a 798ee13 c384cc2 632758a ef2d262 c384cc2 632758a a47b2ba 632758a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
import gradio as gr
from datasets import load_dataset
import json
import random
from datetime import datetime
import os
from PIL import Image
import io
import numpy as np
# Hugging Face access token, read from the HUGGINGFACE_TOKEN environment
# variable (None when the variable is unset). It is passed as `token=` when
# the dataset is loaded.
access_token = os.environ.get("HUGGINGFACE_TOKEN")
class DatasetViewer:
    """Serve random samples of the PhotoshopRequest dataset for display.

    The full dataset is loaded once, on construction. Each call to
    ``get_next_samples`` picks random rows and turns them into a flat tuple
    of (markdown, source image, edited image) values.
    """

    def __init__(self):
        """Initialise viewer state and load the dataset immediately."""
        self.dataset = None
        self.dataset_size = 0
        self.last_refresh_time = None
        # Maximum (width, height) in pixels for displayed images.
        self.max_display_size = (800, 600)
        self.load_dataset()

    def resize_image(self, image):
        """Shrink ``image`` to fit ``max_display_size``, keeping aspect ratio.

        Args:
            image: A numpy array, encoded image bytes, or an object exposing
                ``width``, ``height`` and ``resize`` (e.g. a PIL image).

        Returns:
            The image as a numpy array. Images that already fit are returned
            at their original size; images are never upscaled.
        """
        if isinstance(image, np.ndarray):
            # Convert numpy array to PIL Image
            image = Image.fromarray(image)
        elif isinstance(image, bytes):
            # Convert encoded bytes to PIL Image
            image = Image.open(io.BytesIO(image))

        max_width, max_height = self.max_display_size
        # The smaller ratio is the one that makes both dimensions fit.
        scale_factor = min(max_width / image.width, max_height / image.height)

        # Only resize if the image is larger than the maximum dimensions.
        if scale_factor < 1:
            # Clamp to 1px so extreme aspect ratios never yield a 0-sized axis.
            new_size = (
                max(1, int(image.width * scale_factor)),
                max(1, int(image.height * scale_factor)),
            )
            image = image.resize(new_size, Image.Resampling.LANCZOS)

        # Convert back to numpy array for gradio
        return np.array(image)

    def load_dataset(self):
        """Load the complete dataset into memory (non-streaming).

        Updates ``dataset``, ``dataset_size`` and ``last_refresh_time``.
        """
        # Local import: the file only imports `datetime` from this module.
        from datetime import timezone

        self.dataset = load_dataset(
            "taesiri/PhotoshopRequest-DailyDump-January-2025-RandomSample",
            split="train",
            token=access_token,
        )
        self.dataset_size = len(self.dataset)
        # Record the time in UTC because get_info() labels it as "UTC".
        self.last_refresh_time = datetime.now(timezone.utc)

    def get_next_samples(self, num_samples=5):
        """Return random samples flattened for the UI outputs.

        Args:
            num_samples: Number of sample slots to fill.

        Returns:
            A tuple of exactly ``3 * num_samples`` values, laid out as
            (markdown_text, source_image, edited_image) per slot. When the
            dataset holds fewer than ``num_samples`` rows, the remaining
            slots are filled with ``("", None, None)`` so the number of
            returned values always matches the number of output components.
        """
        count = min(num_samples, self.dataset_size)
        indices = random.sample(range(self.dataset_size), count)

        results = []
        for idx in indices:
            sample = self.dataset[idx]

            # Get post information
            post_id = sample["post_id"]
            title = sample["title"]
            reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

            # Extract selftext if available; a missing or malformed payload
            # is expected and must not abort the whole batch.
            selftext = ""
            try:
                selftext = json.loads(sample["json_data"])["post"]["selftext"]
            except (KeyError, TypeError, ValueError):
                # ValueError covers json.JSONDecodeError.
                print(f"No selftext found for post {post_id}")

            markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

            # Append the triple (post_info, source_image, edited_image),
            # resizing the images before they go to the UI.
            results.append(markdown_text)
            results.append(self.resize_image(sample["source_image"]))
            results.append(self.resize_image(sample["edited_image"]))

        # Blank out any slots that could not be filled.
        for _ in range(num_samples - count):
            results.extend(("", None, None))

        return tuple(results)

    def get_info(self):
        """Return an HTML snippet with the dataset size and last refresh time."""
        if self.last_refresh_time is None:
            # Dataset has not been loaded yet; avoid calling strftime on None.
            refreshed = "never"
        else:
            refreshed = self.last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC')
        return (
            "\n"
            '<div style="text-align: center;">\n'
            "<hr>\n"
            f"Dataset Size: {self.dataset_size} items<br>\n"
            f"Last Refreshed: {refreshed}\n"
            "</div>\n"
        )
def create_interface():
    """Build and return the Gradio Blocks app for browsing the dataset.

    The page holds a title, an introduction, five sample slots (a Markdown
    post description above a row with the source and edited images), a
    button that fills the slots with new random samples, and a footer that
    is updated with dataset information after each click.
    """
    viewer = DatasetViewer()

    intro_text = (
        "\n"
        "This is a viewer for the PhotoshopRequest dataset. Each sample shows a Photoshop editing request post.\n"
        "Click the 'Show New Samples' button to see **5 random samples** from the dataset.\n"
        "**Layout**: For each sample, you'll see:\n"
        "1. The post title and description\n"
        "2. The source image (left) and edited result (right)\n"
    )

    with gr.Blocks() as demo:
        gr.Markdown("# PhotoshopRequest Dataset Viewer")
        gr.Markdown(intro_text)

        # Five slots, each contributing (post markdown, source image, edited image).
        components = []
        for slot in range(1, 6):
            components.append(gr.Markdown())
            with gr.Row():
                components += [
                    gr.Image(label=f"Source Image {slot}"),
                    gr.Image(label=f"Edited Image {slot}"),
                ]

        refresh_button = gr.Button("Show New Samples")
        info_panel = gr.Markdown()

        # First fill the sample slots, then refresh the dataset info footer.
        click_event = refresh_button.click(
            viewer.get_next_samples,
            outputs=components,
        )
        click_event.then(
            viewer.get_info,
            outputs=[info_panel],
        )

    return demo
if __name__ == "__main__":
    # Build the UI (constructing the viewer loads the dataset) and start
    # the Gradio server.
    demo = create_interface()
    demo.launch()