Spaces:

taesiri
/

PhotoshopRequests-Preview

Running

App Files Community

taesiri committed on Jan 31

Commit

ef2d262

verified ·

1 Parent(s): b99caf6

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -51

app.py CHANGED Viewed

@@ -15,7 +15,6 @@ dataset_size = "Unknown"
 last_refresh_time = None
 REFRESH_INTERVAL = timedelta(hours=24)
 def load_and_prepare_dataset():
     global dataset, dataset_size, last_refresh_time
@@ -36,7 +35,6 @@ def load_and_prepare_dataset():
     last_refresh_time = datetime.now()
 def check_and_refresh_dataset():
     global last_refresh_time
     current_time = datetime.now()
@@ -46,11 +44,10 @@ def check_and_refresh_dataset():
     ):
         load_and_prepare_dataset()
 # Initial dataset load
 load_and_prepare_dataset()
-# Load and prepare the dataset
 dataset = load_dataset(
     "taesiri/PhotoshopRequest-DailyDump",
     split="train",
@@ -70,79 +67,114 @@ BUFFER_SIZE = 1
 sample_iterator = None
 sample_count = 0
 def reshuffle_dataset():
     global sample_iterator, sample_count
-    seed = int(time.time())  # Convert time to an integer
     shuffled_dataset = dataset.shuffle(seed=seed, buffer_size=BUFFER_SIZE)
     sample_iterator = iter(shuffled_dataset)
     sample_count = 0
 reshuffle_dataset()  # Initial shuffle
-def get_next_sample():
     check_and_refresh_dataset()
     global sample_count
-    if sample_count >= BUFFER_SIZE:
-        reshuffle_dataset()
-    sample = next(sample_iterator)
-    sample_count += 1
-    print(sample)
-    post_id = sample["post_id"]
-    title = sample["title"]
-    reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"
-    selftext = ""
-    try:
-        selftext = json.loads(sample["json_data"])["post"]["selftext"]
-    except:
-        print("No selftext found")
-    markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"
-    return (
-        markdown_text,
-        sample["source_image"],
-        sample["edited_image"],
-    )
 with gr.Blocks() as demo:
     gr.Markdown("# PhotoshopRequest Dataset Sampler")
     gr.Markdown(
         """
-    This is a preview of the PhotoshopRequest dataset. Each sample represents a Photoshop editing request post.
-    Click the 'Sample New Item' button to retrieve a random sample from the dataset.
-    """
     )
-    post_info = gr.Markdown()
     with gr.Row():
-        source_image = gr.Image(label="Source Image")
-        edited_image = gr.Image(label="Edited Image")
     sample_button = gr.Button("Sample New Item")
     info_md = gr.Markdown()
-    def update_info():
-        return f"""
-    <div style="text-align: center;">
-    <hr>
-    Dataset Size: {dataset_size} items<br>
-    Last Refreshed: {last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC') if last_refresh_time else 'Unknown'}
-    </div>
-    """
     sample_button.click(
-        get_next_sample, outputs=[post_info, source_image, edited_image]
     ).then(update_info, outputs=[info_md])
 if __name__ == "__main__":

 last_refresh_time = None
 REFRESH_INTERVAL = timedelta(hours=24)
 def load_and_prepare_dataset():
     global dataset, dataset_size, last_refresh_time
     last_refresh_time = datetime.now()
 def check_and_refresh_dataset():
     global last_refresh_time
     current_time = datetime.now()
     ):
         load_and_prepare_dataset()
 # Initial dataset load
 load_and_prepare_dataset()
+# Load and prepare the dataset (again, as in your original code)
 dataset = load_dataset(
     "taesiri/PhotoshopRequest-DailyDump",
     split="train",
 sample_iterator = None
 sample_count = 0
 def reshuffle_dataset():
     global sample_iterator, sample_count
+    seed = int(time.time())  # Convert current time to an integer for randomness
     shuffled_dataset = dataset.shuffle(seed=seed, buffer_size=BUFFER_SIZE)
     sample_iterator = iter(shuffled_dataset)
     sample_count = 0
 reshuffle_dataset()  # Initial shuffle
+def get_next_samples(num_samples=5):
+    """
+    Fetch 'num_samples' items from the dataset and return
+    the text/markdown + the source/edited images for each.
+    This will produce 3 * num_samples outputs in total.
+    """
     check_and_refresh_dataset()
     global sample_count
+    results = []
+    for _ in range(num_samples):
+        if sample_count >= BUFFER_SIZE:
+            reshuffle_dataset()
+        sample = next(sample_iterator)
+        sample_count += 1
+        print(sample)
+        post_id = sample["post_id"]
+        title = sample["title"]
+        reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"
+        selftext = ""
+        try:
+            selftext = json.loads(sample["json_data"])["post"]["selftext"]
+        except:
+            print("No selftext found")
+        markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"
+        # Append the triple (post_info, source_image, edited_image)
+        results.append(markdown_text)
+        results.append(sample["source_image"])
+        results.append(sample["edited_image"])
+    return tuple(results)
+def update_info():
+    """
+    Return a small HTML snippet with dataset stats and last refresh time.
+    """
+    return f"""
+    <div style="text-align: center;">
+        <hr>
+        Dataset Size: {dataset_size} items<br>
+        Last Refreshed: {last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC') if last_refresh_time else 'Unknown'}
+    </div>
+    """
+# Build the Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# PhotoshopRequest Dataset Sampler")
     gr.Markdown(
         """
+        This is a preview of the PhotoshopRequest dataset. Each sample represents a Photoshop editing request post.
+        Click the 'Sample New Item' button to retrieve 5 random samples from the dataset (displayed as pairs).
+        """
     )
+    # We will create 5 sets of outputs: (markdown, source_image, edited_image)
+    # Each set is in its own column.
     with gr.Row():
+        with gr.Column():
+            post_info1 = gr.Markdown()
+            source_image1 = gr.Image(label="Source Image 1")
+            edited_image1 = gr.Image(label="Edited Image 1")
+        with gr.Column():
+            post_info2 = gr.Markdown()
+            source_image2 = gr.Image(label="Source Image 2")
+            edited_image2 = gr.Image(label="Edited Image 2")
+        with gr.Column():
+            post_info3 = gr.Markdown()
+            source_image3 = gr.Image(label="Source Image 3")
+            edited_image3 = gr.Image(label="Edited Image 3")
+        with gr.Column():
+            post_info4 = gr.Markdown()
+            source_image4 = gr.Image(label="Source Image 4")
+            edited_image4 = gr.Image(label="Edited Image 4")
+        with gr.Column():
+            post_info5 = gr.Markdown()
+            source_image5 = gr.Image(label="Source Image 5")
+            edited_image5 = gr.Image(label="Edited Image 5")
     sample_button = gr.Button("Sample New Item")
     info_md = gr.Markdown()
+    # When the button is clicked, get_next_samples returns 15 values (5 * 3),
+    # which we map to our 15 output components in the same order:
     sample_button.click(
+        get_next_samples,
+        outputs=[
+            post_info1, source_image1, edited_image1,
+            post_info2, source_image2, edited_image2,
+            post_info3, source_image3, edited_image3,
+            post_info4, source_image4, edited_image4,
+            post_info5, source_image5, edited_image5
+        ]
     ).then(update_info, outputs=[info_md])
 if __name__ == "__main__":