taesiri committed on
Commit
ef2d262
·
verified ·
1 Parent(s): b99caf6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -51
app.py CHANGED
@@ -15,7 +15,6 @@ dataset_size = "Unknown"
15
  last_refresh_time = None
16
  REFRESH_INTERVAL = timedelta(hours=24)
17
 
18
-
19
  def load_and_prepare_dataset():
20
  global dataset, dataset_size, last_refresh_time
21
 
@@ -36,7 +35,6 @@ def load_and_prepare_dataset():
36
 
37
  last_refresh_time = datetime.now()
38
 
39
-
40
  def check_and_refresh_dataset():
41
  global last_refresh_time
42
  current_time = datetime.now()
@@ -46,11 +44,10 @@ def check_and_refresh_dataset():
46
  ):
47
  load_and_prepare_dataset()
48
 
49
-
50
  # Initial dataset load
51
  load_and_prepare_dataset()
52
 
53
- # Load and prepare the dataset
54
  dataset = load_dataset(
55
  "taesiri/PhotoshopRequest-DailyDump",
56
  split="train",
@@ -70,79 +67,114 @@ BUFFER_SIZE = 1
70
  sample_iterator = None
71
  sample_count = 0
72
 
73
-
74
  def reshuffle_dataset():
75
  global sample_iterator, sample_count
76
- seed = int(time.time()) # Convert time to an integer
77
  shuffled_dataset = dataset.shuffle(seed=seed, buffer_size=BUFFER_SIZE)
78
  sample_iterator = iter(shuffled_dataset)
79
  sample_count = 0
80
 
81
-
82
  reshuffle_dataset() # Initial shuffle
83
 
84
-
85
- def get_next_sample():
 
 
 
 
86
  check_and_refresh_dataset()
 
87
  global sample_count
88
 
89
- if sample_count >= BUFFER_SIZE:
90
- reshuffle_dataset()
91
-
92
- sample = next(sample_iterator)
93
- sample_count += 1
94
- print(sample)
95
-
96
- post_id = sample["post_id"]
97
- title = sample["title"]
98
- reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"
99
-
100
- selftext = ""
101
- try:
102
- selftext = json.loads(sample["json_data"])["post"]["selftext"]
103
- except:
104
- print("No selftext found")
105
-
106
- markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"
107
-
108
- return (
109
- markdown_text,
110
- sample["source_image"],
111
- sample["edited_image"],
112
- )
 
113
 
 
114
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  with gr.Blocks() as demo:
116
  gr.Markdown("# PhotoshopRequest Dataset Sampler")
117
 
118
  gr.Markdown(
119
  """
120
- This is a preview of the PhotoshopRequest dataset. Each sample represents a Photoshop editing request post.
121
- Click the 'Sample New Item' button to retrieve a random sample from the dataset.
122
- """
123
  )
124
 
125
- post_info = gr.Markdown()
126
-
127
  with gr.Row():
128
- source_image = gr.Image(label="Source Image")
129
- edited_image = gr.Image(label="Edited Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  sample_button = gr.Button("Sample New Item")
132
-
133
  info_md = gr.Markdown()
134
 
135
- def update_info():
136
- return f"""
137
- <div style="text-align: center;">
138
- <hr>
139
- Dataset Size: {dataset_size} items<br>
140
- Last Refreshed: {last_refresh_time.strftime('%Y-%m-%d %H:%M:%S UTC') if last_refresh_time else 'Unknown'}
141
- </div>
142
- """
143
-
144
  sample_button.click(
145
- get_next_sample, outputs=[post_info, source_image, edited_image]
 
 
 
 
 
 
 
146
  ).then(update_info, outputs=[info_md])
147
 
148
  if __name__ == "__main__":
 
15
  last_refresh_time = None
16
  REFRESH_INTERVAL = timedelta(hours=24)
17
 
 
18
  def load_and_prepare_dataset():
19
  global dataset, dataset_size, last_refresh_time
20
 
 
35
 
36
  last_refresh_time = datetime.now()
37
 
 
38
  def check_and_refresh_dataset():
39
  global last_refresh_time
40
  current_time = datetime.now()
 
44
  ):
45
  load_and_prepare_dataset()
46
 
 
47
  # Initial dataset load
48
  load_and_prepare_dataset()
49
 
50
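+ # Load the dataset used for sampling. NOTE(review): load_and_prepare_dataset() above already declares `dataset` global, so this second load looks redundant -- confirm whether both are needed.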
51
  dataset = load_dataset(
52
  "taesiri/PhotoshopRequest-DailyDump",
53
  split="train",
 
67
  sample_iterator = None
68
  sample_count = 0
69
 
 
70
  def reshuffle_dataset():
71
  global sample_iterator, sample_count
72
+ seed = int(time.time()) # Convert current time to an integer for randomness
73
  shuffled_dataset = dataset.shuffle(seed=seed, buffer_size=BUFFER_SIZE)
74
  sample_iterator = iter(shuffled_dataset)
75
  sample_count = 0
76
 
 
77
  reshuffle_dataset() # Initial shuffle
78
 
79
def get_next_samples(num_samples=5):
    """Fetch ``num_samples`` items and flatten them for the Gradio outputs.

    Calls ``check_and_refresh_dataset()`` first, then draws samples from the
    module-level ``sample_iterator``, reshuffling whenever ``sample_count``
    has reached ``BUFFER_SIZE``.

    Args:
        num_samples: Number of dataset items to fetch.

    Returns:
        A flat tuple of ``3 * num_samples`` values, ordered per sample as
        ``(markdown_text, source_image, edited_image)``.

    Raises:
        StopIteration: If no sample can be drawn even after a reshuffle
            (for example, the dataset is empty).
    """
    global sample_count

    check_and_refresh_dataset()

    results = []
    for _ in range(num_samples):
        if sample_count >= BUFFER_SIZE:
            reshuffle_dataset()

        try:
            sample = next(sample_iterator)
        except StopIteration:
            # The current pass ran out before BUFFER_SIZE samples were drawn;
            # start a new pass and retry once instead of failing the request.
            reshuffle_dataset()
            sample = next(sample_iterator)
        sample_count += 1

        post_id = sample["post_id"]
        title = sample["title"]
        # Log only the id: the full sample carries image objects and raw JSON.
        print(f"Sampled post {post_id}")
        reddit_url = f"https://www.reddit.com/r/PhotoshopRequest/comments/{post_id}"

        try:
            # ``or ""`` keeps a JSON null from rendering as the text "None".
            selftext = json.loads(sample["json_data"])["post"]["selftext"] or ""
        except (KeyError, TypeError, ValueError):
            # Missing key, non-dict/None payload, or invalid JSON
            # (json.JSONDecodeError is a ValueError subclass).
            print("No selftext found")
            selftext = ""

        markdown_text = f"# {title}\n\n{selftext}\n\n[View post on r/PhotoshopRequest]({reddit_url})"

        # Append the triple (post_info, source_image, edited_image)
        results.extend((markdown_text, sample["source_image"], sample["edited_image"]))

    return tuple(results)
116
 
117
def update_info():
    """Return a small HTML snippet with dataset stats and last refresh time.

    Reads the module-level ``dataset_size`` and ``last_refresh_time``.
    The timestamp is converted to UTC before formatting so that the literal
    "UTC" suffix in the output is accurate; a naive ``datetime`` is
    interpreted as system-local time by ``astimezone``. When no refresh has
    happened yet, "Unknown" is shown instead.
    """
    # Local import keeps this block self-contained; ``datetime`` is stdlib.
    from datetime import timezone

    if last_refresh_time:
        refreshed = last_refresh_time.astimezone(timezone.utc).strftime(
            '%Y-%m-%d %H:%M:%S UTC'
        )
    else:
        refreshed = 'Unknown'

    return f"""
    <div style="text-align: center;">
        <hr>
        Dataset Size: {dataset_size} items<br>
        Last Refreshed: {refreshed}
    </div>
    """
128
+
129
+ # Build the Gradio interface
130
  with gr.Blocks() as demo:
131
  gr.Markdown("# PhotoshopRequest Dataset Sampler")
132
 
133
  gr.Markdown(
134
  """
135
+ This is a preview of the PhotoshopRequest dataset. Each sample represents a Photoshop editing request post.
136
+ Click the 'Sample New Item' button to retrieve 5 random samples from the dataset (displayed as pairs).
137
+ """
138
  )
139
 
140
+ # We will create 5 sets of outputs: (markdown, source_image, edited_image)
141
+ # Each set is in its own column.
142
  with gr.Row():
143
+ with gr.Column():
144
+ post_info1 = gr.Markdown()
145
+ source_image1 = gr.Image(label="Source Image 1")
146
+ edited_image1 = gr.Image(label="Edited Image 1")
147
+ with gr.Column():
148
+ post_info2 = gr.Markdown()
149
+ source_image2 = gr.Image(label="Source Image 2")
150
+ edited_image2 = gr.Image(label="Edited Image 2")
151
+ with gr.Column():
152
+ post_info3 = gr.Markdown()
153
+ source_image3 = gr.Image(label="Source Image 3")
154
+ edited_image3 = gr.Image(label="Edited Image 3")
155
+ with gr.Column():
156
+ post_info4 = gr.Markdown()
157
+ source_image4 = gr.Image(label="Source Image 4")
158
+ edited_image4 = gr.Image(label="Edited Image 4")
159
+ with gr.Column():
160
+ post_info5 = gr.Markdown()
161
+ source_image5 = gr.Image(label="Source Image 5")
162
+ edited_image5 = gr.Image(label="Edited Image 5")
163
 
164
  sample_button = gr.Button("Sample New Item")
 
165
  info_md = gr.Markdown()
166
 
167
+ # When the button is clicked, get_next_samples returns 15 values (5 * 3),
168
+ # which we map to our 15 output components in the same order:
 
 
 
 
 
 
 
169
  sample_button.click(
170
+ get_next_samples,
171
+ outputs=[
172
+ post_info1, source_image1, edited_image1,
173
+ post_info2, source_image2, edited_image2,
174
+ post_info3, source_image3, edited_image3,
175
+ post_info4, source_image4, edited_image4,
176
+ post_info5, source_image5, edited_image5
177
+ ]
178
  ).then(update_info, outputs=[info_md])
179
 
180
  if __name__ == "__main__":