Spaces:
Runtime error
Runtime error
Raymond Weitekamp
committed on
Commit
·
47b06ca
1
Parent(s):
491aa62
refactor: improve leaderboard layout and heading text
Browse files
- app.py +77 -25
- requirements.txt +2 -1
app.py
CHANGED
@@ -11,6 +11,7 @@ if gr.NO_RELOAD:
|
|
11 |
from PIL import Image # Needed for working with PIL images
|
12 |
import datasets
|
13 |
import numpy as np # Added to help handle numpy array images
|
|
|
14 |
|
15 |
# Load environment variables from .env if available.
|
16 |
from dotenv import load_dotenv
|
@@ -84,19 +85,43 @@ class SubmissionData(BaseModel):
|
|
84 |
class OCRDataCollector:
|
85 |
def __init__(self):
|
86 |
self.collected_pairs = []
|
|
|
87 |
self.current_text_block = self.get_random_text_block(201) # Default max words
|
88 |
self.hf_api = HfApi()
|
89 |
|
90 |
def get_random_text_block(self, max_words: int):
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
|
95 |
# Truncate to max_words if necessary
|
96 |
words = block.split()
|
97 |
if len(words) > max_words:
|
98 |
block = " ".join(words[:max_words])
|
99 |
-
|
|
|
100 |
return block
|
101 |
|
102 |
def submit_image(self, image, text_block, username: Optional[str] = None):
|
@@ -122,12 +147,43 @@ class OCRDataCollector:
|
|
122 |
if item['user'] != 'anonymous':
|
123 |
user_counts[item['user']] = user_counts.get(item['user'], 0) + 1
|
124 |
|
125 |
-
#
|
126 |
-
|
127 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
except Exception as e:
|
129 |
print(f"Error fetching leaderboard: {e}")
|
130 |
-
return []
|
131 |
|
132 |
def strip_metadata(image: Image.Image) -> Image.Image:
|
133 |
"""
|
@@ -158,21 +214,24 @@ def create_gradio_interface():
|
|
158 |
|
159 |
with gr.Blocks() as demo:
|
160 |
gr.Markdown("# Handwriting OCR Dataset Creator")
|
161 |
-
gr.Markdown("## After almost 100 years, handwriting recognition still sucks. Together, we can change that.")
|
162 |
|
163 |
# Add leaderboard section at the top
|
|
|
164 |
with gr.Row():
|
165 |
-
with gr.Column():
|
166 |
-
|
|
|
167 |
leaderboard = gr.Dataframe(
|
168 |
-
headers=["User", "Contributions"],
|
169 |
value=collector.get_leaderboard(),
|
170 |
elem_id="leaderboard",
|
171 |
visible=True,
|
172 |
-
interactive=False
|
|
|
173 |
)
|
174 |
-
|
175 |
-
|
|
|
176 |
gr.Markdown("### Step 1: Log in with your Hugging Face account to use this app.")
|
177 |
# Login section - centered
|
178 |
with gr.Row():
|
@@ -303,6 +362,9 @@ def create_gradio_interface():
|
|
303 |
|
304 |
# Load initial state and update UI visibility
|
305 |
demo.load(update_user_state, inputs=profile_state, outputs=[upload_info, image_input, dataset_options, button_row])
|
|
|
|
|
|
|
306 |
|
307 |
def handle_submit(
|
308 |
text: str,
|
@@ -451,16 +513,6 @@ def create_gradio_interface():
|
|
451 |
outputs=text_box
|
452 |
)
|
453 |
|
454 |
-
# Add leaderboard refresh handler
|
455 |
-
def refresh_leaderboard():
|
456 |
-
return collector.get_leaderboard()
|
457 |
-
|
458 |
-
refresh_btn.click(
|
459 |
-
fn=refresh_leaderboard,
|
460 |
-
inputs=[],
|
461 |
-
outputs=[leaderboard]
|
462 |
-
)
|
463 |
-
|
464 |
return demo
|
465 |
|
466 |
if __name__ == "__main__":
|
|
|
11 |
from PIL import Image # Needed for working with PIL images
|
12 |
import datasets
|
13 |
import numpy as np # Added to help handle numpy array images
|
14 |
+
import pandas as pd # Added for pandas DataFrame
|
15 |
|
16 |
# Load environment variables from .env if available.
|
17 |
from dotenv import load_dotenv
|
|
|
85 |
class OCRDataCollector:
|
86 |
def __init__(self):
|
87 |
self.collected_pairs = []
|
88 |
+
self.last_text_block = None
|
89 |
self.current_text_block = self.get_random_text_block(201) # Default max words
|
90 |
self.hf_api = HfApi()
|
91 |
|
92 |
def get_random_text_block(self, max_words: int):
|
93 |
+
attempts = 0
|
94 |
+
max_attempts = 10 # Prevent infinite loop in case of very small sentence list
|
95 |
+
|
96 |
+
while attempts < max_attempts:
|
97 |
+
block_length = random.randint(1, 5)
|
98 |
+
start_index = random.randint(0, len(sentences) - block_length)
|
99 |
+
block = " ".join(sentences[start_index:start_index + block_length])
|
100 |
+
|
101 |
+
# Truncate to max_words if necessary
|
102 |
+
words = block.split()
|
103 |
+
if len(words) > max_words:
|
104 |
+
block = " ".join(words[:max_words])
|
105 |
+
|
106 |
+
# If this block is different from the last one, use it
|
107 |
+
if block != self.last_text_block:
|
108 |
+
self.last_text_block = block
|
109 |
+
return block
|
110 |
+
|
111 |
+
attempts += 1
|
112 |
+
|
113 |
+
# If we couldn't find a different block after max attempts,
|
114 |
+
# force a different block by using the next available sentences
|
115 |
+
current_start = sentences.index(self.last_text_block.split('.')[0] + '.') if self.last_text_block else 0
|
116 |
+
next_start = (current_start + 1) % len(sentences)
|
117 |
+
block = sentences[next_start]
|
118 |
|
119 |
# Truncate to max_words if necessary
|
120 |
words = block.split()
|
121 |
if len(words) > max_words:
|
122 |
block = " ".join(words[:max_words])
|
123 |
+
|
124 |
+
self.last_text_block = block
|
125 |
return block
|
126 |
|
127 |
def submit_image(self, image, text_block, username: Optional[str] = None):
|
|
|
147 |
if item['user'] != 'anonymous':
|
148 |
user_counts[item['user']] = user_counts.get(item['user'], 0) + 1
|
149 |
|
150 |
+
# Create a pandas DataFrame for better styling
|
151 |
+
df = pd.DataFrame(user_counts.items(), columns=['Username', 'Contributions'])
|
152 |
+
df['Rank'] = range(1, len(df) + 1)
|
153 |
+
df['Medal'] = df['Rank'].apply(lambda x: "🏆" if x == 1 else "🥈" if x == 2 else "🥉" if x == 3 else "👏")
|
154 |
+
|
155 |
+
# Reorder columns
|
156 |
+
df = df[['Rank', 'Medal', 'Username', 'Contributions']]
|
157 |
+
|
158 |
+
# Style the DataFrame
|
159 |
+
styled_df = df.style\
|
160 |
+
.set_properties(**{
|
161 |
+
'text-align': 'center',
|
162 |
+
'font-size': '16px',
|
163 |
+
'padding': '10px',
|
164 |
+
'border': '1px solid #ddd'
|
165 |
+
})\
|
166 |
+
.set_table_styles([
|
167 |
+
{'selector': 'th', 'props': [
|
168 |
+
('background-color', '#f4f4f4'),
|
169 |
+
('color', '#333'),
|
170 |
+
('font-weight', 'bold'),
|
171 |
+
('text-align', 'center'),
|
172 |
+
('padding', '12px'),
|
173 |
+
('border', '1px solid #ddd')
|
174 |
+
]},
|
175 |
+
{'selector': 'tr:nth-of-type(odd)', 'props': [
|
176 |
+
('background-color', '#f9f9f9')
|
177 |
+
]},
|
178 |
+
{'selector': 'tr:hover', 'props': [
|
179 |
+
('background-color', '#f5f5f5')
|
180 |
+
]}
|
181 |
+
])
|
182 |
+
|
183 |
+
return styled_df
|
184 |
except Exception as e:
|
185 |
print(f"Error fetching leaderboard: {e}")
|
186 |
+
return pd.DataFrame(columns=['Rank', 'Medal', 'Username', 'Contributions'])
|
187 |
|
188 |
def strip_metadata(image: Image.Image) -> Image.Image:
|
189 |
"""
|
|
|
214 |
|
215 |
with gr.Blocks() as demo:
|
216 |
gr.Markdown("# Handwriting OCR Dataset Creator")
|
217 |
+
gr.Markdown("## After almost 100 years of research, handwriting recognition still sucks. Together, we can change that.")
|
218 |
|
219 |
# Add leaderboard section at the top
|
220 |
+
gr.Markdown("### 🏆 Top Contributors", show_label=False)
|
221 |
with gr.Row():
|
222 |
+
with gr.Column(scale=1):
|
223 |
+
pass
|
224 |
+
with gr.Column(scale=2, min_width=400):
|
225 |
leaderboard = gr.Dataframe(
|
|
|
226 |
value=collector.get_leaderboard(),
|
227 |
elem_id="leaderboard",
|
228 |
visible=True,
|
229 |
+
interactive=False,
|
230 |
+
show_label=False
|
231 |
)
|
232 |
+
with gr.Column(scale=1):
|
233 |
+
pass
|
234 |
+
|
235 |
gr.Markdown("### Step 1: Log in with your Hugging Face account to use this app.")
|
236 |
# Login section - centered
|
237 |
with gr.Row():
|
|
|
362 |
|
363 |
# Load initial state and update UI visibility
|
364 |
demo.load(update_user_state, inputs=profile_state, outputs=[upload_info, image_input, dataset_options, button_row])
|
365 |
+
|
366 |
+
# Also load leaderboard on page load
|
367 |
+
demo.load(fn=lambda: collector.get_leaderboard(), outputs=leaderboard)
|
368 |
|
369 |
def handle_submit(
|
370 |
text: str,
|
|
|
513 |
outputs=text_box
|
514 |
)
|
515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
516 |
return demo
|
517 |
|
518 |
if __name__ == "__main__":
|
requirements.txt
CHANGED
@@ -7,4 +7,5 @@ pytest-asyncio>=0.23.0
|
|
7 |
playwright>=1.40.0
|
8 |
datasets>=2.16.0
|
9 |
pydantic>=2.6.1
|
10 |
-
python-dotenv>=1.0.0
|
|
|
|
7 |
playwright>=1.40.0
|
8 |
datasets>=2.16.0
|
9 |
pydantic>=2.6.1
|
10 |
+
python-dotenv>=1.0.0
|
11 |
+
pandas>=2.0.0
|