Pclanglais committed on
Commit
6849ffb
1 Parent(s): 3f2aaa1

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +3 -3
  2. app.py +125 -0
  3. requirements.in +2 -0
  4. requirements.txt +217 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Finance Commons
3
  emoji: 💻
4
- colorFrom: purple
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.38.1
8
  app_file: app.py
 
1
  ---
2
+ title: Finance Commons Explorer
3
  emoji: 💻
4
+ colorFrom: red
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 4.38.1
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+
4
+ import gradio as gr
5
+ from huggingface_hub import get_collection
6
+
7
+
8
def extract_collection_id(input_text):
    """Return the ``owner/slug`` collection ID contained in *input_text*.

    Accepts either a full ``https://huggingface.co/collections/<owner>/<slug>``
    URL or a bare ``<owner>/<slug>`` string. Surrounding whitespace, a trailing
    slash, and any query string or fragment on the URL form are ignored.

    Args:
        input_text: Collection URL or slug, typically read from an
            environment variable.

    Returns:
        The normalized ``owner/slug`` string, or ``None`` when the input is
        empty or does not have that shape.
    """
    if not input_text:
        return None

    candidate = input_text.strip()

    url_prefix = "https://huggingface.co/collections/"
    if candidate.startswith(url_prefix):
        candidate = candidate[len(url_prefix):]
        # Keep only the path part: drop "?query" / "#fragment" and trailing "/".
        candidate = re.split(r"[?#]", candidate, maxsplit=1)[0].rstrip("/")

    # fullmatch (not match) so trailing garbage after the slug is rejected.
    return candidate if re.fullmatch(r"[\w.-]+/[\w.-]+", candidate) else None
14
+
15
+
16
def load_collection():
    """Resolve the configured collection and list the datasets it contains.

    The collection is taken from the ``COLLECTION_SLUG_OR_URL`` environment
    variable, normalized with ``extract_collection_id`` and fetched with
    ``get_collection``.

    Returns:
        A ``(dataset_ids, collection_id)`` tuple, where ``dataset_ids`` is the
        list of ``item_id`` values of the collection items whose ``item_type``
        is ``"dataset"``.

    Raises:
        ValueError: If the environment variable is unset or empty, if its
            value is not a valid collection ID or URL, or if the collection
            contains no datasets.
    """
    raw_value = os.getenv("COLLECTION_SLUG_OR_URL")
    if not raw_value:
        raise ValueError("COLLECTION_SLUG_OR_URL environment variable is not set.")

    collection_id = extract_collection_id(raw_value)
    if not collection_id:
        raise ValueError(
            "Invalid collection ID or URL in COLLECTION_SLUG_OR_URL environment variable."
        )

    # Collections can mix item types; only datasets are kept.
    dataset_ids = []
    for entry in get_collection(collection_id).items:
        if entry.item_type == "dataset":
            dataset_ids.append(entry.item_id)

    if not dataset_ids:
        raise ValueError("No datasets found in this collection.")
    return dataset_ids, collection_id
34
+
35
+
36
def display_dataset(dataset_ids, index):
    """Build the embedded dataset-viewer component for one dataset.

    Args:
        dataset_ids: Sequence of dataset repository IDs.
        index: Position in ``dataset_ids`` of the dataset to show.

    Returns:
        A ``gr.HTML`` component wrapping an ``<iframe>`` that points at the
        dataset's ``/embed/viewer`` page on huggingface.co.
    """
    dataset_id = dataset_ids[index]
    markup = (
        "<iframe\n"
        f'src="https://huggingface.co/datasets/{dataset_id}/embed/viewer"\n'
        'frameborder="0"\n'
        'width="100%"\n'
        'height="560px"\n'
        "></iframe>"
    )
    return gr.HTML(markup)
44
+
45
+
46
def navigate_dataset(dataset_ids, index, direction):
    """Move the current position by *direction*, wrapping around the list.

    Args:
        dataset_ids: Non-empty sequence of dataset repository IDs.
        index: Current position in ``dataset_ids``.
        direction: Step to apply, e.g. ``-1`` for previous or ``1`` for next.
            ``index`` and ``direction`` are coerced to ``int`` because values
            coming from numeric UI components may arrive as floats, which
            cannot be used to index a list.

    Returns:
        A ``(new_index, label)`` tuple where ``new_index`` is an ``int`` and
        ``label`` is the human-readable "Dataset i of n: id" string.

    Raises:
        ValueError: If ``dataset_ids`` is empty.
    """
    total = len(dataset_ids)
    if total == 0:
        # Fail with a clear message instead of a ZeroDivisionError from "%".
        raise ValueError("Cannot navigate an empty list of datasets.")

    new_index = (int(index) + int(direction)) % total
    return (
        new_index,
        f"Dataset {new_index + 1} of {total}: {dataset_ids[new_index]}",
    )
52
+
53
+
54
def get_display_name(collection_id):
    """Return *collection_id* without its trailing hexadecimal identifier.

    A trailing ``-<hex>`` suffix of 16 or more lowercase hexadecimal
    characters is removed; IDs without such a suffix are returned unchanged.
    """
    suffix_match = re.match(r"^(.+?)-([a-f0-9]{16,})$", collection_id)
    if suffix_match is None:
        # Nothing to strip: show the ID as given.
        return collection_id
    return suffix_match.group(1)
62
+
63
+
64
# Build the Gradio app at import time so that `demo` exists at module level.
# Any failure is reported on stdout instead of propagating.
try:
    dataset_ids, collection_id = load_collection()
    display_name = get_display_name(collection_id)

    with gr.Blocks() as demo:
        gr.Markdown(f"<h1>Dataset Viewer for Collection: {display_name}</h1>")
        gr.Markdown(
            f"[View full collection on Hugging Face](https://huggingface.co/collections/{collection_id})"
        )

        gr.Markdown(
            "\n"
            "This app allows you to browse and view datasets from a specific Hugging Face collection.\n"
            "Use the 'Previous' and 'Next' buttons to navigate through the datasets in the collection.\n"
            "See below for how to set up this app for a different collection."
        )

        # Per-session position of the dataset currently shown.
        index_state = gr.State(value=0)

        with gr.Row():
            left_btn = gr.Button("Previous")
            right_btn = gr.Button("Next")

        dataset_info = gr.Markdown(f"Dataset 1 of {len(dataset_ids)}: {dataset_ids[0]}")
        iframe_output = gr.HTML()
        gr.Markdown(
            "**Note**: This space is currently set up to display datasets from a specific collection.\n"
            "If you'd like to use it for a different collection:\n"
            "1. Duplicate this space\n"
            "2. In your duplicated space, set the `COLLECTION_SLUG_OR_URL` environment variable to your desired collection ID or URL\n"
            "3. Your new space will then display datasets from your chosen collection!\n"
            "Checkout the [docs](https://huggingface.co/docs/hub/datasets-viewer-embed) for other ways to use the iframe viewer.\n"
        )

        # The dataset list and the step size are constants, so they are bound
        # in closures rather than routed through hidden input components.
        left_btn.click(
            lambda index: navigate_dataset(dataset_ids, index, -1),
            inputs=[index_state],
            outputs=[index_state, dataset_info],
        )
        right_btn.click(
            lambda index: navigate_dataset(dataset_ids, index, 1),
            inputs=[index_state],
            outputs=[index_state, dataset_info],
        )

        # Refresh the embedded viewer whenever the position changes.
        index_state.change(
            lambda index: display_dataset(dataset_ids, index),
            inputs=[index_state],
            outputs=[iframe_output],
        )

        # Initialize the display with the first dataset
        demo.load(
            fn=lambda: display_dataset(dataset_ids, 0),
            inputs=None,
            outputs=[iframe_output],
        )

    if __name__ == "__main__":
        demo.launch()

except ValueError as e:
    # ValueError is what load_collection raises for configuration problems,
    # so the environment-variable hint is only shown for this case.
    print(f"Error: {str(e)}")
    print(
        "Please set the COLLECTION_SLUG_OR_URL environment variable with a valid collection ID or URL."
    )
except Exception as e:
    # Other failures (e.g. network or UI errors) are not configuration
    # mistakes: keep the best-effort "do not crash" behavior, but print the
    # traceback so the real cause is visible in the logs.
    import traceback

    print(f"Error: {str(e)}")
    traceback.print_exc()
requirements.in ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ huggingface_hub
2
+ gradio==4.38.1
requirements.txt ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile requirements.in -o requirements.txt
3
+ aiofiles==23.2.1
4
+ # via gradio
5
+ altair==5.3.0
6
+ # via gradio
7
+ annotated-types==0.7.0
8
+ # via pydantic
9
+ anyio==4.4.0
10
+ # via
11
+ # httpx
12
+ # starlette
13
+ # watchfiles
14
+ attrs==23.2.0
15
+ # via
16
+ # jsonschema
17
+ # referencing
18
+ certifi==2024.7.4
19
+ # via
20
+ # httpcore
21
+ # httpx
22
+ # requests
23
+ charset-normalizer==3.3.2
24
+ # via requests
25
+ click==8.1.7
26
+ # via
27
+ # typer
28
+ # uvicorn
29
+ contourpy==1.2.1
30
+ # via matplotlib
31
+ cycler==0.12.1
32
+ # via matplotlib
33
+ dnspython==2.6.1
34
+ # via email-validator
35
+ email-validator==2.2.0
36
+ # via fastapi
37
+ fastapi==0.111.1
38
+ # via gradio
39
+ fastapi-cli==0.0.4
40
+ # via fastapi
41
+ ffmpy==0.3.2
42
+ # via gradio
43
+ filelock==3.15.4
44
+ # via huggingface-hub
45
+ fonttools==4.53.1
46
+ # via matplotlib
47
+ fsspec==2024.6.1
48
+ # via
49
+ # gradio-client
50
+ # huggingface-hub
51
+ gradio==4.38.1
52
+ # via -r requirements.in
53
+ gradio-client==1.1.0
54
+ # via gradio
55
+ h11==0.14.0
56
+ # via
57
+ # httpcore
58
+ # uvicorn
59
+ httpcore==1.0.5
60
+ # via httpx
61
+ httptools==0.6.1
62
+ # via uvicorn
63
+ httpx==0.27.0
64
+ # via
65
+ # fastapi
66
+ # gradio
67
+ # gradio-client
68
+ huggingface-hub==0.23.5
69
+ # via
70
+ # -r requirements.in
71
+ # gradio
72
+ # gradio-client
73
+ idna==3.7
74
+ # via
75
+ # anyio
76
+ # email-validator
77
+ # httpx
78
+ # requests
79
+ importlib-resources==6.4.0
80
+ # via gradio
81
+ jinja2==3.1.4
82
+ # via
83
+ # altair
84
+ # fastapi
85
+ # gradio
86
+ jsonschema==4.23.0
87
+ # via altair
88
+ jsonschema-specifications==2023.12.1
89
+ # via jsonschema
90
+ kiwisolver==1.4.5
91
+ # via matplotlib
92
+ markdown-it-py==3.0.0
93
+ # via rich
94
+ markupsafe==2.1.5
95
+ # via
96
+ # gradio
97
+ # jinja2
98
+ matplotlib==3.9.1
99
+ # via gradio
100
+ mdurl==0.1.2
101
+ # via markdown-it-py
102
+ numpy==2.0.0
103
+ # via
104
+ # altair
105
+ # contourpy
106
+ # gradio
107
+ # matplotlib
108
+ # pandas
109
+ orjson==3.10.6
110
+ # via gradio
111
+ packaging==24.1
112
+ # via
113
+ # altair
114
+ # gradio
115
+ # gradio-client
116
+ # huggingface-hub
117
+ # matplotlib
118
+ pandas==2.2.2
119
+ # via
120
+ # altair
121
+ # gradio
122
+ pillow==10.4.0
123
+ # via
124
+ # gradio
125
+ # matplotlib
126
+ pydantic==2.8.2
127
+ # via
128
+ # fastapi
129
+ # gradio
130
+ pydantic-core==2.20.1
131
+ # via pydantic
132
+ pydub==0.25.1
133
+ # via gradio
134
+ pygments==2.18.0
135
+ # via rich
136
+ pyparsing==3.1.2
137
+ # via matplotlib
138
+ python-dateutil==2.9.0.post0
139
+ # via
140
+ # matplotlib
141
+ # pandas
142
+ python-dotenv==1.0.1
143
+ # via uvicorn
144
+ python-multipart==0.0.9
145
+ # via
146
+ # fastapi
147
+ # gradio
148
+ pytz==2024.1
149
+ # via pandas
150
+ pyyaml==6.0.1
151
+ # via
152
+ # gradio
153
+ # huggingface-hub
154
+ # uvicorn
155
+ referencing==0.35.1
156
+ # via
157
+ # jsonschema
158
+ # jsonschema-specifications
159
+ requests==2.32.3
160
+ # via huggingface-hub
161
+ rich==13.7.1
162
+ # via typer
163
+ rpds-py==0.19.0
164
+ # via
165
+ # jsonschema
166
+ # referencing
167
+ ruff==0.5.2
168
+ # via gradio
169
+ semantic-version==2.10.0
170
+ # via gradio
171
+ shellingham==1.5.4
172
+ # via typer
173
+ six==1.16.0
174
+ # via python-dateutil
175
+ sniffio==1.3.1
176
+ # via
177
+ # anyio
178
+ # httpx
179
+ starlette==0.37.2
180
+ # via fastapi
181
+ tomlkit==0.12.0
182
+ # via gradio
183
+ toolz==0.12.1
184
+ # via altair
185
+ tqdm==4.66.4
186
+ # via huggingface-hub
187
+ typer==0.12.3
188
+ # via
189
+ # fastapi-cli
190
+ # gradio
191
+ typing-extensions==4.12.2
192
+ # via
193
+ # fastapi
194
+ # gradio
195
+ # gradio-client
196
+ # huggingface-hub
197
+ # pydantic
198
+ # pydantic-core
199
+ # typer
200
+ tzdata==2024.1
201
+ # via pandas
202
+ urllib3==2.2.2
203
+ # via
204
+ # gradio
205
+ # requests
206
+ uvicorn==0.30.1
207
+ # via
208
+ # fastapi
209
+ # gradio
210
+ uvloop==0.19.0
211
+ # via uvicorn
212
+ watchfiles==0.22.0
213
+ # via uvicorn
214
+ websockets==11.0.3
215
+ # via
216
+ # gradio-client
217
+ # uvicorn