zhijianma committed on
Commit
ff72aa4
·
verified ·
1 Parent(s): e9e0960

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.css +182 -0
  2. app.py +335 -0
  3. requirements.txt +2 -0
app.css ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* code highlight: https://python-markdown.github.io/extensions/code_hilite/ */
2
+ .codehilite .hll { background-color: #ffffcc }
3
+ .codehilite { background: #f8f8f8; }
4
+ .codehilite .c { color: #408080; font-style: italic } /* Comment */
5
+ .codehilite .err { border: 1px solid #FF0000 } /* Error */
6
+ .codehilite .k { color: #008000; font-weight: bold } /* Keyword */
7
+ .codehilite .o { color: #666666 } /* Operator */
8
+ .codehilite .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
9
+ .codehilite .cm { color: #408080; font-style: italic } /* Comment.Multiline */
10
+ .codehilite .cp { color: #BC7A00 } /* Comment.Preproc */
11
+ .codehilite .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
12
+ .codehilite .c1 { color: #408080; font-style: italic } /* Comment.Single */
13
+ .codehilite .cs { color: #408080; font-style: italic } /* Comment.Special */
14
+ .codehilite .gd { color: #A00000 } /* Generic.Deleted */
15
+ .codehilite .ge { font-style: italic } /* Generic.Emph */
16
+ .codehilite .gr { color: #FF0000 } /* Generic.Error */
17
+ .codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */
18
+ .codehilite .gi { color: #00A000 } /* Generic.Inserted */
19
+ .codehilite .go { color: #888888 } /* Generic.Output */
20
+ .codehilite .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
21
+ .codehilite .gs { font-weight: bold } /* Generic.Strong */
22
+ .codehilite .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
23
+ .codehilite .gt { color: #0044DD } /* Generic.Traceback */
24
+ .codehilite .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
25
+ .codehilite .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
26
+ .codehilite .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
27
+ .codehilite .kp { color: #008000 } /* Keyword.Pseudo */
28
+ .codehilite .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
29
+ .codehilite .kt { color: #B00040 } /* Keyword.Type */
30
+ .codehilite .m { color: #666666 } /* Literal.Number */
31
+ .codehilite .s { color: #BA2121 } /* Literal.String */
32
+ .codehilite .na { color: #7D9029 } /* Name.Attribute */
33
+ .codehilite .nb { color: #008000 } /* Name.Builtin */
34
+ .codehilite .nc { color: #0000FF; font-weight: bold } /* Name.Class */
35
+ .codehilite .no { color: #880000 } /* Name.Constant */
36
+ .codehilite .nd { color: #AA22FF } /* Name.Decorator */
37
+ .codehilite .ni { color: #999999; font-weight: bold } /* Name.Entity */
38
+ .codehilite .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
39
+ .codehilite .nf { color: #0000FF } /* Name.Function */
40
+ .codehilite .nl { color: #A0A000 } /* Name.Label */
41
+ .codehilite .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
42
+ .codehilite .nt { color: #008000; font-weight: bold } /* Name.Tag */
43
+ .codehilite .nv { color: #19177C } /* Name.Variable */
44
+ .codehilite .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
45
+ .codehilite .w { color: #bbbbbb } /* Text.Whitespace */
46
+ .codehilite .mb { color: #666666 } /* Literal.Number.Bin */
47
+ .codehilite .mf { color: #666666 } /* Literal.Number.Float */
48
+ .codehilite .mh { color: #666666 } /* Literal.Number.Hex */
49
+ .codehilite .mi { color: #666666 } /* Literal.Number.Integer */
50
+ .codehilite .mo { color: #666666 } /* Literal.Number.Oct */
51
+ .codehilite .sa { color: #BA2121 } /* Literal.String.Affix */
52
+ .codehilite .sb { color: #BA2121 } /* Literal.String.Backtick */
53
+ .codehilite .sc { color: #BA2121 } /* Literal.String.Char */
54
+ .codehilite .dl { color: #BA2121 } /* Literal.String.Delimiter */
55
+ .codehilite .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
56
+ .codehilite .s2 { color: #BA2121 } /* Literal.String.Double */
57
+ .codehilite .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
58
+ .codehilite .sh { color: #BA2121 } /* Literal.String.Heredoc */
59
+ .codehilite .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
60
+ .codehilite .sx { color: #008000 } /* Literal.String.Other */
61
+ .codehilite .sr { color: #BB6688 } /* Literal.String.Regex */
62
+ .codehilite .s1 { color: #BA2121 } /* Literal.String.Single */
63
+ .codehilite .ss { color: #19177C } /* Literal.String.Symbol */
64
+ .codehilite .bp { color: #008000 } /* Name.Builtin.Pseudo */
65
+ .codehilite .fm { color: #0000FF } /* Name.Function.Magic */
66
+ .codehilite .vc { color: #19177C } /* Name.Variable.Class */
67
+ .codehilite .vg { color: #19177C } /* Name.Variable.Global */
68
+ .codehilite .vi { color: #19177C } /* Name.Variable.Instance */
69
+ .codehilite .vm { color: #19177C } /* Name.Variable.Magic */
70
+ .codehilite .il { color: #666666 } /* Literal.Number.Integer.Long */
71
+
72
+
73
+ .project_cover {
74
+ display: flex;
75
+ flex-direction: column;
76
+ justify-content: center;
77
+ align-items: center;
78
+ min-height: 650px;
79
+ border: 1px solid rgba(229, 231, 235, 0.6); /* 在边框中添加一点透明度 */
80
+ border-radius: 16px; /* 增加边框圆角 */
81
+ padding: 40px; /* 增加内部间距 */
82
+ background-color: #ffffff; /* 添加背景颜色 */
83
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); /* 添加轻微的阴影效果 */
84
+ }
85
+
86
/* Wrapper that centers the project cover image with flexbox. */
.project_img {
  overflow: hidden;
  /* `position: center` was removed: it is not a valid `position` value and
     browsers ignored it; the flex rules below do the centering. */
  display: flex;
  justify-content: center;
  align-items: center;
  margin-bottom: auto;
  /* box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15), 0 6px 20px rgba(0, 0, 0, 0.5); */
}
95
+
96
+ .project_img img {
97
+ width: 80%;
98
+ height: 80%;
99
+ }
100
+ .show_image {
101
+ justify-content: center;
102
+ align-items: center;
103
+ }
104
+
105
+ .show_image img {
106
+ width: 50%;
107
+ height: 50%;
108
+ }
109
+
110
+
111
+
112
+ .project_label {
113
+ font-size: 18px; /* 标题字体大小 */
114
+ color: #333; /* 字体颜色,这里使用深灰色 */
115
+ font-weight: bold; /* 字体加粗 */
116
+ text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1); /* 文字阴影 */
117
+ transition: all 0.3s ease; /* 平滑过渡动画 */
118
+ padding: 10px; /* 内填充 */
119
+ margin-bottom: 20px; /* 底部外边距 */
120
+ border-bottom: 2px solid #ddd; /* 底部边框样式 */
121
+ }
122
+
123
+ .project_name {
124
+ font-size: 30px; /* 调整字体大小 */
125
+ color: #333333; /* 字体颜色更深,增加对比度 */
126
+ margin-top: 20px; /* 调整名称顶部的间距 */
127
+ /* font-weight: bold; 字体加粗 */
128
+ /* text-transform: uppercase; 文字大写 */
129
+ align-items: center;
130
+ justify-content: center;
131
+ text-align: center; /* 文字居中 */
132
+ letter-spacing: 1.5px; /* 增加字母间距 */
133
+ transition: all 0.3s ease; /* 平滑过渡动画 */
134
+ }
135
+
136
+ .project_desc {
137
+ color: #444444; /* 字体颜色更深 */
138
+ font-size: 18px; /* 增加字体大小 */
139
+ margin: 20px 0; /* 增加上下间距 */
140
+ text-align: center; /* 文字居中 */
141
+ line-height: 1.5; /* 增加行高,提升可读性 */
142
+ transition: all 0.3s ease; /* 平滑过渡动画 */
143
+ }
144
+
145
+ .markdown-body .message {
146
+ white-space: pre-wrap;
147
+ }
148
+
149
+ .markdown-body details {
150
+ white-space: nowrap;
151
+ }
152
+ .markdown-body .bot details:not(:last-child) {
153
+ margin-bottom: 1px;
154
+ }
155
+ .markdown-body summary {
156
+ background-color: #4b5563;
157
+ color: #eee;
158
+ padding: 0 4px;
159
+ border-radius: 4px;
160
+ font-size: 0.9em;
161
+ }
162
+
163
+
164
/* Introduction text block. */
.project_intro {
  display: grid;
  place-items: center; /* center the content on both axes */
  height: 100px; /* fixed block height */
  /* An empty `width:` declaration used to sit here; it swallowed the
     font-size declaration below, so it was removed. */
  font-size: 15px; /* body text size */
  /* text-align: center; */
  color: #555; /* body text color: a lighter gray */
  border-radius: 8px; /* rounded corners */
  transition: transform 0.3s ease; /* smooth hover animation */
}
175
+
176
+ /* 鼠标悬停时的动画效果 */
177
+ .project_desc:hover,
178
+ .project_name:hover,
179
+ .project_label:hover,
180
+ .project_intro:hover {
181
+ transform: translateY(-5px); /* 向上移动 */
182
+ }
app.py ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import inspect
3
+ import base64
4
+ import yaml
5
+ import copy
6
+ import shutil
7
+ import gradio as gr
8
+ from data_juicer.ops.base_op import OPERATORS
9
+ from data_juicer.utils.constant import Fields
10
+ demo_path = os.path.dirname(os.path.abspath(__file__))
11
+ project_path = os.path.dirname(os.path.dirname(demo_path))
12
+
13
+
14
+ # 图片本地路径转换为 base64 格式
15
def covert_image_to_base64(image_path):
    """Encode a local image file as a base64 ``data:`` URL.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A string of the form ``data:image/<ext>;base64,<payload>``. ``<ext>``
        is the lower-cased file extension when it is ``gif``, ``jpeg`` or
        ``png``; any other extension (including ``jpg`` or none at all) is
        labelled ``jpeg``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Compare the extension case-insensitively so "PIC.PNG" is not
    # mislabelled as a JPEG.
    ext = os.path.splitext(image_path)[1].lstrip(".").lower()
    if ext not in ("gif", "jpeg", "png"):
        ext = "jpeg"

    with open(image_path, "rb") as image_file:
        base64_data = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:image/{ext};base64,{base64_data}"
31
+
32
+
33
def format_cover_html(project_img_path):
    """Build the HTML banner shown at the top of the demo page.

    Args:
        project_img_path: Path of the project cover image. It is currently
            unused because the markup that embedded the image is disabled;
            the parameter is kept so existing callers keep working.

    Returns:
        An HTML fragment (str) containing the project name, its description,
        an introduction, a demo hint and a note linking to the GitHub
        repository.
    """
    readme_link = 'https://github.com/alibaba/data-juicer'
    config = {
        'name': "Data-Juicer",
        'label': "Op Insight",
        'description': 'A One-Stop Data Processing System for Large Language Models.',
        'introduction':
            "This project is being actively updated and maintained, and we will periodically enhance and add more features and data recipes. <br>"
            "We welcome you to join us in promoting LLM data development and research!<br>",
        'demo': "You can experience the effect of the operators of Data-Juicer",
        # This must be an f-string: otherwise the literal text "{readme_link}"
        # ends up in the href instead of the repository URL.
        'note': f'Note: Due to resource limitations, only a subset of operations is available here. see more details in <a href="{readme_link}">GitHub</a>'
    }
    # Disabled cover image markup, kept for reference:
    # image_src = covert_image_to_base64(project_img_path)
    # <div class="project_img"> <img src={image_src} /> </div>
    # <div class='project_cover'>
    return f"""
<div>
    <div class="project_name">{config.get("name", "")} </div>
    <div class="project_desc">{config.get("description", "")}</div>
    <div class="project_desc">{config.get("introduction", "")}</div>
    <div class="project_desc">{config.get("demo", "")}</div>
    <div class="project_desc">{config.get("note", "")}</div>
</div>
"""
57
# Load the operator documentation used for the descriptions below; op_text
# stays empty when the docs file is not present.
op_text = ''
docs_file = os.path.join(project_path, 'docs/Operators.md')
if os.path.exists(docs_file):
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default encoding.
    with open(docs_file, 'r', encoding='utf-8') as f:
        op_text = f.read()
62
+
63
def extract_op_desc(markdown_text, header):
    """Return the body of the markdown section introduced by ``header``.

    The body starts right after the first occurrence of ``header`` and ends
    before the next line that starts with ``##`` (which also matches deeper
    headings such as ``###``), or at the end of the text when no such line
    follows.

    Args:
        markdown_text: The markdown document to search.
        header: The exact header text that opens the section.

    Returns:
        The section body with surrounding whitespace stripped, or an empty
        string when ``header`` does not occur in ``markdown_text``.
    """
    start_index = markdown_text.find(header)
    if start_index == -1:
        return ''
    body_start = start_index + len(header)
    end_index = markdown_text.find("\n##", body_start)
    if end_index == -1:
        # Last section of the document: without this, the -1 would be used as
        # a slice bound and silently cut off the final character.
        end_index = len(markdown_text)
    return markdown_text[body_start:end_index].strip()
67
+
68
# Centered overview paragraph: the "## Overview" section up to (not including)
# the sentence that starts with "All the specific ".
op_desc = (
    "<div style='text-align: center;'>"
    + extract_op_desc(op_text, '## Overview').split('All the specific ')[0].strip()
    + "</div>"
)

# Documentation section of each operator type, keyed by the type name.
op_list_desc = {
    kind: extract_op_desc(op_text, f'## {kind.capitalize()} <a name="{kind}"/>')
    for kind in ('mapper', 'filter', 'deduplicator', 'selector')
}
75
+
76
# Operator types that get a tab in the demo ('deduplicator' and 'selector'
# are currently disabled).
op_types = ['mapper', 'filter']
local_ops_dict = {op_type: [] for op_type in op_types}

# Whether image/video/audio operators are offered. Environment values are
# strings, so "false"/"0" must be parsed explicitly: a plain truthiness test
# would treat every non-empty value as enabled. Defaults to enabled when the
# variable is unset.
multimodal = os.getenv('MULTI_MODAL', 'true').strip().lower() not in (
    '', '0', 'false', 'no', 'off')
# Initial visibility of the image/video/audio widgets.
multimodal_visible = False

# Keys of the sample dictionary exchanged with the operators.
text_key = 'text'
image_key = 'images'
audio_key = 'audios'
video_key = 'videos'
84
+
85
def get_op_lists(op_type):
    """Return the operator names offered in the dropdown for ``op_type``.

    The names come from the ``OPERATORS`` registry (every registered name
    that ends with ``op_type``) unless the ``USE_LOCAL_OP`` environment
    variable is enabled, in which case ``local_ops_dict[op_type]`` is used.
    When the module-level ``multimodal`` flag is falsy, names containing
    ``image``, ``video`` or ``audio`` are dropped.

    Args:
        op_type: Operator type suffix, e.g. ``'mapper'`` or ``'filter'``.

    Returns:
        A new list of operator names.
    """
    # Environment values are strings, so "0"/"false" must be parsed
    # explicitly; a plain truthiness test treats them as enabled.
    use_local_op = os.getenv('USE_LOCAL_OP', '').strip().lower() not in (
        '', '0', 'false', 'no', 'off')
    if use_local_op:
        options = list(local_ops_dict.get(op_type, []))
    else:
        options = [
            name for name in OPERATORS.modules.keys() if name.endswith(op_type)
        ]

    if not multimodal:
        options = [
            name for name in options
            if not any(tag in name for tag in ('image', 'video', 'audio'))
        ]
    return options
98
+
99
def show_code(op_name):
    """Return the source code and default parameters of an operator.

    Args:
        op_name: Name of the operator in the ``OPERATORS`` registry.

    Returns:
        A ``(source, params_yaml)`` tuple: the source text of the operator
        class and a YAML dump of the ``__init__`` parameters that declare a
        default value (``self``, ``args`` and ``kwargs`` are skipped).
    """
    op_class = OPERATORS.modules[op_name]
    source_lines, _ = inspect.getsourcelines(op_class)

    skipped = ('self', 'args', 'kwargs')
    no_default = inspect.Parameter.empty
    default_params = {
        param_name: param.default
        for param_name, param in inspect.signature(op_class.__init__).parameters.items()
        if param_name not in skipped and param.default is not no_default
    }

    return ''.join(source_lines), yaml.dump(default_params)
114
+
115
def change_visible(op_name):
    """Compute widget visibility updates for the selected operator.

    The text widget is always shown. At most one media widget is shown,
    chosen by the first of ``video``, ``audio``, ``image`` that occurs in
    ``op_name``.

    Args:
        op_name: Name of the selected operator.

    Returns:
        A tuple of eight ``gr.update`` objects in the order text, image,
        video, audio, repeated twice (once per widget group the caller
        wires as outputs).
    """
    video_visible = 'video' in op_name
    audio_visible = not video_visible and 'audio' in op_name
    image_visible = not (video_visible or audio_visible) and 'image' in op_name

    flags = (True, image_visible, video_visible, audio_visible)
    return tuple(gr.update(visible=flag) for flag in flags * 2)
127
+
128
def copy_func(file):
    """Copy ``file`` into the current working directory.

    Args:
        file: Path of the file to copy; may be ``None`` or empty.

    Returns:
        The base name of the file (its location relative to the working
        directory after the copy), or ``None`` when ``file`` is falsy.
    """
    if not file:
        return None
    filename = os.path.basename(file)
    try:
        shutil.copyfile(file, filename)
    except shutil.SameFileError:
        # The file already lives in the working directory, so there is
        # nothing to copy.
        pass
    return filename
134
+
135
def encode_sample(input_text, input_image, input_video, input_audio):
    """Pack the widget values into a sample dictionary.

    Args:
        input_text: Value stored under ``text_key`` as is.
        input_image: Image value; stored as a one-element list, or an empty
            list when falsy.
        input_video: Video value, wrapped the same way.
        input_audio: Audio value, wrapped the same way.

    Returns:
        A dict keyed by ``text_key``, ``image_key``, ``video_key`` and
        ``audio_key``.
    """
    return {
        text_key: input_text,
        image_key: [input_image] if input_image else [],
        video_key: [input_video] if input_video else [],
        audio_key: [input_audio] if input_audio else [],
    }
142
+
143
def decode_sample(output_sample):
    """Unpack a sample dictionary into values for the output widgets.

    For each media key the first list entry is used (``None`` when the list
    is empty) and passed through ``copy_func``.

    Args:
        output_sample: Sample dict holding ``text_key``, ``image_key``,
            ``video_key`` and ``audio_key``.

    Returns:
        A ``(text, image_file, video_file, audio_file)`` tuple.
    """
    output_text = output_sample[text_key]

    # Resolve all three media entries first, then copy them, in
    # image / video / audio order.
    first_entries = []
    for media_key in (image_key, video_key, audio_key):
        entries = output_sample[media_key]
        first_entries.append(entries[0] if entries else None)

    image_file, video_file, audio_file = [copy_func(path) for path in first_entries]
    return output_text, image_file, video_file, audio_file
152
+
153
+ def create_tab_layout(op_tab, op_type, run_op, has_stats=False):
154
+ with op_tab:
155
+ options = get_op_lists(op_type)
156
+ label = f'Select a {op_type} to show details'
157
+ with gr.Row():
158
+ op_selector = gr.Dropdown(value=options[0], label=label, choices=options, interactive=True)
159
+ with gr.Column():
160
+ gr.Markdown(" **Op Parameters**")
161
+ op_params = gr.Code(label="Yaml",language='yaml', interactive=True)
162
+ run_button = gr.Button(value="🚀Run")
163
+ show_code_button = gr.Button(value="🔍Show Code")
164
+
165
+ with gr.Column():
166
+ with gr.Group('Inputs'):
167
+ gr.Markdown(" **Inputs**")
168
+ with gr.Row():
169
+ input_text = gr.TextArea(label="Text",interactive=True,)
170
+ input_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
171
+ input_video = gr.Video(label='Video', visible=multimodal_visible)
172
+ input_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
173
+
174
+ with gr.Group('Outputs'):
175
+ gr.Markdown(" **Outputs**")
176
+ with gr.Row():
177
+ output_text = gr.TextArea(label="Text",interactive=False,)
178
+ output_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
179
+ output_video = gr.Video(label='Video', visible=multimodal_visible)
180
+ output_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
181
+
182
+ with gr.Row():
183
+ if has_stats:
184
+ output_stats = gr.Json(label='Stats')
185
+ output_keep = gr.Text(label='Keep or not?', interactive=False)
186
+
187
+ code = gr.Code(label='Source', language='python')
188
+ inputs = [input_text, input_image, input_video, input_audio, op_selector, op_params]
189
+ outputs = [output_text, output_image, output_video, output_audio]
190
+ if has_stats:
191
+ outputs.append(output_stats)
192
+ outputs.append(output_keep)
193
+
194
def run_func(*args):
    """Gradio callback: parse the YAML parameters and run the selected op.

    Args:
        *args: The submitted input values in the order of ``inputs``; the
            last one is the YAML text of the operator parameters.

    Returns:
        Whatever ``run_op`` returns for the submitted values.

    Raises:
        gr.Error: When running the operator fails. The error must be raised,
            not merely constructed, for the message to be reported.
    """
    try:
        *op_args, op_params = args
        try:
            # An empty editor yields no parameters at all.
            params = yaml.safe_load(op_params) if op_params else None
        except yaml.YAMLError:
            # Best effort: unparsable YAML falls back to no parameters.
            params = {}
        if params is None:
            params = {}
        return run_op(*op_args, params)
    except Exception as e:
        print(e)
        raise gr.Error(str(e)) from e
209
+
210
+ show_code_button.click(show_code, inputs=[op_selector], outputs=[code, op_params])
211
+ show_code_button.click(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
212
+ run_button.click(run_func, inputs=inputs, outputs=outputs)
213
+ run_button.click(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
214
+ op_selector.select(show_code, inputs=[op_selector], outputs=[code, op_params])
215
+ op_selector.select(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
216
+ op_tab.select(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
217
+
218
def create_mapper_tab(op_type, op_tab):
    """Populate ``op_tab`` with the UI for mapper operators.

    Args:
        op_type: Operator type name passed on to ``create_tab_layout``.
        op_tab: The tab container to fill.
    """
    def run_op(input_text, input_image, input_video, input_audio, op_name, op_params):
        """Run the mapper ``op_name`` on one sample built from the inputs."""
        op_class = OPERATORS.modules[op_name]
        op = op_class(**op_params)
        sample = encode_sample(input_text, input_image, input_video, input_audio)
        # Work on a deep copy so the operator cannot alter the encoded input.
        output_sample = op.process(copy.deepcopy(sample))
        return decode_sample(output_sample)

    # create_tab_layout enters ``op_tab`` itself, so the tab context is not
    # entered here as well (same structure as create_filter_tab).
    create_tab_layout(op_tab, op_type, run_op)
227
+
228
+
229
def create_filter_tab(op_type, op_tab):
    """Populate ``op_tab`` with the UI for filter operators.

    Args:
        op_type: Operator type name passed on to ``create_tab_layout``.
        op_tab: The tab container to fill.
    """
    def run_op(input_text, input_image, input_video, input_audio, op_name, op_params):
        """Compute the stats of filter ``op_name`` and its keep decision."""
        filter_op = OPERATORS.modules[op_name](**op_params)

        sample = encode_sample(input_text, input_image, input_video, input_audio)
        sample[Fields.stats] = dict()
        # Stats are computed on a deep copy of the encoded sample.
        output_sample = filter_op.compute_stats(copy.deepcopy(sample))

        output_keep = 'Yes' if filter_op.process(output_sample) else 'No'
        output_stats = output_sample[Fields.stats]
        return (*decode_sample(output_sample), output_stats, output_keep)

    create_tab_layout(op_tab, op_type, run_op, has_stats=True)
243
+
244
+
245
def create_deduplicator_tab(op_type, op_tab):
    """Populate ``op_tab`` with the UI for deduplicator operators.

    Hashing is not wired up yet (the ``compute_hash`` call is disabled), so
    the callback currently echoes the encoded input sample.

    Args:
        op_type: Operator type name passed on to ``create_tab_layout``.
        op_tab: The tab container to fill.
    """
    def run_op(input_text, input_image, input_video, input_audio, op_name, op_params):
        """Build the operator and return the (unchanged) sample values."""
        op_class = OPERATORS.modules[op_name]
        # The instance is otherwise unused for now; constructing it still
        # surfaces parameters the operator does not accept.
        op_class(**op_params)
        sample = encode_sample(input_text, input_image, input_video, input_audio)
        output_sample = sample  # op.compute_hash(copy.deepcopy(sample))
        return decode_sample(output_sample)

    # run_op returns four values (text, image, video, audio), so the layout
    # must not add the stats / keep outputs: has_stats stays at its default.
    create_tab_layout(op_tab, op_type, run_op)
254
+
255
+ def create_tab_double_layout(op_tab, op_type, run_op):
256
+ with op_tab:
257
+ options = get_op_lists(op_type)
258
+ label = f'Select a {op_type} to show details'
259
+ with gr.Row():
260
+ op_selector = gr.Dropdown(value=options[0], label=label, choices=options, interactive=True)
261
+ with gr.Column():
262
+ gr.Markdown(" **Op Parameters**")
263
+ op_params = gr.Code(label="Yaml",language='yaml', interactive=True)
264
+ run_button = gr.Button(value="🚀Run")
265
+ show_code_button = gr.Button(value="🔍Show Code")
266
+
267
+ with gr.Column():
268
+ with gr.Group('Inputs'):
269
+ gr.Markdown(" **Inputs**")
270
+ with gr.Row():
271
+ input_text = gr.TextArea(label="Text",interactive=True,)
272
+ input_text2 = gr.TextArea(label="Text",interactive=True,)
273
+ input_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
274
+ input_image2 = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
275
+ input_video = gr.Video(label='Video', visible=multimodal_visible)
276
+ input_video2 = gr.Video(label='Video', visible=multimodal_visible)
277
+ input_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
278
+ input_audio2 = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
279
+
280
+ with gr.Group('Outputs'):
281
+ gr.Markdown(" **Outputs**")
282
+ with gr.Row():
283
+ output_text = gr.TextArea(label="Text",interactive=False,)
284
+ output_text2 = gr.TextArea(label="Text",interactive=False,)
285
+ output_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
286
+ output_image2 = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
287
+ output_video = gr.Video(label='Video', visible=multimodal_visible)
288
+ output_video2 = gr.Video(label='Video', visible=multimodal_visible)
289
+ output_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
290
+ output_audio2 = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
291
+
292
+ code = gr.Code(label='Source', language='python')
293
+ inputs = [input_text, input_image, input_video, input_audio, input_text2, input_image2, input_video2, input_audio2, op_selector, op_params]
294
+ outputs = [output_text, output_image, output_video, output_audio, output_text2, output_image2, output_video2, output_audio2]
295
+
296
+ def run_func(*args):
297
+ try:
298
+ try:
299
+ op_params = args[-1]
300
+ params = yaml.safe_load(op_params)
301
+ except:
302
+ params = {}
303
+ if params is None:
304
+ params = {}
305
+ return run_op(input_text, input_image, input_video, input_audio, op_selector, params)
306
+ except Exception as e:
307
+ gr.Error(str(e))
308
+ return outputs
309
+
310
+ # show_code_button.click(show_code, inputs=[op_selector], outputs=[code, op_params])
311
+ # show_code_button.click(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
312
+ # run_button.click(run_func, inputs=inputs, outputs=outputs)
313
+ # op_selector.select(show_code, inputs=[op_selector], outputs=[code, op_params])
314
+ # op_selector.select(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
315
+ show_code_button.click(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4]).then(show_code, inputs=[op_selector], outputs=[code, op_params])
316
+ run_button.click(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4]).then(run_func, inputs=[op_selector], outputs=[code, op_params])
317
+ op_selector.select(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4]).then(show_code, inputs=[op_selector], outputs=[code, op_params])
318
+ op_tab.select(change_visible, inputs=[op_selector], outputs=outputs[:4] + inputs[:4])
319
# Assemble the demo page: the cover banner on top, then one tab per operator
# type listed in op_types.
with gr.Blocks(css="./app.css") as demo:

    dj_image = os.path.join(project_path, 'docs/imgs/data-juicer.jpg')
    gr.HTML(format_cover_html(dj_image))

    with gr.Accordion(label='Op Insight', open=True):
        tabs = gr.Tabs()
        with tabs:
            op_tabs = {op_type: gr.Tab(label=op_type.capitalize() + 's') for op_type in op_types}
            for op_type, op_tab in op_tabs.items():
                # Tab builders are looked up by naming convention:
                # create_<op_type>_tab.
                create_op_tab_func = globals().get(f'create_{op_type}_tab', None)
                if callable(create_op_tab_func):
                    create_op_tab_func(op_type, op_tab)
                else:
                    # A gr.Error that is only constructed is a silent no-op,
                    # so report the missing builder on the console instead.
                    print(f'{op_type} not callable')

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pydantic>=2
2
+ git+https://gh-proxy.com/https://github.com/alibaba/data-juicer.git@demos/op_insight_slight#egg=py-data-juicer[all]