Upload 3 files
Browse files- app.css +182 -0
- app.py +335 -0
- requirements.txt +2 -0
app.css
ADDED
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/* Syntax-highlighting theme for the Python-Markdown CodeHilite extension:
   https://python-markdown.github.io/extensions/code_hilite/ */
.codehilite .hll { background-color: #ffffcc }
.codehilite { background: #f8f8f8; }
.codehilite .c { color: #408080; font-style: italic } /* Comment */
.codehilite .err { border: 1px solid #FF0000 } /* Error */
.codehilite .k { color: #008000; font-weight: bold } /* Keyword */
.codehilite .o { color: #666666 } /* Operator */
.codehilite .ch { color: #408080; font-style: italic } /* Comment.Hashbang */
.codehilite .cm { color: #408080; font-style: italic } /* Comment.Multiline */
.codehilite .cp { color: #BC7A00 } /* Comment.Preproc */
.codehilite .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */
.codehilite .c1 { color: #408080; font-style: italic } /* Comment.Single */
.codehilite .cs { color: #408080; font-style: italic } /* Comment.Special */
.codehilite .gd { color: #A00000 } /* Generic.Deleted */
.codehilite .ge { font-style: italic } /* Generic.Emph */
.codehilite .gr { color: #FF0000 } /* Generic.Error */
.codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.codehilite .gi { color: #00A000 } /* Generic.Inserted */
.codehilite .go { color: #888888 } /* Generic.Output */
.codehilite .gp { color: #000080; font-weight: bold } /* Generic.Prompt */
.codehilite .gs { font-weight: bold } /* Generic.Strong */
.codehilite .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.codehilite .gt { color: #0044DD } /* Generic.Traceback */
.codehilite .kc { color: #008000; font-weight: bold } /* Keyword.Constant */
.codehilite .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */
.codehilite .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */
.codehilite .kp { color: #008000 } /* Keyword.Pseudo */
.codehilite .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */
.codehilite .kt { color: #B00040 } /* Keyword.Type */
.codehilite .m { color: #666666 } /* Literal.Number */
.codehilite .s { color: #BA2121 } /* Literal.String */
.codehilite .na { color: #7D9029 } /* Name.Attribute */
.codehilite .nb { color: #008000 } /* Name.Builtin */
.codehilite .nc { color: #0000FF; font-weight: bold } /* Name.Class */
.codehilite .no { color: #880000 } /* Name.Constant */
.codehilite .nd { color: #AA22FF } /* Name.Decorator */
.codehilite .ni { color: #999999; font-weight: bold } /* Name.Entity */
.codehilite .ne { color: #D2413A; font-weight: bold } /* Name.Exception */
.codehilite .nf { color: #0000FF } /* Name.Function */
.codehilite .nl { color: #A0A000 } /* Name.Label */
.codehilite .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */
.codehilite .nt { color: #008000; font-weight: bold } /* Name.Tag */
.codehilite .nv { color: #19177C } /* Name.Variable */
.codehilite .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */
.codehilite .w { color: #bbbbbb } /* Text.Whitespace */
.codehilite .mb { color: #666666 } /* Literal.Number.Bin */
.codehilite .mf { color: #666666 } /* Literal.Number.Float */
.codehilite .mh { color: #666666 } /* Literal.Number.Hex */
.codehilite .mi { color: #666666 } /* Literal.Number.Integer */
.codehilite .mo { color: #666666 } /* Literal.Number.Oct */
.codehilite .sa { color: #BA2121 } /* Literal.String.Affix */
.codehilite .sb { color: #BA2121 } /* Literal.String.Backtick */
.codehilite .sc { color: #BA2121 } /* Literal.String.Char */
.codehilite .dl { color: #BA2121 } /* Literal.String.Delimiter */
.codehilite .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */
.codehilite .s2 { color: #BA2121 } /* Literal.String.Double */
.codehilite .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */
.codehilite .sh { color: #BA2121 } /* Literal.String.Heredoc */
.codehilite .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */
.codehilite .sx { color: #008000 } /* Literal.String.Other */
.codehilite .sr { color: #BB6688 } /* Literal.String.Regex */
.codehilite .s1 { color: #BA2121 } /* Literal.String.Single */
.codehilite .ss { color: #19177C } /* Literal.String.Symbol */
.codehilite .bp { color: #008000 } /* Name.Builtin.Pseudo */
.codehilite .fm { color: #0000FF } /* Name.Function.Magic */
.codehilite .vc { color: #19177C } /* Name.Variable.Class */
.codehilite .vg { color: #19177C } /* Name.Variable.Global */
.codehilite .vi { color: #19177C } /* Name.Variable.Instance */
.codehilite .vm { color: #19177C } /* Name.Variable.Magic */
.codehilite .il { color: #666666 } /* Literal.Number.Integer.Long */
|
71 |
+
|
72 |
+
|
73 |
+
/* Card container for the project cover. */
.project_cover {
  display: flex;
  flex-direction: column;
  justify-content: center;
  align-items: center;
  min-height: 650px;
  border: 1px solid rgba(229, 231, 235, 0.6); /* slightly transparent border */
  border-radius: 16px; /* rounded corners */
  padding: 40px; /* inner spacing */
  background-color: #ffffff; /* background color */
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); /* subtle drop shadow */
}

/* Wrapper that centers the cover image. */
.project_img {
  overflow: hidden;
  /* "position: center" was removed: it is not a valid value for `position`,
     so browsers ignored the declaration. Centering comes from the flex
     properties below. */
  display: flex;
  justify-content: center;
  align-items: center;
  margin-bottom: auto;
  /* box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15), 0 6px 20px rgba(0, 0, 0, 0.5); */
}

.project_img img {
  width: 80%;
  height: 80%;
}

.show_image {
  justify-content: center;
  align-items: center;
}

.show_image img {
  width: 50%;
  height: 50%;
}

.project_label {
  font-size: 18px; /* heading font size */
  color: #333; /* text color: dark grey */
  font-weight: bold; /* bold text */
  text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1); /* text shadow */
  transition: all 0.3s ease; /* smooth transition */
  padding: 10px; /* inner padding */
  margin-bottom: 20px; /* bottom margin */
  border-bottom: 2px solid #ddd; /* bottom border */
}

.project_name {
  font-size: 30px; /* font size */
  color: #333333; /* darker text for better contrast */
  margin-top: 20px; /* space above the name */
  /* font-weight: bold; (bold text, disabled) */
  /* text-transform: uppercase; (upper-case text, disabled) */
  align-items: center;
  justify-content: center;
  text-align: center; /* centered text */
  letter-spacing: 1.5px; /* wider letter spacing */
  transition: all 0.3s ease; /* smooth transition */
}

.project_desc {
  color: #444444; /* darker text */
  font-size: 18px; /* larger font size */
  margin: 20px 0; /* vertical spacing */
  text-align: center; /* centered text */
  line-height: 1.5; /* taller lines for readability */
  transition: all 0.3s ease; /* smooth transition */
}

.markdown-body .message {
  white-space: pre-wrap;
}

.markdown-body details {
  white-space: nowrap;
}
.markdown-body .bot details:not(:last-child) {
  margin-bottom: 1px;
}
.markdown-body summary {
  background-color: #4b5563;
  color: #eee;
  padding: 0 4px;
  border-radius: 4px;
  font-size: 0.9em;
}

.project_intro {
  display: grid;
  place-items: center; /* center on both axes */
  height: 100px; /* height */
  /* An empty "width:" declaration used to sit here. Without a terminating
     semicolon it absorbed the next declaration, so the font size below was
     never applied. */
  font-size: 15px; /* body font size */
  /* text-align: center; (centered text, disabled) */
  color: #555; /* body text color: lighter grey */
  border-radius: 8px; /* rounded corners */
  transition: transform 0.3s ease; /* smooth transition */
}

/* Hover animation */
.project_desc:hover,
.project_name:hover,
.project_label:hover,
.project_intro:hover {
  transform: translateY(-5px); /* move up */
}
|
app.py
ADDED
@@ -0,0 +1,335 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import inspect
|
3 |
+
import base64
|
4 |
+
import yaml
|
5 |
+
import copy
|
6 |
+
import shutil
|
7 |
+
import gradio as gr
|
8 |
+
from data_juicer.ops.base_op import OPERATORS
|
9 |
+
from data_juicer.utils.constant import Fields
|
10 |
+
# Absolute directory that contains this script.
demo_path = os.path.dirname(os.path.abspath(__file__))
# Two directory levels above the script directory; the docs/ files read
# further down are resolved relative to it.
project_path = os.path.dirname(os.path.dirname(demo_path))
|
12 |
+
|
13 |
+
|
14 |
+
def covert_image_to_base64(image_path):
    """Read a local image file and return it as a base64 ``data:`` URL.

    The MIME subtype is derived from the file extension, compared
    case-insensitively. Only ``gif``, ``jpeg`` and ``png`` are used as-is;
    any other extension (including ``jpg`` or no extension at all) is
    labelled ``jpeg``.

    Args:
        image_path: Path of the image file to embed.

    Returns:
        A string of the form ``data:image/<ext>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # splitext only looks at the last path component, so a dot in a parent
    # directory name is never mistaken for an extension; lower() makes
    # "photo.PNG" resolve to "png" instead of falling back to "jpeg".
    ext = os.path.splitext(image_path)[1].lstrip(".").lower()
    if ext not in ("gif", "jpeg", "png"):
        ext = "jpeg"

    with open(image_path, "rb") as image_file:
        base64_data = base64.b64encode(image_file.read()).decode("utf-8")

    return f"data:image/{ext};base64,{base64_data}"
|
31 |
+
|
32 |
+
|
33 |
+
def format_cover_html(project_img_path):
    """Build the HTML banner shown at the top of the demo page.

    Args:
        project_img_path: Path of the project logo. Currently unused because
            the markup that embedded the image is disabled; the parameter is
            kept so existing callers keep working.

    Returns:
        An HTML fragment containing the project name, description,
        introduction, demo hint and a note that links to the GitHub
        repository.
    """
    readme_link = 'https://github.com/alibaba/data-juicer'
    config = {
        'name': "Data-Juicer",
        'label': "Op Insight",
        'description': 'A One-Stop Data Processing System for Large Language Models.',
        'introduction':
            "This project is being actively updated and maintained, and we will periodically enhance and add more features and data recipes. <br>"
            "We welcome you to join us in promoting LLM data development and research!<br>",
        'demo': "You can experience the effect of the operators of Data-Juicer",
        # This has to be an f-string: as a plain string the anchor pointed at
        # the literal text "{readme_link}" instead of the repository URL.
        'note': f'Note: Due to resource limitations, only a subset of operations is available here. see more details in <a href="{readme_link}">GitHub</a>',
    }
    # NOTE: the cover image (covert_image_to_base64 + a "project_img" div
    # inside a "project_cover" wrapper) is intentionally not rendered.
    return f"""
<div>
<div class="project_name">{config.get('name', '')} </div>
<div class="project_desc">{config.get('description', '')}</div>
<div class="project_desc">{config.get('introduction', '')}</div>
<div class="project_desc">{config.get('demo', '')}</div>
<div class="project_desc">{config.get('note', '')}</div>
</div>
"""
|
57 |
+
# Raw markdown of docs/Operators.md; stays empty when the file is not present.
op_text = ''
docs_file = os.path.join(project_path, 'docs/Operators.md')
if os.path.exists(docs_file):
    # Read with an explicit encoding so the result does not depend on the
    # locale of the machine running the demo.
    with open(docs_file, 'r', encoding='utf-8') as f:
        op_text = f.read()
|
62 |
+
|
63 |
+
def extract_op_desc(markdown_text, header):
    """Return the body of the markdown section introduced by ``header``.

    The section starts right after the first occurrence of ``header`` and
    ends before the next line that begins with ``##``, or at the end of the
    text when no such line follows.

    Args:
        markdown_text: Full markdown document to search.
        header: Exact heading text that opens the wanted section.

    Returns:
        The section body with surrounding whitespace stripped, or an empty
        string when ``header`` does not occur in ``markdown_text``.
    """
    start_index = markdown_text.find(header)
    if start_index == -1:
        # Without this guard the -1 would be used as a real offset and an
        # unrelated slice of the document would be returned.
        return ''
    start_index += len(header)

    end_index = markdown_text.find("\n##", start_index)
    if end_index == -1:
        # Last section of the document: using -1 as the slice end would
        # silently drop the final character.
        end_index = len(markdown_text)
    return markdown_text[start_index:end_index].strip()
|
67 |
+
|
68 |
+
# Centered HTML snippet holding the overview section of the operator docs;
# everything from 'All the specific ' onwards is cut off.
op_desc = f"<div style='text-align: center;'>{extract_op_desc(op_text, '## Overview').split('All the specific ')[0].strip()}</div>"
# Documentation section for each operator family, keyed by operator type.
op_list_desc = {
    'mapper': extract_op_desc(op_text, '## Mapper <a name="mapper"/>'),
    'filter': extract_op_desc(op_text, '## Filter <a name="filter"/>'),
    'deduplicator': extract_op_desc(op_text, '## Deduplicator <a name="deduplicator"/>'),
    'selector': extract_op_desc(op_text, '## Selector <a name="selector"/>'),
}

# Operator families that get a tab; 'deduplicator' and 'selector' are
# currently disabled.
op_types = ['mapper', 'filter']
local_ops_dict = {op_type: [] for op_type in op_types}
# Environment values are strings, so MULTI_MODAL=false or MULTI_MODAL=0 would
# count as enabled under a plain truthiness test. Parse the flag explicitly;
# it stays enabled when the variable is unset.
multimodal = os.getenv('MULTI_MODAL', 'true').strip().lower() not in ('', '0', 'false', 'no', 'off')
# Initial visibility of the image/video/audio widgets.
multimodal_visible = False
# Keys of the sample dictionary exchanged with the operators.
text_key = 'text'
image_key = 'images'
audio_key = 'audios'
video_key = 'videos'
|
84 |
+
|
85 |
+
def get_op_lists(op_type):
    """Return the operator names offered for ``op_type``.

    By default the names come from the ``OPERATORS`` registry, keeping those
    that end with ``op_type``. When the ``USE_LOCAL_OP`` environment variable
    is enabled, the list stored in ``local_ops_dict`` is used instead. If the
    module-level ``multimodal`` switch is off, names containing ``image``,
    ``video`` or ``audio`` are removed.

    Args:
        op_type: Operator family suffix such as ``'mapper'`` or ``'filter'``.

    Returns:
        A new list of operator names.
    """
    # Environment values are strings; parse the flag explicitly so that
    # USE_LOCAL_OP=false or USE_LOCAL_OP=0 does not switch the mode on.
    use_local_op = os.getenv('USE_LOCAL_OP', '').strip().lower() not in ('', '0', 'false', 'no', 'off')
    if use_local_op:
        # Copy so callers never receive the shared module-level list.
        options = list(local_ops_dict.get(op_type, []))
    else:
        options = [
            name for name in OPERATORS.modules.keys() if name.endswith(op_type)
        ]

    if not multimodal:
        media_tags = ('image', 'video', 'audio')
        options = [
            name for name in options
            if not any(tag in name for tag in media_tags)
        ]
    return options
|
98 |
+
|
99 |
+
def show_code(op_name):
    """Return an operator's source code and its default parameters as YAML.

    Args:
        op_name: Key of the operator class in ``OPERATORS.modules``.

    Returns:
        A ``(source, yaml_text)`` tuple: the source of the operator class and
        a YAML dump of every ``__init__`` parameter that declares a default.
        ``self``, ``args`` and ``kwargs`` are skipped.

    Raises:
        KeyError: If ``op_name`` is not registered.
    """
    op_class = OPERATORS.modules[op_name]
    source = inspect.getsource(op_class)

    init_signature = inspect.signature(op_class.__init__)
    # Collect the name and default value of each configurable parameter.
    default_params = {
        name: parameter.default
        for name, parameter in init_signature.parameters.items()
        if name not in ('self', 'args', 'kwargs')
        and parameter.default is not inspect.Parameter.empty
    }

    # NOTE(review): yaml.dump may emit Python-specific tags for defaults that
    # are not plain YAML types; confirm the run handlers' safe_load accepts them.
    return source, yaml.dump(default_params)
|
114 |
+
|
115 |
+
def change_visible(op_name):
    """Compute widget visibility updates for the selected operator.

    The text widget is always shown. At most one media widget is shown,
    chosen by the first of ``video``, ``audio``, ``image`` found in
    ``op_name``.

    Args:
        op_name: Name of the selected operator.

    Returns:
        A tuple of eight ``gr.update`` objects ordered text, image, video,
        audio, and then the same four again, so it can drive one group of
        output widgets followed by one group of input widgets.
    """
    text_visible = True
    video_visible = False
    audio_visible = False
    image_visible = False
    if 'video' in op_name:
        video_visible = True
    elif 'audio' in op_name:
        audio_visible = True
    elif 'image' in op_name:
        image_visible = True

    flags = (text_visible, image_visible, video_visible, audio_visible)
    # The same four flags are emitted twice, once per widget group.
    return tuple(gr.update(visible=flag) for flag in flags * 2)
|
127 |
+
|
128 |
+
def copy_func(file):
    """Copy ``file`` into the current working directory.

    Args:
        file: Path of the file to copy; may be ``None`` or empty.

    Returns:
        The base name of the copy (a path relative to the working
        directory), or ``None`` when ``file`` is falsy.
    """
    if not file:
        return None

    filename = os.path.basename(file)
    try:
        shutil.copyfile(file, filename)
    except shutil.SameFileError:
        # The file already lives in the working directory, so there is
        # nothing to copy and the existing file can be used as-is.
        pass
    return filename
|
134 |
+
|
135 |
+
def encode_sample(input_text, input_image, input_video, input_audio):
    """Pack the UI inputs into a sample dictionary.

    Args:
        input_text: Text of the sample, stored unchanged.
        input_image: Image value, or a falsy value when absent.
        input_video: Video value, or a falsy value when absent.
        input_audio: Audio value, or a falsy value when absent.

    Returns:
        A dict keyed by ``text_key``, ``image_key``, ``video_key`` and
        ``audio_key``. Each media entry is a one-element list, or an empty
        list when the corresponding input is falsy.
    """
    return {
        text_key: input_text,
        image_key: [input_image] if input_image else [],
        video_key: [input_video] if input_video else [],
        audio_key: [input_audio] if input_audio else [],
    }
|
142 |
+
|
143 |
+
def decode_sample(output_sample):
    """Unpack a sample dictionary into values for the output widgets.

    Only the first entry of each media list is used. Each such file is
    copied into the working directory through ``copy_func``.

    Args:
        output_sample: Dict containing ``text_key``, ``image_key``,
            ``video_key`` and ``audio_key``.

    Returns:
        A ``(text, image_file, video_file, audio_file)`` tuple; a media slot
        is ``None`` when the sample holds no entry for it.
    """
    def first_or_none(key):
        # Empty (or otherwise falsy) lists mean "no media of this kind".
        entries = output_sample[key]
        return entries[0] if entries else None

    output_text = output_sample[text_key]
    image_file = copy_func(first_or_none(image_key))
    video_file = copy_func(first_or_none(video_key))
    audio_file = copy_func(first_or_none(audio_key))
    return output_text, image_file, video_file, audio_file
|
152 |
+
|
153 |
+
def create_tab_layout(op_tab, op_type, run_op, has_stats=False):
    """Populate ``op_tab`` with the single-sample UI for one operator type.

    The tab holds an operator selector, a YAML parameter editor, input and
    output widgets for text/image/video/audio, an optional stats panel and a
    source-code viewer, plus the event wiring between them.

    Args:
        op_tab: Tab container; it is entered with ``with`` and its
            ``select`` event is wired.
        op_type: Operator family passed to ``get_op_lists``.
        run_op: Callable invoked as ``run_op(input_text, input_image,
            input_video, input_audio, op_name, params)``; its return values
            are assigned to the output widgets in order.
        has_stats: When true, a ``Stats`` JSON widget and a ``Keep or not?``
            text widget are appended to the outputs.
    """
    # NOTE(review): the nesting of the layout containers was reconstructed
    # from a listing that had lost its indentation -- confirm it against the
    # rendered page.
    with op_tab:
        options = get_op_lists(op_type)
        label = f'Select a {op_type} to show details'
        with gr.Row():
            # Guard the default so an empty operator list does not raise
            # IndexError while the page is being built.
            default_op = options[0] if options else None
            op_selector = gr.Dropdown(value=default_op, label=label, choices=options, interactive=True)
            with gr.Column():
                gr.Markdown(" **Op Parameters**")
                op_params = gr.Code(label="Yaml", language='yaml', interactive=True)
                run_button = gr.Button(value="🚀Run")
                show_code_button = gr.Button(value="🔍Show Code")

            with gr.Column():
                # NOTE(review): gr.Group is given a positional string here;
                # confirm the installed Gradio version accepts it.
                with gr.Group('Inputs'):
                    gr.Markdown(" **Inputs**")
                    with gr.Row():
                        input_text = gr.TextArea(label="Text", interactive=True,)
                        input_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
                        input_video = gr.Video(label='Video', visible=multimodal_visible)
                        input_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)

                with gr.Group('Outputs'):
                    gr.Markdown(" **Outputs**")
                    with gr.Row():
                        output_text = gr.TextArea(label="Text", interactive=False,)
                        output_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
                        output_video = gr.Video(label='Video', visible=multimodal_visible)
                        output_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)

                    with gr.Row():
                        if has_stats:
                            output_stats = gr.Json(label='Stats')
                            output_keep = gr.Text(label='Keep or not?', interactive=False)

        code = gr.Code(label='Source', language='python')
        inputs = [input_text, input_image, input_video, input_audio, op_selector, op_params]
        outputs = [output_text, output_image, output_video, output_audio]
        if has_stats:
            outputs.append(output_stats)
            outputs.append(output_keep)

        def run_func(*args):
            """Parse the YAML parameters and run the operator on the inputs."""
            # The YAML editor is the last entry of ``inputs``.
            *values, raw_params = args
            try:
                try:
                    params = yaml.safe_load(raw_params) if raw_params else None
                except yaml.YAMLError:
                    # Malformed YAML falls back to the operator defaults.
                    params = None
                if params is None:
                    params = {}
                return run_op(*values, params)
            except Exception as e:
                print(e)
                # gr.Error is only shown in the browser when it is raised.
                # Constructing it and returning the component list handed
                # widget objects back to Gradio as if they were values.
                raise gr.Error(str(e)) from e

        visibility_targets = outputs[:4] + inputs[:4]
        show_code_button.click(show_code, inputs=[op_selector], outputs=[code, op_params])
        show_code_button.click(change_visible, inputs=[op_selector], outputs=visibility_targets)
        run_button.click(run_func, inputs=inputs, outputs=outputs)
        run_button.click(change_visible, inputs=[op_selector], outputs=visibility_targets)
        op_selector.select(show_code, inputs=[op_selector], outputs=[code, op_params])
        op_selector.select(change_visible, inputs=[op_selector], outputs=visibility_targets)
        op_tab.select(change_visible, inputs=[op_selector], outputs=visibility_targets)
|
217 |
+
|
218 |
+
def create_mapper_tab(op_type, op_tab):
    """Build the tab for mapper operators.

    Args:
        op_type: Operator family name forwarded to ``create_tab_layout``.
        op_tab: Tab container to populate.
    """
    with op_tab:
        def run_op(input_text, input_image, input_video, input_audio, op_name, op_params):
            """Run the mapper on one sample and return the decoded result."""
            op_class = OPERATORS.modules[op_name]
            op = op_class(**op_params)
            sample = encode_sample(input_text, input_image, input_video, input_audio)
            # The operator works on a deep copy of the encoded sample.
            output_sample = op.process(copy.deepcopy(sample))
            return decode_sample(output_sample)

        create_tab_layout(op_tab, op_type, run_op)
|
227 |
+
|
228 |
+
|
229 |
+
def create_filter_tab(op_type, op_tab):
    """Build the tab for filter operators, including stats and keep decision.

    Args:
        op_type: Operator family name forwarded to ``create_tab_layout``.
        op_tab: Tab container to populate.
    """
    def run_op(input_text, input_image, input_video, input_audio, op_name, op_params):
        """Compute the filter stats for one sample and report the decision.

        Returns the decoded sample values followed by the stats stored under
        ``Fields.stats`` and ``'Yes'`` or ``'No'``, depending on whether
        ``op.process`` returned a truthy value.
        """
        op_class = OPERATORS.modules[op_name]
        op = op_class(**op_params)
        sample = encode_sample(input_text, input_image, input_video, input_audio)
        # Start from an empty stats entry before the operator computes stats.
        sample[Fields.stats] = dict()
        output_sample = op.compute_stats(copy.deepcopy(sample))
        output_keep = 'Yes' if op.process(output_sample) else 'No'
        output_stats = output_sample[Fields.stats]
        return *decode_sample(output_sample), output_stats, output_keep

    create_tab_layout(op_tab, op_type, run_op, has_stats=True)
|
243 |
+
|
244 |
+
|
245 |
+
def create_deduplicator_tab(op_type, op_tab):
    """Build the tab for deduplicator operators.

    Hash computation is disabled, so the encoded input sample is echoed
    back unchanged.

    Args:
        op_type: Operator family name forwarded to ``create_tab_layout``.
        op_tab: Tab container to populate.
    """
    with op_tab:
        def run_op(input_text, input_image, input_video, input_audio, op_name, op_params):
            """Instantiate the operator and return the sample unchanged."""
            op_class = OPERATORS.modules[op_name]
            # The instance is not used yet; it is still built so that invalid
            # parameters are reported.
            op = op_class(**op_params)
            sample = encode_sample(input_text, input_image, input_video, input_audio)
            output_sample = sample  # op.compute_hash(copy.deepcopy(sample))
            return decode_sample(output_sample)

        # run_op returns exactly four values, so the stats widgets must not
        # be added: with has_stats=True the layout expected six values.
        create_tab_layout(op_tab, op_type, run_op, has_stats=False)
|
254 |
+
|
255 |
+
def create_tab_double_layout(op_tab, op_type, run_op):
    """Populate ``op_tab`` with a two-sample UI for one operator type.

    Mirrors ``create_tab_layout`` but provides two sets of input widgets and
    two sets of output widgets.

    Args:
        op_tab: Tab container; it is entered with ``with`` and its
            ``select`` event is wired.
        op_type: Operator family passed to ``get_op_lists``.
        run_op: Callable invoked with the eight input values (text, image,
            video, audio of the first sample, then of the second), the
            operator name and the parsed parameter dict. Its return values
            are assigned to the eight output widgets in order.
    """
    # NOTE(review): the nesting of the layout containers was reconstructed
    # from a listing that had lost its indentation -- confirm it against the
    # rendered page.
    with op_tab:
        options = get_op_lists(op_type)
        label = f'Select a {op_type} to show details'
        with gr.Row():
            # Guard the default so an empty operator list does not raise
            # IndexError while the page is being built.
            default_op = options[0] if options else None
            op_selector = gr.Dropdown(value=default_op, label=label, choices=options, interactive=True)
            with gr.Column():
                gr.Markdown(" **Op Parameters**")
                op_params = gr.Code(label="Yaml", language='yaml', interactive=True)
                run_button = gr.Button(value="🚀Run")
                show_code_button = gr.Button(value="🔍Show Code")

            with gr.Column():
                # NOTE(review): gr.Group is given a positional string here;
                # confirm the installed Gradio version accepts it.
                with gr.Group('Inputs'):
                    gr.Markdown(" **Inputs**")
                    with gr.Row():
                        input_text = gr.TextArea(label="Text", interactive=True,)
                        input_text2 = gr.TextArea(label="Text", interactive=True,)
                        input_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
                        input_image2 = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
                        input_video = gr.Video(label='Video', visible=multimodal_visible)
                        input_video2 = gr.Video(label='Video', visible=multimodal_visible)
                        input_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
                        input_audio2 = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)

                with gr.Group('Outputs'):
                    gr.Markdown(" **Outputs**")
                    with gr.Row():
                        output_text = gr.TextArea(label="Text", interactive=False,)
                        output_text2 = gr.TextArea(label="Text", interactive=False,)
                        output_image = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
                        output_image2 = gr.Image(label='Image', type='filepath', visible=multimodal_visible, elem_classes="show_image")
                        output_video = gr.Video(label='Video', visible=multimodal_visible)
                        output_video2 = gr.Video(label='Video', visible=multimodal_visible)
                        output_audio = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)
                        output_audio2 = gr.Audio(label='Audio', type='filepath', visible=multimodal_visible)

        code = gr.Code(label='Source', language='python')
        inputs = [input_text, input_image, input_video, input_audio, input_text2, input_image2, input_video2, input_audio2, op_selector, op_params]
        outputs = [output_text, output_image, output_video, output_audio, output_text2, output_image2, output_video2, output_audio2]

        def run_func(*args):
            """Parse the YAML parameters and run the operator on the inputs."""
            # The YAML editor is the last entry of ``inputs``. Everything
            # before it is forwarded as runtime values; the widget objects
            # themselves must never be handed to the operator.
            *values, raw_params = args
            try:
                try:
                    params = yaml.safe_load(raw_params) if raw_params else None
                except yaml.YAMLError:
                    # Malformed YAML falls back to the operator defaults.
                    params = None
                if params is None:
                    params = {}
                return run_op(*values, params)
            except Exception as e:
                # gr.Error is only shown in the browser when it is raised.
                raise gr.Error(str(e)) from e

        # NOTE(review): only the first sample's widgets are toggled here; the
        # second sample's media widgets keep their initial visibility.
        visibility_targets = outputs[:4] + inputs[:4]
        show_code_button.click(change_visible, inputs=[op_selector], outputs=visibility_targets).then(show_code, inputs=[op_selector], outputs=[code, op_params])
        # Running needs every input value and writes every output widget; it
        # was wired to the selector alone and to the code/parameter widgets.
        run_button.click(change_visible, inputs=[op_selector], outputs=visibility_targets).then(run_func, inputs=inputs, outputs=outputs)
        op_selector.select(change_visible, inputs=[op_selector], outputs=visibility_targets).then(show_code, inputs=[op_selector], outputs=[code, op_params])
        op_tab.select(change_visible, inputs=[op_selector], outputs=visibility_targets)
|
319 |
+
# Assemble the page: cover banner on top, then one tab per enabled operator
# type inside an accordion.
# The stylesheet is resolved next to this script so it is found regardless
# of the directory the app is started from.
with gr.Blocks(css=os.path.join(demo_path, 'app.css')) as demo:

    dj_image = os.path.join(project_path, 'docs/imgs/data-juicer.jpg')
    gr.HTML(format_cover_html(dj_image))

    with gr.Accordion(label='Op Insight', open=True):
        tabs = gr.Tabs()
        with tabs:
            op_tabs = {op_type: gr.Tab(label=op_type.capitalize() + 's') for op_type in op_types}
            for op_type, op_tab in op_tabs.items():
                # Tab builders are looked up by name: create_<op_type>_tab.
                create_op_tab_func = globals().get(f'create_{op_type}_tab', None)
                if callable(create_op_tab_func):
                    create_op_tab_func(op_type, op_tab)
                else:
                    # A gr.Error that is constructed but never raised has no
                    # effect; report the missing builder on the console and
                    # keep building the remaining tabs.
                    print(f'{op_type} not callable')

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
pydantic>=2
|
2 |
+
git+https://gh-proxy.com/https://github.com/alibaba/data-juicer.git@demos/op_insight_slight#egg=py-data-juicer[all]
|