Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ model = AutoModelForVision2Seq.from_pretrained(
|
|
8 |
model_name,
|
9 |
trust_remote_code=True,
|
10 |
torch_dtype=torch.bfloat16,
|
|
|
11 |
device_map="auto",
|
12 |
)
|
13 |
processor = AutoProcessor.from_pretrained(
|
@@ -91,6 +92,7 @@ with gr.Blocks(title="NuExtract — zero-shot structured extraction") as demo:
|
|
91 |
<meta charset="UTF-8" />
|
92 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
93 |
<title>NuExtract-2 Overview</title>
|
|
|
94 |
<style>
|
95 |
img { display:block; margin-bottom:1rem; }
|
96 |
ul { margin:1rem 0; padding-left:1.5rem; }
|
@@ -98,83 +100,104 @@ with gr.Blocks(title="NuExtract — zero-shot structured extraction") as demo:
|
|
98 |
a:hover { text-decoration:underline; }
|
99 |
h1,h2 { margin:0 0 .5rem 0; font-weight:600; }
|
100 |
pre { overflow-x:auto; border-radius:6px; padding:1rem; }
|
101 |
-
code { border-radius:
|
|
|
|
|
102 |
html[data-theme="dark"],
|
103 |
@media (prefers-color-scheme: dark) {
|
104 |
-
body { background-color:#1e1e1e;}
|
105 |
-
code { background-color
|
106 |
-
pre { background-color:#2a2a2a;}
|
107 |
}
|
108 |
html[data-theme="light"],
|
109 |
@media (prefers-color-scheme: light) {
|
110 |
-
body { background-color
|
111 |
-
code { background-color
|
112 |
-
pre { background-color
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
}
|
114 |
-
</style>
|
115 |
</head>
|
116 |
<body>
|
117 |
-
<p align="center">
|
118 |
<a href="https://nuextract.ai/">
|
119 |
-
|
120 |
-
|
121 |
</a>
|
122 |
-
</p>
|
123 |
-
<p align="center">
|
124 |
-
🖥️ <a href="https://nuextract.ai/">API / Platform</a> 
|
125 |
-
</p>
|
126 |
-
|
127 |
-
<section>
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
<h1>
|
132 |
-
|
133 |
-
<p>
|
134 |
-
|
135 |
-
|
136 |
-
<
|
137 |
-
|
138 |
-
<
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
|
|
|
|
|
|
154 |
"first_name": "verbatim-string",
|
155 |
-
"last_name":
|
156 |
"description": "string",
|
157 |
-
"age":
|
158 |
"classes": [
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
],
|
165 |
"average_gpa": "number",
|
166 |
-
"birth_date":
|
167 |
"nationality": ["France", "England", "Japan", "USA", "China"],
|
168 |
"languages_spoken": [["English", "French", "Japanese", "Mandarin", "Spanish"]]
|
169 |
}</code></pre>
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
<
|
174 |
-
|
175 |
-
|
176 |
-
<
|
177 |
-
|
|
|
|
|
|
|
|
|
178 |
</body>
|
179 |
</html>
|
180 |
""")
|
@@ -191,7 +214,7 @@ with gr.Blocks(title="NuExtract — zero-shot structured extraction") as demo:
|
|
191 |
|
192 |
example_data = [
|
193 |
[
|
194 |
-
"
|
195 |
"", # no text
|
196 |
"""{
|
197 |
"movie_name": "verbatim-string",
|
|
|
8 |
model_name,
|
9 |
trust_remote_code=True,
|
10 |
torch_dtype=torch.bfloat16,
|
11 |
+
attn_implementation="flash_attention_2",
|
12 |
device_map="auto",
|
13 |
)
|
14 |
processor = AutoProcessor.from_pretrained(
|
|
|
92 |
<meta charset="UTF-8" />
|
93 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
94 |
<title>NuExtract-2 Overview</title>
|
95 |
+
|
96 |
<style>
|
97 |
img { display:block; margin-bottom:1rem; }
|
98 |
ul { margin:1rem 0; padding-left:1.5rem; }
|
|
|
100 |
a:hover { text-decoration:underline; }
|
101 |
h1,h2 { margin:0 0 .5rem 0; font-weight:600; }
|
102 |
pre { overflow-x:auto; border-radius:6px; padding:1rem; }
|
103 |
+
code { border-radius:1px; padding:.1em .1em; font-family:monospace; }
|
104 |
+
|
105 |
+
/* ─── Dark / light themes ─── */
|
106 |
html[data-theme="dark"],
|
107 |
@media (prefers-color-scheme: dark) {
|
108 |
+
body { background-color:#1e1e1e; }
|
109 |
+
code { background-color:#2d2d2d; }
|
110 |
+
pre { background-color:#2a2a2a; }
|
111 |
}
|
112 |
html[data-theme="light"],
|
113 |
@media (prefers-color-scheme: light) {
|
114 |
+
body { background-color:#ffffff; }
|
115 |
+
code { background-color:#f5f5f5; }
|
116 |
+
pre { background-color:#f5f5f5; }
|
117 |
+
}
|
118 |
+
|
119 |
+
/* ─── NEW: put the two articles side-by-side ─── */
|
120 |
+
.template-container {
|
121 |
+
display: flex;
|
122 |
+
flex-wrap: wrap; /* stacks on small screens */
|
123 |
+
gap: 2rem;
|
124 |
+
margin-top: 1rem;
|
125 |
+
}
|
126 |
+
.template-container article {
|
127 |
+
flex: 1 1 320px; /* grow / shrink with a sensible min width */
|
128 |
+
min-width: 280px;
|
129 |
}
|
130 |
+
</style>
|
131 |
</head>
|
132 |
<body>
|
133 |
+
<p align="center">
|
134 |
<a href="https://nuextract.ai/">
|
135 |
+
<img src="https://cdn.prod.website-files.com/638364a4e52e440048a9529c/64188f405afcf42d0b85b926_logo_numind_final.png"
|
136 |
+
alt="NuMind Logo" style="width:200px;height:50px;" />
|
137 |
</a>
|
138 |
+
</p>
|
139 |
+
<p align="center">
|
140 |
+
🖥️ <a href="https://nuextract.ai/">API / Platform</a> | 📑 <a href="https://numind.ai/blog">Blog</a> | 🗣️ <a href="https://discord.gg/3tsEtJNCDe">Discord</a> | 🛠️ <a href="https://github.com/numindai/nuextract">Github</a>
|
141 |
+
</p>
|
142 |
+
|
143 |
+
<section>
|
144 |
+
<h3>This space is a demo for <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h3>
|
145 |
+
<h3>You can also check: <a href="https://huggingface.co/numind/NuExtract-2.0-2B" target="_blank">NuExtract-2.0-2B</a> and <a href="https://huggingface.co/numind/NuExtract-2.0-8B" target="_blank">NuExtract-2.0-8B</a> and our top-performing model via the <a href="https://nuextract.ai/">API / Platform</a></h3>
|
146 |
+
|
147 |
+
<h1>NuExtract-2.0</h1>
|
148 |
+
<p>NuExtract 2.0 is a family of models trained specifically for structured information extraction tasks. It supports both multimodal inputs and is multilingual.</p>
|
149 |
+
<p>To use the model, provide an input text/image and a JSON template describing the information you need to extract. The template should be a JSON object, specifying field names and their expected type.</p>
|
150 |
+
|
151 |
+
<!-- ------------- SIDE-BY-SIDE CONTAINER ------------- -->
|
152 |
+
<div class="template-container">
|
153 |
+
<!-- Supported Template Types -->
|
154 |
+
<article>
|
155 |
+
<h3>Supported Template Types</h3>
|
156 |
+
<ul>
|
157 |
+
<li><code>verbatim-string</code> — extract text exactly as it appears.</li>
|
158 |
+
<li><code>string</code> — generic text, with possible paraphrasing.</li>
|
159 |
+
<li><code>integer</code> — whole number.</li>
|
160 |
+
<li><code>number</code> — decimal or whole number.</li>
|
161 |
+
<li><code>date-time</code> — ISO 8601 date format.</li>
|
162 |
+
<li><code>boolean</code> — True or False.</li>
|
163 |
+
<li>Array of any type above (e.g. <code>["string"]</code>).</li>
|
164 |
+
<li><code>enum</code> — one value from a predefined list (e.g. <code>["yes", "no", "maybe"]</code>).</li>
|
165 |
+
<li><code>multi-label</code> — multiple values from a list (e.g. <code>[["A", "B", "C"]]</code>).</li>
|
166 |
+
</ul>
|
167 |
+
<p>You can specify any nested structure, such as an object inside an object or a list of objects. If no relevant information is found, the model returns <code>null</code> or <code>[]</code>.</p>
|
168 |
+
</article>
|
169 |
+
<!-- Example Template -->
|
170 |
+
<article>
|
171 |
+
<h3>Example Template</h3>
|
172 |
+
<pre><code>{
|
173 |
"first_name": "verbatim-string",
|
174 |
+
"last_name": "verbatim-string",
|
175 |
"description": "string",
|
176 |
+
"age": "integer",
|
177 |
"classes": [
|
178 |
+
{
|
179 |
+
"name": "verbatim-string",
|
180 |
+
"professors": ["verbatim-string"],
|
181 |
+
"gpa": "number"
|
182 |
+
}
|
183 |
],
|
184 |
"average_gpa": "number",
|
185 |
+
"birth_date": "date-time",
|
186 |
"nationality": ["France", "England", "Japan", "USA", "China"],
|
187 |
"languages_spoken": [["English", "French", "Japanese", "Mandarin", "Spanish"]]
|
188 |
}</code></pre>
|
189 |
+
</article>
|
190 |
+
</div><!-- /.template-container -->
|
191 |
+
<br>
|
192 |
+
<strong>You can also provide a description of what you want to extract, use a non-JSON format (e.g. YAML, Pydantic) or even an example of input text. The model will automatically update the template field and generate a compatible JSON template based on our typing system.</strong>
|
193 |
+
</section>
|
194 |
+
|
195 |
+
<br>
|
196 |
+
|
197 |
+
<section>
|
198 |
+
<ul><h4><strong>Model used in this demo:</strong> <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h4></ul>
|
199 |
+
<i>⚠️ This demo restricts inputs to 10,000 tokens</i>
|
200 |
+
</section>
|
201 |
</body>
|
202 |
</html>
|
203 |
""")
|
|
|
214 |
|
215 |
example_data = [
|
216 |
[
|
217 |
+
"examples/affiche.jpg", # image file
|
218 |
"", # no text
|
219 |
"""{
|
220 |
"movie_name": "verbatim-string",
|