Alexandre-Numind committed on
Commit
35ae9b2
·
verified ·
1 Parent(s): 9134640

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -60
app.py CHANGED
@@ -8,6 +8,7 @@ model = AutoModelForVision2Seq.from_pretrained(
8
  model_name,
9
  trust_remote_code=True,
10
  torch_dtype=torch.bfloat16,
 
11
  device_map="auto",
12
  )
13
  processor = AutoProcessor.from_pretrained(
@@ -91,6 +92,7 @@ with gr.Blocks(title="NuExtract – zero-shot structured extraction") as demo:
91
  <meta charset="UTF-8" />
92
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
93
  <title>NuExtract-2 Overview</title>
 
94
  <style>
95
  img { display:block; margin-bottom:1rem; }
96
  ul { margin:1rem 0; padding-left:1.5rem; }
@@ -98,83 +100,104 @@ with gr.Blocks(title="NuExtract – zero-shot structured extraction") as demo:
98
  a:hover { text-decoration:underline; }
99
  h1,h2 { margin:0 0 .5rem 0; font-weight:600; }
100
  pre { overflow-x:auto; border-radius:6px; padding:1rem; }
101
- code { border-radius:4px; padding:.2em .4em; font-family:monospace; }
 
 
102
  html[data-theme="dark"],
103
  @media (prefers-color-scheme: dark) {
104
- body { background-color:#1e1e1e;}
105
- code { background-color: #2d2d2d;}
106
- pre { background-color:#2a2a2a;}
107
  }
108
  html[data-theme="light"],
109
  @media (prefers-color-scheme: light) {
110
- body { background-color#ffffff;}
111
- code { background-color#f5f5f5;}
112
- pre { background-color#f5f5f5;}
 
 
 
 
 
 
 
 
 
 
 
 
113
  }
114
- </style>
115
  </head>
116
  <body>
117
- <p align="center">
118
  <a href="https://nuextract.ai/">
119
- <img src="https://cdn.prod.website-files.com/638364a4e52e440048a9529c/64188f405afcf42d0b85b926_logo_numind_final.png"
120
- alt="NuMind Logo" style="width: 200px; height: 50px;" />
121
  </a>
122
- </p>
123
- <p align="center">
124
- 🖥️ <a href="https://nuextract.ai/">API / Platform</a>&nbsp&nbsp | &nbsp&nbsp📑 <a href="https://numind.ai/blog">Blog</a>&nbsp&nbsp | &nbsp&nbsp🗣️ <a href="https://discord.gg/3tsEtJNCDe">Discord</a> &nbsp&nbsp | &nbsp&nbsp🛠️ <a href="https://github.com/numindai/nuextract">Github</a>
125
- </p>
126
-
127
- <section>
128
- <h3> This space is a demo for <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a> </h3>
129
- <h3> You can also check: <a href="https://huggingface.co/numind/NuExtract-2.0-2B" target="_blank">NuExtract-2.0-2B</a> and <a href="https://huggingface.co/numind/NuExtract-2.0-8B" target="_blank">NuExtract-2.0-8B</a> and our top performing model via the <a href="https://nuextract.ai/">API / Platform</a> </h3>
130
-
131
- <h1>NuExtreact-2.0</h1>
132
- <p>NuExtract 2.0 is a family of models trained specifically for structured information extraction tasks. It supports both multimodal inputs and is multilingual.</p>
133
- <p> To use the model, provide an input text/image and a JSON template describing the information you need to extract. The template should be a JSON object, specifying field names and their expected type. </p>
134
- <article>
135
- <h3>Supported Template Types</h3>
136
- <ul>
137
- <li><code>verbatim-string</code> — extract text exactly as it appears.</li>
137
- <li><code>string</code> — generic text, with possible paraphrasing.</li>
138
- <li><code>integer</code> — whole number.</li>
139
- <li><code>number</code> — decimal or whole number.</li>
140
- <li><code>date-time</code> — ISO 8601 date format.</li>
141
- <li><code>boolean</code> — True or False.</li>
142
- <li>Array of any type above (e.g. <code>["string"]</code>).</li>
143
- <li><code>enum</code> — one value from a predefined list (e.g. <code>["yes", "no", "maybe"]</code>).</li>
144
- <li><code>multi-label</code> — multiple values from a list (e.g. <code>[["A", "B", "C"]]</code>).</li>
146
- </ul>
147
- <p> You can specify any nested strucure, such as object inside object or list of object </p>
148
- <p>If no relevant information is found, the model returns <code>null</code> or <code>[]</code>.</p>
149
- </article>
150
-
151
- <article>
152
- <h3>Example Template</h3>
153
- <pre><code>{
 
 
 
154
  "first_name": "verbatim-string",
155
- "last_name": "verbatim-string",
156
  "description": "string",
157
- "age": "integer",
158
  "classes": [
159
- {
160
- "name" : "verbatim-string",
161
- "professors" : ["verbatim-string"],
162
- "gpa": number",
163
- }
164
  ],
165
  "average_gpa": "number",
166
- "birth_date": "date-time",
167
  "nationality": ["France", "England", "Japan", "USA", "China"],
168
  "languages_spoken": [["English", "French", "Japanese", "Mandarin", "Spanish"]]
169
  }</code></pre>
170
- </article>
171
- <strong>You can also provide a description of what you want to extract, use a non-JSON format (e.g. YAML, Pydantic) or even an example of input text. The model will automatically update the template field and generate a compatible JSON template based on our typing system.</strong>
172
- </section>
173
- <br>
174
- <section>
175
- <ul><h4><strong>Model used in this demo:</strong> <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h4></ul>
176
- <i>⚠️ This demo restricts inputs to 10,000 tokens</i>
177
- </section>
 
 
 
 
178
  </body>
179
  </html>
180
  """)
@@ -191,7 +214,7 @@ with gr.Blocks(title="NuExtract – zero-shot structured extraction") as demo:
191
 
192
  example_data = [
193
  [
194
- "data/affiche.jpg", # image file
195
  "", # no text
196
  """{
197
  "movie_name": "verbatim-string",
 
8
  model_name,
9
  trust_remote_code=True,
10
  torch_dtype=torch.bfloat16,
11
+ attn_implementation="flash_attention_2",
12
  device_map="auto",
13
  )
14
  processor = AutoProcessor.from_pretrained(
 
92
  <meta charset="UTF-8" />
93
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
94
  <title>NuExtract-2 Overview</title>
95
+
96
  <style>
97
  img { display:block; margin-bottom:1rem; }
98
  ul { margin:1rem 0; padding-left:1.5rem; }
 
100
  a:hover { text-decoration:underline; }
101
  h1,h2 { margin:0 0 .5rem 0; font-weight:600; }
102
  pre { overflow-x:auto; border-radius:6px; padding:1rem; }
103
+ code { border-radius:1px; padding:.1em .1em; font-family:monospace; }
104
+
105
+ /* ——— Dark / light themes ——— */
106
  html[data-theme="dark"],
107
  @media (prefers-color-scheme: dark) {
108
+ body { background-color:#1e1e1e; }
109
+ code { background-color:#2d2d2d; }
110
+ pre { background-color:#2a2a2a; }
111
  }
112
  html[data-theme="light"],
113
  @media (prefers-color-scheme: light) {
114
+ body { background-color:#ffffff; }
115
+ code { background-color:#f5f5f5; }
116
+ pre { background-color:#f5f5f5; }
117
+ }
118
+
119
+ /* ——— NEW: put the two articles side-by-side ——— */
120
+ .template-container {
121
+ display: flex;
122
+ flex-wrap: wrap; /* stacks on small screens */
123
+ gap: 2rem;
124
+ margin-top: 1rem;
125
+ }
126
+ .template-container article {
127
+ flex: 1 1 320px; /* grow / shrink with a sensible min width */
128
+ min-width: 280px;
129
  }
130
+ </style>
131
  </head>
132
  <body>
133
+ <p align="center">
134
  <a href="https://nuextract.ai/">
135
+ <img src="https://cdn.prod.website-files.com/638364a4e52e440048a9529c/64188f405afcf42d0b85b926_logo_numind_final.png"
136
+ alt="NuMind Logo" style="width:200px;height:50px;" />
137
  </a>
138
+ </p>
139
+ <p align="center">
140
+ 🖥️ <a href="https://nuextract.ai/">API / Platform</a>&nbsp;|&nbsp;📑 <a href="https://numind.ai/blog">Blog</a>&nbsp;|&nbsp;🗣️ <a href="https://discord.gg/3tsEtJNCDe">Discord</a>&nbsp;|&nbsp;🛠️ <a href="https://github.com/numindai/nuextract">Github</a>
141
+ </p>
142
+
143
+ <section>
144
+ <h3>This space is a demo for <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h3>
145
+ <h3>You can also check: <a href="https://huggingface.co/numind/NuExtract-2.0-2B" target="_blank">NuExtract-2.0-2B</a> and <a href="https://huggingface.co/numind/NuExtract-2.0-8B" target="_blank">NuExtract-2.0-8B</a> and our top-performing model via the <a href="https://nuextract.ai/">API / Platform</a></h3>
146
+
147
+ <h1>NuExtract-2.0</h1>
148
+ <p>NuExtract 2.0 is a family of models trained specifically for structured information extraction tasks. It supports both multimodal inputs and is multilingual.</p>
149
+ <p>To use the model, provide an input text/image and a JSON template describing the information you need to extract. The template should be a JSON object, specifying field names and their expected type.</p>
150
+
151
+ <!-- ------------- SIDE-BY-SIDE CONTAINER ------------- -->
152
+ <div class="template-container">
153
+ <!-- Supported Template Types -->
154
+ <article>
155
+ <h3>Supported Template Types</h3>
156
+ <ul>
157
+ <li><code>verbatim-string</code> — extract text exactly as it appears.</li>
158
+ <li><code>string</code> — generic text, with possible paraphrasing.</li>
159
+ <li><code>integer</code> — whole number.</li>
160
+ <li><code>number</code> — decimal or whole number.</li>
161
+ <li><code>date-time</code> — ISO 8601 date format.</li>
162
+ <li><code>boolean</code> — True or False.</li>
163
+ <li>Array of any type above (e.g. <code>["string"]</code>).</li>
164
+ <li><code>enum</code> — one value from a predefined list (e.g. <code>["yes", "no", "maybe"]</code>).</li>
165
+ <li><code>multi-label</code> — multiple values from a list (e.g. <code>[["A", "B", "C"]]</code>).</li>
166
+ </ul>
167
+ <p>You can specify any nested structure, such as an object inside an object or a list of objects. If no relevant information is found, the model returns <code>null</code> or <code>[]</code>.</p>
168
+ </article>
169
+ <!-- Example Template -->
170
+ <article>
171
+ <h3>Example Template</h3>
172
+ <pre><code>{
173
  "first_name": "verbatim-string",
174
+ "last_name": "verbatim-string",
175
  "description": "string",
176
+ "age": "integer",
177
  "classes": [
178
+ {
179
+ "name": "verbatim-string",
180
+ "professors": ["verbatim-string"],
181
+ "gpa": "number"
182
+ }
183
  ],
184
  "average_gpa": "number",
185
+ "birth_date": "date-time",
186
  "nationality": ["France", "England", "Japan", "USA", "China"],
187
  "languages_spoken": [["English", "French", "Japanese", "Mandarin", "Spanish"]]
188
  }</code></pre>
189
+ </article>
190
+ </div><!-- /.template-container -->
191
+ <br>
192
+ <strong>You can also provide a description of what you want to extract, use a non-JSON format (e.g. YAML, Pydantic) or even an example of input text. The model will automatically update the template field and generate a compatible JSON template based on our typing system.</strong>
193
+ </section>
194
+
195
+ <br>
196
+
197
+ <section>
198
+ <ul><h4><strong>Model used in this demo:</strong> <a href="https://huggingface.co/numind/NuExtract-2.0-4B" target="_blank">NuExtract-2.0-4B</a></h4></ul>
199
+ <i>⚠️ This demo restricts inputs to 10,000 tokens</i>
200
+ </section>
201
  </body>
202
  </html>
203
  """)
 
214
 
215
  example_data = [
216
  [
217
+ "examples/affiche.jpg", # image file
218
  "", # no text
219
  """{
220
  "movie_name": "verbatim-string",