Commit
·
0a6ea65
1
Parent(s):
daa1a9a
Update app.py
Browse files
app.py
CHANGED
@@ -16,8 +16,9 @@ import streamlit as st
|
|
16 |
|
17 |
from txtai.embeddings import Documents, Embeddings
|
18 |
from txtai.pipeline import Segmentation, Summary, Tabular, Translation
|
19 |
-
from txtai.workflow import ServiceTask, Task, Workflow
|
20 |
|
|
|
21 |
|
22 |
class Application:
|
23 |
"""
|
@@ -245,6 +246,7 @@ class Application:
|
|
245 |
dict with component settings
|
246 |
"""
|
247 |
|
|
|
248 |
options = {"type": component}
|
249 |
|
250 |
st.markdown("---")
|
@@ -268,8 +270,12 @@ class Application:
|
|
268 |
options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
|
269 |
options["upsert"] = self.boolean("Upsert", config, "upsert")
|
270 |
|
271 |
-
elif component == "segmentation":
|
272 |
-
|
|
|
|
|
|
|
|
|
273 |
options["sentences"] = self.boolean("Split sentences", config, "sentences")
|
274 |
options["lines"] = self.boolean("Split lines", config, "lines")
|
275 |
options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
|
@@ -346,6 +352,10 @@ class Application:
|
|
346 |
self.pipelines[wtype] = Tabular(**self.components["tabular"])
|
347 |
tasks.append(Task(self.pipelines[wtype]))
|
348 |
|
|
|
|
|
|
|
|
|
349 |
elif wtype == "translation":
|
350 |
self.pipelines[wtype] = Translation()
|
351 |
tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
|
@@ -398,6 +408,10 @@ class Application:
|
|
398 |
data[wtype] = component
|
399 |
tasks.append({"action": wtype})
|
400 |
|
|
|
|
|
|
|
|
|
401 |
elif wtype == "translation":
|
402 |
data[wtype] = {}
|
403 |
tasks.append({"action": wtype, "args": list(component.values())})
|
@@ -519,8 +533,8 @@ class Application:
|
|
519 |
st.markdown("---")
|
520 |
|
521 |
# Component configuration
|
522 |
-
labels = {"segmentation": "segment", "translation": "translate"}
|
523 |
-
components = ["embeddings", "segmentation", "service", "summary", "tabular", "translation"]
|
524 |
|
525 |
selected, workflow = self.load(components)
|
526 |
selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))
|
|
|
16 |
|
17 |
from txtai.embeddings import Documents, Embeddings
|
18 |
from txtai.pipeline import Segmentation, Summary, Tabular, Translation
|
19 |
+
from txtai.workflow import ServiceTask, Task, UrlTask, Workflow
|
20 |
|
21 |
+
from textractor import Textractor
|
22 |
|
23 |
class Application:
|
24 |
"""
|
|
|
246 |
dict with component settings
|
247 |
"""
|
248 |
|
249 |
+
# pylint: disable=R0912, R0915
|
250 |
options = {"type": component}
|
251 |
|
252 |
st.markdown("---")
|
|
|
270 |
options["path"] = self.text("Embeddings model path", config, "path", "sentence-transformers/nli-mpnet-base-v2")
|
271 |
options["upsert"] = self.boolean("Upsert", config, "upsert")
|
272 |
|
273 |
+
elif component in ("segmentation", "textractor"):
|
274 |
+
if component == "segmentation":
|
275 |
+
st.markdown("**Segment** \n*Split text into semantic units*")
|
276 |
+
else:
|
277 |
+
st.markdown("**Textract** \n*Extract text from documents*")
|
278 |
+
|
279 |
options["sentences"] = self.boolean("Split sentences", config, "sentences")
|
280 |
options["lines"] = self.boolean("Split lines", config, "lines")
|
281 |
options["paragraphs"] = self.boolean("Split paragraphs", config, "paragraphs")
|
|
|
352 |
self.pipelines[wtype] = Tabular(**self.components["tabular"])
|
353 |
tasks.append(Task(self.pipelines[wtype]))
|
354 |
|
355 |
+
elif wtype == "textractor":
|
356 |
+
self.pipelines[wtype] = Textractor(**self.components["textract"])
|
357 |
+
tasks.append(UrlTask(self.pipelines[wtype]))
|
358 |
+
|
359 |
elif wtype == "translation":
|
360 |
self.pipelines[wtype] = Translation()
|
361 |
tasks.append(Task(lambda x: self.pipelines["translation"](x, **self.components["translation"])))
|
|
|
408 |
data[wtype] = component
|
409 |
tasks.append({"action": wtype})
|
410 |
|
411 |
+
elif wtype == "textractor":
|
412 |
+
data[wtype] = component
|
413 |
+
tasks.append({"action": wtype, "task": "url"})
|
414 |
+
|
415 |
elif wtype == "translation":
|
416 |
data[wtype] = {}
|
417 |
tasks.append({"action": wtype, "args": list(component.values())})
|
|
|
533 |
st.markdown("---")
|
534 |
|
535 |
# Component configuration
|
536 |
+
labels = {"segmentation": "segment", "textractor": "textract", "translation": "translate"}
|
537 |
+
components = ["embeddings", "segmentation", "service", "summary", "tabular", "textractor", "translation"]
|
538 |
|
539 |
selected, workflow = self.load(components)
|
540 |
selected = st.multiselect("Select components", components, default=selected, format_func=lambda text: labels.get(text, text))
|