Spaces:

mikeee
/

ttw

Running

App Files Files Community

freemt committed on Jan 9, 2022

Commit

a4a35d8

1 Parent(s): 25a8a17

Update altair

Browse files

Files changed (17) hide show

.gitignore +4 -0
app.cmd +6 -0
app.exe +0 -0
app.py +59 -5
app.spec +65 -0
chart_df.html +35 -0
chart_df.png +0 -0
flagged/Output 2/0.png +0 -0
flagged/log.csv +3 -0
gradiobee/seg_text.py +55 -9
requirements.in +2 -0
requirements.txt +25 -4
run-nuitka.bat +2 -0
run-pyinstaller-spec.bat +1 -0
run-python-app_py.bat +1 -1
tests/__init__.py +0 -0
tests/test_seg_text.py +47 -0

.gitignore CHANGED Viewed

@@ -1,2 +1,6 @@
 .venv
 **/__pycache__

 .venv
 **/__pycache__
+app.build
+app.dist
+build
+app.exe

app.cmd ADDED Viewed

	@@ -0,0 +1,6 @@

+@echo off
+rem This script was created by Nuitka to execute 'app.exe' with Python DLL being found.
+set PATH=c:\python\python37;%PATH%
+set PYTHONHOME=c:\python\python37
+"%~dp0.\app.exe"

app.exe ADDED Viewed

Binary file (702 kB). View file

app.py CHANGED Viewed

@@ -1,15 +1,25 @@
 """Talk to spaces VM via subprocess.check_output."""
 # import httpx
 import subprocess as sp
 from shlex import split
 # from textwrap import dedent
 from inspect import cleandoc
 import gradio as gr
 from logzero import logger
 from gradiobee.seg_text import seg_text
 # def greet(command):
 def process(command):
@@ -42,17 +52,54 @@ def process(command):
             ).strip()
             if not out:
                 out = "No output, that's all we know."
-            return out
     # not is_command or not flag: text, do seg_text
     _ = "\n\n".join(seg_text(command.strip()))
     # _ = seg_text(command.strip())
-    return cleandoc(
         f"""seg_text output (segmented sents):
         {_}
         """
     ).strip()
 iface = gr.Interface(
     # fn=greet,
@@ -65,7 +112,13 @@ iface = gr.Interface(
         default="python -m site",
         label="command or multiline text",
     ),
-    outputs="text",
     examples=[
         "cat /proc/version",
         "free  # show free memory",
@@ -76,7 +129,8 @@ iface = gr.Interface(
     ],
     title="probe the system",
     description="Talk to the system via subprocess.check_output ",
 )
-# iface.launch(share=True, debug=True)
-iface.launch(debug=True)

 """Talk to spaces VM via subprocess.check_output."""
+# pylint: disable=wrong-import-position
+import sys
+from pathlib import Path
+if "." not in sys.path:
+    sys.path.insert(0, ".")
 # import httpx
 import subprocess as sp
 from shlex import split
+import pandas as pd
 # from textwrap import dedent
 from inspect import cleandoc
 import gradio as gr
+import logzero
 from logzero import logger
 from gradiobee.seg_text import seg_text
+logzero.loglevel()  # default to 10
 # def greet(command):
 def process(command):
             ).strip()
             if not out:
                 out = "No output, that's all we know."
+            return out, None
+    # quick test altair altair-save tooltip
+    # from PIL import Image
+    import altair as alt
+    from altair_saver import save
+    df_ = pd.DataFrame(data={'x': [1, 2], 'y': [3, 4], "cos": [0.1, 0.5]})
+    chart_df = alt.Chart(df_).mark_circle(size=60).encode(
+        x='x',
+        y='y',
+        color='cos',
+        # tooltip=['x', 'y', 'cos', ]
+    )
+    # .interactive()
+    # save(chart_df, "chart_df.html")
+    # chart_df_html = Path("chart_df.html").read_text("utf")
+    # save(chart_df, "chart_df.png")
+    # chart_df_png = Path("chart_df.png").read_bytes()
+    # chart_df_png = Image.open("chart_df.png")
+    # chart_df_png = "chart_df.png"
+    # scatter_plot.save('simple_scatter_plot_with_altairchart.html')
+    # chart_df.save("chart_df.html")  # does not work, contains js
+    # chart_df_html = Path("chart_df.html").read_text("utf")
+    chart_df.save("chart_df.png")  #
+    chart_df_png = "chart_df.png"
     # not is_command or not flag: text, do seg_text
     _ = "\n\n".join(seg_text(command.strip()))
+    logger.debug(_)
+    # logger.debug(chart_df_html)
+    # print(_)
+    # print(chart_df_html)
     # _ = seg_text(command.strip())
+    _ = cleandoc(
         f"""seg_text output (segmented sents):
         {_}
         """
     ).strip()
+    # return _, chart_df_html
+    return _, chart_df_png
 iface = gr.Interface(
     # fn=greet,
         default="python -m site",
         label="command or multiline text",
     ),
+    # outputs="text",
+    # outputs=["text",],
+    # outputs=["text", "html"],
+    outputs=[
+        "text",
+        gr.outputs.Image("auto"),
+    ],
     examples=[
         "cat /proc/version",
         "free  # show free memory",
     ],
     title="probe the system",
     description="Talk to the system via subprocess.check_output ",
+    layout="vertical",
 )
+iface.launch(share=True, debug=True)
+# iface.launch()

app.spec ADDED Viewed

	@@ -0,0 +1,65 @@

+# -*- mode: python ; coding: utf-8 -*-
+block_cipher = None
+a = Analysis(['app.py'],
+             pathex=[],
+             binaries=[],
+             datas=[],
+             hiddenimports=[
+                "gradio",
+                "requests",
+                "Flask-Login",
+                "markdown2",
+                "analytics-python",
+                "Flask",
+                "Flask-Cors",
+                "flask-cachebuster",
+                "paramiko",
+                "tornado",
+                "matplotlib",
+                "pycryptodome",
+                "pandas",
+                "pillow",
+                "pydub",
+                "ffmpy",
+             ],
+             hookspath=[],
+             hooksconfig={},
+             runtime_hooks=[],
+             excludes=[
+                "Ipython",
+                "wx",
+                "wx",
+             ],
+             win_no_prefer_redirects=False,
+             win_private_assemblies=False,
+             cipher=block_cipher,
+             noarchive=False)
+pyz = PYZ(a.pure, a.zipped_data,
+             cipher=block_cipher)
+exe = EXE(pyz,
+          a.scripts,
+          [],
+          exclude_binaries=True,
+          name='app',
+          debug=False,
+          bootloader_ignore_signals=False,
+          strip=False,
+          upx=True,
+          console=True,
+          disable_windowed_traceback=False,
+          target_arch=None,
+          codesign_identity=None,
+          entitlements_file=None )
+coll = COLLECT(exe,
+               a.binaries,
+               a.zipfiles,
+               a.datas,
+               strip=False,
+               upx=True,
+               upx_exclude=[],
+               name='app')

chart_df.html ADDED Viewed

	@@ -0,0 +1,35 @@

+<!DOCTYPE html>
+<html>
+<head>
+  <style>
+    .error {
+        color: red;
+    }
+  </style>
+  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega@5"></script>
+  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-lite@4.17.0"></script>
+  <script type="text/javascript" src="https://cdn.jsdelivr.net/npm//vega-embed@6"></script>
+</head>
+<body>
+  <div id="vis"></div>
+  <script>
+    (function(vegaEmbed) {
+      var spec = {"config": {"view": {"continuousWidth": 400, "continuousHeight": 300}}, "data": {"name": "data-e58adc7548bdded6fd09c49a28ff71ba"}, "mark": {"type": "circle", "size": 60}, "encoding": {"color": {"field": "cos", "type": "quantitative"}, "x": {"field": "x", "type": "quantitative"}, "y": {"field": "y", "type": "quantitative"}}, "$schema": "https://vega.github.io/schema/vega-lite/v4.17.0.json", "datasets": {"data-e58adc7548bdded6fd09c49a28ff71ba": [{"x": 1, "y": 3, "cos": 0.1}, {"x": 2, "y": 4, "cos": 0.5}]}};
+      var embedOpt = {"mode": "vega-lite"};
+      // Show a red error box inside `el` with the error's message, then
+      // rethrow the error.
+      function showError(el, error){
+          el.innerHTML = ('<div class="error" style="color:red;">'
+                          + '<p>JavaScript Error: ' + error.message + '</p>'
+                          + "<p>This usually means there's a typo in your chart specification. "
+                          + "See the javascript console for the full traceback.</p>"
+                          + '</div>');
+          throw error;
+      }
+      const el = document.getElementById('vis');
+      vegaEmbed("#vis", spec, embedOpt)
+        .catch(error => showError(el, error));
+    })(vegaEmbed);
+  </script>
+</body>
+</html>

chart_df.png ADDED Viewed

flagged/Output 2/0.png ADDED Viewed

flagged/log.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+command or multiline text,Output 1,Output 2,timestamp
+pyth1on -m site,"seg_text output (segmented sents):
+pyth1on -m site",Output 2/0.png,2022-01-08 14:04:38.035704

gradiobee/seg_text.py CHANGED Viewed

@@ -7,8 +7,11 @@ else use polyglot.text.Text
 !install pyicu pycld2 Morfessor
 !pip install polyglot sentence_splitter
 """
-from typing import List, Optional
 from tqdm.auto import tqdm
 from polyglot.detect.base import logger as polyglot_logger
 from polyglot.text import Detector, Text
@@ -27,34 +30,39 @@ LANG_S = ["ca", "cs", "da", "nl", "en", "fi", "fr", "de",
           "pt", "ro", "ru", "sk", "sl", "es", "sv", "tr"]
-def seg_text(
         text: str,
         lang: Optional[str] = None,
-        qmode: bool = False,
         maxlines: int = 1000
 ) -> List[str]:
     # fmt: on
-    """
-    Split text to sentences.
     Use sentence_splitter if supported,
     else use polyglot.text.Text.sentences
-    qmode: skip split_text_into_sentences if True, default False
         vectors for all books are based on qmode=False.
         qmode=True is for quick test purpose only
-    maxlines (default 1000), threhold for turn on tqdm progressbar
         set to <1 or a large number to turn it off
     """
     if lang is None:
         try:
             lang = Detector(text).language.code
         except Exception as exc:
-            logger.warning("polyglot.text.Detector exc: %s, setting to 'en'", exc)
             lang = "en"
-    if not qmode and lang in LANG_S:
         _ = []
         lines = text.splitlines()
         # if maxlines > 1 and len(lines) > maxlines:
@@ -70,4 +78,42 @@ def seg_text(
         # return split_text_into_sentences(text, lang)
     return [elm.string for elm in Text(text, lang).sentences]

 !install pyicu pycld2 Morfessor
 !pip install polyglot sentence_splitter
 """
+# pylint: disable=
+from typing import List, Optional, Union
+import re
 from tqdm.auto import tqdm
 from polyglot.detect.base import logger as polyglot_logger
 from polyglot.text import Detector, Text
           "pt", "ro", "ru", "sk", "sl", "es", "sv", "tr"]
+def _seg_text(
         text: str,
         lang: Optional[str] = None,
+        # qmode: bool = False,
         maxlines: int = 1000
 ) -> List[str]:
     # fmt: on
+    """Split text to sentences.
     Use sentence_splitter if supported,
     else use polyglot.text.Text.sentences
+    Blank lines will be removed.
+    qmode: quick mode, skip split_text_into_sentences if True, default False
         vectors for all books are based on qmode=False.
         qmode=True is for quick test purpose only
+    maxlines (default 1000), threshold for turn on tqdm progressbar
         set to <1 or a large number to turn it off
     """
     if lang is None:
         try:
             lang = Detector(text).language.code
         except Exception as exc:
+            logger.info("text[:30]: %s", text[:30])
+            logger.warning(
+                "polyglot.text.Detector exc: %s, setting to 'en'",
+                exc
+            )
             lang = "en"
+    # if not qmode and lang in LANG_S:
+    if lang in LANG_S:
         _ = []
         lines = text.splitlines()
         # if maxlines > 1 and len(lines) > maxlines:
         # return split_text_into_sentences(text, lang)
+    # empty "" text or blank to avoid Exception
+    if not text.strip():
+        return []
     return [elm.string for elm in Text(text, lang).sentences]
+# fmt: off
+def seg_text(
+        lst: Union[str, List[str]],
+        lang: Optional[str] = None,
+        maxlines: int = 1000,
+        extra: Optional[str] = None,
+) -> List[str]:
+    # fmt:on
+    """Split a text, or each text in a list, into segments.
+
+    Arguments:
+        lst: a single text or a list of texts
+        lang: language code, passed through to _seg_text
+        maxlines: passed through to _seg_text
+        extra: optional regex pattern; a newline is inserted after
+            every match (via re.sub) before the text is segmented
+
+    Returns:
+        the segments of all texts, concatenated in input order.
+    """
+    # accept a bare string as a one-element list
+    if isinstance(lst, str):
+        lst = [lst]
+    if extra:
+        # keep the matched text and append "\n" after it
+        lst = [re.sub(rf"({extra})", r"\1\n", elm) for elm in lst]
+    res = []
+    for elm in lst:
+        res.extend(_seg_text(
+            elm,
+            lang=lang,
+            maxlines=maxlines,
+        ))
+    return res

requirements.in CHANGED Viewed

@@ -10,3 +10,5 @@ pycld2
 tqdm
 polyglot
 sentence_splitter

 tqdm
 polyglot
 sentence_splitter
+altair
+altair_saver

requirements.txt CHANGED Viewed

@@ -4,6 +4,10 @@
 #
 #    pip-compile requirements.in
 #
 blis==0.7.5
     # via
     #   spacy
@@ -34,18 +38,26 @@ cymem==2.0.6
     #   thinc
 cytoolz==0.11.2
     # via textacy
 fonttools==4.28.5
     # via matplotlib
 idna==3.3
     # via requests
 jellyfish==0.8.9
     # via textacy
 jinja2==3.0.3
-    # via spacy
 joblib==1.1.0
     # via
     #   scikit-learn
     #   textacy
 kiwisolver==1.3.2
     # via matplotlib
 langcodes==3.3.0
@@ -69,6 +81,7 @@ networkx==2.6.3
     # via textacy
 numpy==1.21.5
     # via
     #   blis
     #   matplotlib
     #   pandas
@@ -83,7 +96,9 @@ packaging==21.3
     #   matplotlib
     #   spacy
 pandas==1.3.5
-    # via seaborn
 pathy==0.6.1
     # via spacy
 pillow==8.4.0
@@ -108,6 +123,8 @@ pyparsing==3.0.6
     #   packaging
 pyphen==0.12.0
     # via textacy
 python-dateutil==2.8.2
     # via
     #   matplotlib
@@ -156,7 +173,9 @@ thinc==8.0.13
 threadpoolctl==3.0.0
     # via scikit-learn
 toolz==0.11.2
-    # via cytoolz
 tqdm==4.62.3
     # via
     #   -r requirements.in
@@ -175,7 +194,9 @@ wasabi==0.9.0
     #   spacy
     #   spacy-loggers
     #   thinc
 # The following packages are considered to be unsafe in a requirements file:
 # setuptools
-pyicu

 #
 #    pip-compile requirements.in
 #
+altair==4.2.0
+    # via -r requirements.in
+attrs==21.4.0
+    # via jsonschema
 blis==0.7.5
     # via
     #   spacy
     #   thinc
 cytoolz==0.11.2
     # via textacy
+entrypoints==0.3
+    # via altair
 fonttools==4.28.5
     # via matplotlib
 idna==3.3
     # via requests
+importlib-resources==5.4.0
+    # via jsonschema
 jellyfish==0.8.9
     # via textacy
 jinja2==3.0.3
+    # via
+    #   altair
+    #   spacy
 joblib==1.1.0
     # via
     #   scikit-learn
     #   textacy
+jsonschema==4.3.3
+    # via altair
 kiwisolver==1.3.2
     # via matplotlib
 langcodes==3.3.0
     # via textacy
 numpy==1.21.5
     # via
+    #   altair
     #   blis
     #   matplotlib
     #   pandas
     #   matplotlib
     #   spacy
 pandas==1.3.5
+    # via
+    #   altair
+    #   seaborn
 pathy==0.6.1
     # via spacy
 pillow==8.4.0
     #   packaging
 pyphen==0.12.0
     # via textacy
+pyrsistent==0.18.0
+    # via jsonschema
 python-dateutil==2.8.2
     # via
     #   matplotlib
 threadpoolctl==3.0.0
     # via scikit-learn
 toolz==0.11.2
+    # via
+    #   altair
+    #   cytoolz
 tqdm==4.62.3
     # via
     #   -r requirements.in
     #   spacy
     #   spacy-loggers
     #   thinc
+zipp==3.7.0
+    # via importlib-resources
 # The following packages are considered to be unsafe in a requirements file:
 # setuptools
+altair_saver

run-nuitka.bat ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ REM python -m nuitka app.py
2	+ python -m nuitka --nofollow-imports app.py

run-pyinstaller-spec.bat ADDED Viewed

	@@ -0,0 +1 @@


1	+ pyinstaller -y app.spec

run-python-app_py.bat CHANGED Viewed

@@ -1,3 +1,3 @@
 REM nodemon -w app.py -x .venv\Scripts\python app.py
 REM nodemon -w app.py -x py -3.7 app.py
-nodemon -w app.py -x py -3.8 app.py

 REM nodemon -w app.py -x .venv\Scripts\python app.py
 REM nodemon -w app.py -x py -3.7 app.py
+nodemon -w app.py -x "pyright app.py && py -3.8 app.py"

tests/__init__.py ADDED Viewed

File without changes

tests/test_seg_text.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""Test seg_text."""
+import pytest
+from gradiobee.seg_text import seg_text
+def test_seg_text1():
+    """Test segment counts for text with a blank line and a period."""
+    # lowercase "test 3" after the period: 2 segments expected
+    text = " text 1\n\n test 2. test 3"
+    _ = seg_text(text)
+    assert len(_) == 2
+    # capitalized "Test 3" after the period: 3 segments expected
+    text = " text 1\n\n test 2. Test 3"
+    _ = seg_text(text)
+    assert len(_) == 3
+# empty and whitespace-only inputs, each expected to give an empty list
+@pytest.mark.parametrize(
+    "test_input,expected", [
+        ("", []),
+        (" ", []),
+        (" \n ", []),
+    ]
+)
+def test_seg_text_blanks(test_input, expected):
+    """Test that empty or whitespace-only input yields an empty list."""
+    assert seg_text(test_input) == expected
+def test_seg_text_semicolon ():
+    """Test segment counts for text containing a semicolon, without extra."""
+    text = """ “元宇宙”，英文為“Metaverse”。該詞出自1992年；的科幻小說《雪崩》。 """
+    # detected language and explicit zh/ja/ko: 2 segments expected
+    assert len(seg_text(text)) == 2
+    assert len(seg_text(text, 'zh')) == 2
+    assert len(seg_text(text, 'ja')) == 2
+    assert len(seg_text(text, 'ko')) == 2
+    # forced 'en': the whole text is expected to stay one segment
+    assert len(seg_text(text, 'en')) == 1
+def test_seg_text_semicolon_extra ():
+    """Test that an extra semicolon pattern adds one segment."""
+    # matches the half-width or the full-width semicolon
+    extra = "[;；]"
+    text = """ “元宇宙”，英文為“Metaverse”。該詞出自1992年；的科幻小說《雪崩》。 """
+    # each count is one more than the expected count without extra
+    assert len(seg_text(text, extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'zh', extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'ja', extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'ko', extra=extra)) == 2 + 1
+    assert len(seg_text(text, 'en', extra=extra)) == 1 + 1