diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..4e1d29cf9c3a86ab07cd5bd0bfe37b458352ec47 Binary files /dev/null and b/.DS_Store differ diff --git a/img/Mo-PTT-Logo.png b/img/Mo-PTT-Logo.png new file mode 100644 index 0000000000000000000000000000000000000000..f88ac6d7961f473e49446a8adb85f4ecce0034cb Binary files /dev/null and b/img/Mo-PTT-Logo.png differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1c260239b37d49fe85d814d72a64cf724fa80df3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,84 @@ +aiofiles==0.8.0 +altair==4.2.0 +attrs==22.1.0 +backports.zoneinfo==0.2.1 +beautifulsoup4==4.11.1 +blinker==1.5 +cachetools==5.2.0 +certifi==2022.5.18.1 +charset-normalizer==2.0.12 +ckip-transformers==0.3.2 +click==8.1.3 +commonmark==0.9.1 +conda==4.3.16 +CwnGraph==0.4.0 +CwnSenseTagger==0.1.6 +cycler==0.11.0 +decorator==5.1.1 +DistilTag==0.2.2 +entrypoints==0.4 +filelock==3.8.0 +fonttools==4.33.3 +gdown==4.5.1 +gitdb==4.0.9 +GitPython==3.1.27 +huggingface-hub==0.10.0 +idna==3.3 +importlib-metadata==4.12.0 +importlib-resources==5.9.0 +jieba==0.42.1 +Jinja2==3.1.2 +joblib==1.2.0 +jsonschema==4.16.0 +kiwisolver==1.4.2 +MarkupSafe==2.1.1 +matplotlib==3.5.2 +nltk==3.7 +numpy==1.22.4 +packaging==21.3 +pandas==1.5.0 +pdfminer==20191125 +pdfminer3k==1.3.4 +Pillow==9.1.0 +pkgutil-resolve-name==1.3.10 +ply==3.11 +protobuf==3.20.2 +pyarrow==9.0.0 +pycosat==0.6.3 +pycryptodome==3.14.1 +pydeck==0.8.0b3 +Pygments==2.13.0 +Pympler==1.0.1 +pyparsing==3.0.9 +pyrsistent==0.18.1 +PySocks==1.7.1 +python-dateutil==2.8.2 +python-dotenv==0.20.0 +pytz==2022.2.1 +pytz-deprecation-shim==0.1.0.post0 +PyYAML==6.0 +regex==2022.9.13 +requests==2.27.1 +rich==12.5.1 +ruamel.yaml==0.17.21 +ruamel.yaml.clib==0.2.6 +semver==2.13.0 +six==1.16.0 +smmap==5.0.0 +snownlp==0.12.3 +soupsieve==2.3.2.post1 +streamlit==1.12.2 +tokenizers==0.12.1 +toml==0.10.2 +toolz==0.12.0 +torch==1.12.1 
+tornado==6.2 +tqdm==4.64.1 +transformers==4.22.2 +typing-extensions==4.3.0 +tzdata==2022.4 +tzlocal==4.2 +urllib3==1.26.9 +validators==0.20.0 +wordcloud==1.8.1 +zipp==3.8.1 \ No newline at end of file diff --git a/twNLP-app/.DS_Store b/twNLP-app/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..0eb6738683df74304b96a1e06884705929cdec1f Binary files /dev/null and b/twNLP-app/.DS_Store differ diff --git a/twNLP-app/.dockerignore b/twNLP-app/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..4b030ab2f2592605c723fd1fe0ccfb061041c54c --- /dev/null +++ b/twNLP-app/.dockerignore @@ -0,0 +1,101 @@ +.git/ +.gitignore + +# Docker +docker-compose.yml +.docker + +# Byte-compiled / optimized / DLL files +__pycache__/ +*/__pycache__/ +*/*/__pycache__/ +*/*/*/__pycache__/ +*.py[cod] +*/*.py[cod] +*/*/*.py[cod] +*/*/*/*.py[cod] +**/*.egg-info +**/*.egg/ +**/*.pyc +**/*.swp + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Virtual environment +.env/ +.venv/ +venv/ + +# PyCharm +.idea + +# Python mode for VIM +.ropeproject +*/.ropeproject +*/*/.ropeproject +*/*/*/.ropeproject + +# Vim swap files +*.swp +*/*.swp +*/*/*.swp +*/*/*/*.swp + +# readme +README.md + +# tempCodeRunnerFile +tempCodeRunnerFile.py + +# pickle files +*/*/*.pickle \ No newline at end of file diff --git a/twNLP-app/.gitignore b/twNLP-app/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..95978f41fe380294f59bbc3ce0a9ef44968208ba --- /dev/null +++ b/twNLP-app/.gitignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +*.ipynb + +# sqlite3 database +*.db + +# tempCodeRunnerFile +tempCodeRunnerFile.py + +# pickle files +*.pickle \ No newline at end of file diff --git a/twNLP-app/.streamlit/config.toml b/twNLP-app/.streamlit/config.toml new file mode 100644 index 0000000000000000000000000000000000000000..e7d01b1c70efb205cd56e02e3e67d20eed63acff --- /dev/null +++ b/twNLP-app/.streamlit/config.toml @@ -0,0 +1,8 @@ +[logger] +level = "info" +messageFormat = "%(asctime)s %(message)s" + +[theme] +# Primary accent for interactive elements +primaryColor = '#228be6' +backgroundColor = 'rgb(255, 
255, 255)' \ No newline at end of file diff --git a/twNLP-app/Dockerfile b/twNLP-app/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..13cf5037e7ad432722fee410382abd8dddd80f5c --- /dev/null +++ b/twNLP-app/Dockerfile @@ -0,0 +1,20 @@ +FROM python:3.7-slim + +WORKDIR /app + +COPY ./Pipfile* ./ + +RUN pip install pipenv && \ + apt-get update && \ + apt-get install -y --no-install-recommends gcc python3-dev libssl-dev && \ + pipenv install --deploy --system && \ + apt-get remove -y gcc python3-dev libssl-dev && \ + apt-get autoremove -y && \ + pip uninstall pipenv -y \ + && rm -rf /var/lib/apt/lists/* + +COPY ./.streamlit ./.streamlit + +COPY ./src ./src + +EXPOSE 8501 \ No newline at end of file diff --git a/twNLP-app/LICENSE b/twNLP-app/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/twNLP-app/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/twNLP-app/Pipfile b/twNLP-app/Pipfile new file mode 100644 index 0000000000000000000000000000000000000000..95ce3549b10697b0880efd930f22c73ccdd8aa24 --- /dev/null +++ b/twNLP-app/Pipfile @@ -0,0 +1,18 @@ +[[source]] +url = "https://pypi.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +streamlit = "*" +ckip-transformers = "*" +cwngraph = "*" +distiltag = "*" +cwnsensetagger = "*" +aiofiles = "*" +python-dotenv = "*" + +[dev-packages] + +[requires] +python_version = "3.7" diff --git a/twNLP-app/Pipfile.lock b/twNLP-app/Pipfile.lock new file mode 100644 index 0000000000000000000000000000000000000000..2bb7aa7470e837a178ca0d6ca29d7582fdd10c86 --- /dev/null +++ b/twNLP-app/Pipfile.lock @@ -0,0 +1,953 @@ +{ + "_meta": { + "hash": { + "sha256": "c3ccdc36761e5f63f4df701a2b449aae3db40ccf5b055b6fc06ba39e61001309" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "aiofiles": { + "hashes": [ + "sha256:7a973fc22b29e9962d0897805ace5856e6a566ab1f0c8e5c91ff6c866519c937", + "sha256:8334f23235248a3b2e83b2c3a78a22674f39969b96397126cc93664d9a901e59" + ], + "index": "pypi", + "version": "==0.8.0" + }, + "altair": { + "hashes": [ + "sha256:0c724848ae53410c13fa28be2b3b9a9dcb7b5caa1a70f7f217bd663bb419935a", + 
"sha256:d87d9372e63b48cd96b2a6415f0cf9457f50162ab79dc7a31cd7e024dd840026" + ], + "markers": "python_version >= '3.7'", + "version": "==4.2.0" + }, + "attrs": { + "hashes": [ + "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6", + "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c" + ], + "markers": "python_version >= '3.5'", + "version": "==22.1.0" + }, + "backports.zoneinfo": { + "hashes": [ + "sha256:17746bd546106fa389c51dbea67c8b7c8f0d14b5526a579ca6ccf5ed72c526cf", + "sha256:1b13e654a55cd45672cb54ed12148cd33628f672548f373963b0bff67b217328", + "sha256:1c5742112073a563c81f786e77514969acb58649bcdf6cdf0b4ed31a348d4546", + "sha256:4a0f800587060bf8880f954dbef70de6c11bbe59c673c3d818921f042f9954a6", + "sha256:5c144945a7752ca544b4b78c8c41544cdfaf9786f25fe5ffb10e838e19a27570", + "sha256:7b0a64cda4145548fed9efc10322770f929b944ce5cee6c0dfe0c87bf4c0c8c9", + "sha256:8439c030a11780786a2002261569bdf362264f605dfa4d65090b64b05c9f79a7", + "sha256:8961c0f32cd0336fb8e8ead11a1f8cd99ec07145ec2931122faaac1c8f7fd987", + "sha256:89a48c0d158a3cc3f654da4c2de1ceba85263fafb861b98b59040a5086259722", + "sha256:a76b38c52400b762e48131494ba26be363491ac4f9a04c1b7e92483d169f6582", + "sha256:da6013fd84a690242c310d77ddb8441a559e9cb3d3d59ebac9aca1a57b2e18bc", + "sha256:e55b384612d93be96506932a786bbcde5a2db7a9e6a4bb4bffe8b733f5b9036b", + "sha256:e81b76cace8eda1fca50e345242ba977f9be6ae3945af8d46326d776b4cf78d1", + "sha256:e8236383a20872c0cdf5a62b554b27538db7fa1bbec52429d8d106effbaeca08", + "sha256:f04e857b59d9d1ccc39ce2da1021d196e47234873820cbeaad210724b1ee28ac", + "sha256:fadbfe37f74051d024037f223b8e001611eac868b5c5b06144ef4d8b799862f2" + ], + "markers": "python_version < '3.9'", + "version": "==0.2.1" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:58d5c3d29f5a36ffeb94f02f0d786cd53014cf9b3b3951d42e0080d8a9498d30", + "sha256:ad9aa55b65ef2808eb405f46cf74df7fcb7044d5cbc26487f96eb2ef2e436693" + ], + "markers": "python_version >= '3.6'", + "version": 
"==4.11.1" + }, + "blinker": { + "hashes": [ + "sha256:1eb563df6fdbc39eeddc177d953203f99f097e9bf0e2b8f9f3cf18b6ca425e36", + "sha256:923e5e2f69c155f2cc42dafbbd70e16e3fde24d2d4aa2ab72fbe386238892462" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==1.5" + }, + "cachetools": { + "hashes": [ + "sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757", + "sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db" + ], + "markers": "python_version ~= '3.7'", + "version": "==5.2.0" + }, + "certifi": { + "hashes": [ + "sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d", + "sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.6.15" + }, + "charset-normalizer": { + "hashes": [ + "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845", + "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f" + ], + "markers": "python_version >= '3.6'", + "version": "==2.1.1" + }, + "ckip-transformers": { + "hashes": [ + "sha256:48affb723a40e535ccbcc0b4bbb14a86695373167218786eb00b06531cebb22e", + "sha256:a13c8ec21e209d3854601dae623f91046d2ee2c7f3fcb693752bfd57e896c776" + ], + "index": "pypi", + "version": "==0.3.2" + }, + "click": { + "hashes": [ + "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", + "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.3" + }, + "commonmark": { + "hashes": [ + "sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60", + "sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9" + ], + "version": "==0.9.1" + }, + "cwngraph": { + "hashes": [ + "sha256:20acb3e9b6ac86e724537718d729c0fc81c7634932213bef42d8c03c94ae9236", + 
"sha256:bb88150cfa088db51b19094794de156d1214539cb67928f35c424e81ab39c392" + ], + "index": "pypi", + "version": "==0.4.0" + }, + "cwnsensetagger": { + "hashes": [ + "sha256:00727031fa61585d8b21465630b134b35be37138b6edbad7b7f43c76769a1ca0", + "sha256:f3a7a720ab81c452cf87294f84bd9e62357000127db39c32526da19fdeddafa0" + ], + "index": "pypi", + "version": "==0.1.6" + }, + "decorator": { + "hashes": [ + "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", + "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186" + ], + "markers": "python_version >= '3.5'", + "version": "==5.1.1" + }, + "distiltag": { + "hashes": [ + "sha256:24dff55253d54e237aa1339d5d7252de3e2c832b5082ba0751fd946db1f085bd", + "sha256:2bc723a952cae40ced3478a04698399a91612f8025142c7fb313fd905a6b7a7e" + ], + "index": "pypi", + "version": "==0.2.2" + }, + "entrypoints": { + "hashes": [ + "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4", + "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f" + ], + "markers": "python_version >= '3.6'", + "version": "==0.4" + }, + "filelock": { + "hashes": [ + "sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc", + "sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4" + ], + "markers": "python_version >= '3.7'", + "version": "==3.8.0" + }, + "gdown": { + "hashes": [ + "sha256:8217061063d8afcaad9d8e2d5410fbc0989c2b3f0946a6bd9bfeb9461616cc6a" + ], + "version": "==4.5.1" + }, + "gitdb": { + "hashes": [ + "sha256:8033ad4e853066ba6ca92050b9df2f89301b8fc8bf7e9324d412a63f8bf1a8fd", + "sha256:bac2fd45c0a1c9cf619e63a90d62bdc63892ef92387424b855792a6cabe789aa" + ], + "markers": "python_version >= '3.6'", + "version": "==4.0.9" + }, + "gitpython": { + "hashes": [ + "sha256:1c885ce809e8ba2d88a29befeb385fcea06338d3640712b59ca623c220bb5704", + "sha256:5b68b000463593e05ff2b261acff0ff0972df8ab1b70d3cdbd41b546c8b8fc3d" + ], + "markers": "python_version >= '3.7'", + 
"version": "==3.1.27" + }, + "huggingface-hub": { + "hashes": [ + "sha256:6395f26aaf44bbb4a73d3e14aca228fa39534696f651c6c82a6347f8c9f5950b", + "sha256:7a588046bdeb84e7bc99b3da58bbb4312a56d94ba51ebc60dfe610c18b3d0b9f" + ], + "markers": "python_version >= '3.7'", + "version": "==0.9.1" + }, + "idna": { + "hashes": [ + "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff", + "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d" + ], + "markers": "python_version >= '3.5'", + "version": "==3.3" + }, + "importlib-metadata": { + "hashes": [ + "sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670", + "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23" + ], + "markers": "python_version >= '3.7'", + "version": "==4.12.0" + }, + "importlib-resources": { + "hashes": [ + "sha256:5481e97fb45af8dcf2f798952625591c58fe599d0735d86b10f54de086a61681", + "sha256:f78a8df21a79bcc30cfd400bdc38f314333de7c0fb619763f6b9dabab8268bb7" + ], + "markers": "python_version < '3.9'", + "version": "==5.9.0" + }, + "jinja2": { + "hashes": [ + "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852", + "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61" + ], + "markers": "python_version >= '3.7'", + "version": "==3.1.2" + }, + "joblib": { + "hashes": [ + "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35", + "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6" + ], + "markers": "python_version >= '3.6'", + "version": "==1.1.0" + }, + "jsonschema": { + "hashes": [ + "sha256:15062f4cc6f591400cd528d2c355f2cfa6a57e44c820dc783aee5e23d36a831f", + "sha256:9892b8d630a82990521a9ca630d3446bd316b5ad54dbe981338802787f3e0d2d" + ], + "markers": "python_version >= '3.7'", + "version": "==4.14.0" + }, + "markupsafe": { + "hashes": [ + "sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003", + 
"sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88", + "sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5", + "sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7", + "sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a", + "sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603", + "sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1", + "sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135", + "sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247", + "sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6", + "sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601", + "sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77", + "sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02", + "sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e", + "sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63", + "sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f", + "sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980", + "sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b", + "sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812", + "sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff", + "sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96", + "sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1", + "sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925", + "sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a", + "sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6", + "sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e", + "sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f", 
+ "sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4", + "sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f", + "sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3", + "sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c", + "sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a", + "sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417", + "sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a", + "sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a", + "sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37", + "sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452", + "sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933", + "sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a", + "sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7" + ], + "markers": "python_version >= '3.7'", + "version": "==2.1.1" + }, + "nltk": { + "hashes": [ + "sha256:ba3de02490308b248f9b94c8bc1ac0683e9aa2ec49ee78536d8667afb5e3eec8", + "sha256:d6507d6460cec76d70afea4242a226a7542f85c669177b9c7f562b7cf1b05502" + ], + "markers": "python_version >= '3.7'", + "version": "==3.7" + }, + "numpy": { + "hashes": [ + "sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac", + "sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3", + "sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6", + "sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1", + "sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a", + "sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b", + "sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470", + "sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1", + 
"sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab", + "sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46", + "sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673", + "sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7", + "sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db", + "sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e", + "sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786", + "sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552", + "sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25", + "sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6", + "sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2", + "sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a", + "sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf", + "sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f", + "sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c", + "sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4", + "sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b", + "sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0", + "sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3", + "sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656", + "sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0", + "sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb", + "sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e" + ], + "markers": "python_version < '3.11' and python_version >= '3.7'", + "version": "==1.21.6" + }, + "packaging": { + "hashes": [ + "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb", + 
"sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + ], + "markers": "python_version >= '3.6'", + "version": "==21.3" + }, + "pandas": { + "hashes": [ + "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1", + "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0", + "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6", + "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006", + "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2", + "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7", + "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0", + "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56", + "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4", + "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02", + "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296", + "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9", + "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c", + "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6", + "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39", + "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb", + "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58", + "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6", + "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b", + "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf", + "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f", + "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3", + "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f", + 
"sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb", + "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd" + ], + "markers": "python_full_version >= '3.7.1'", + "version": "==1.3.5" + }, + "pillow": { + "hashes": [ + "sha256:0030fdbd926fb85844b8b92e2f9449ba89607231d3dd597a21ae72dc7fe26927", + "sha256:030e3460861488e249731c3e7ab59b07c7853838ff3b8e16aac9561bb345da14", + "sha256:0ed2c4ef2451de908c90436d6e8092e13a43992f1860275b4d8082667fbb2ffc", + "sha256:136659638f61a251e8ed3b331fc6ccd124590eeff539de57c5f80ef3a9594e58", + "sha256:13b725463f32df1bfeacbf3dd197fb358ae8ebcd8c5548faa75126ea425ccb60", + "sha256:1536ad017a9f789430fb6b8be8bf99d2f214c76502becc196c6f2d9a75b01b76", + "sha256:15928f824870535c85dbf949c09d6ae7d3d6ac2d6efec80f3227f73eefba741c", + "sha256:17d4cafe22f050b46d983b71c707162d63d796a1235cdf8b9d7a112e97b15bac", + "sha256:1802f34298f5ba11d55e5bb09c31997dc0c6aed919658dfdf0198a2fe75d5490", + "sha256:1cc1d2451e8a3b4bfdb9caf745b58e6c7a77d2e469159b0d527a4554d73694d1", + "sha256:1fd6f5e3c0e4697fa7eb45b6e93996299f3feee73a3175fa451f49a74d092b9f", + "sha256:254164c57bab4b459f14c64e93df11eff5ded575192c294a0c49270f22c5d93d", + "sha256:2ad0d4df0f5ef2247e27fc790d5c9b5a0af8ade9ba340db4a73bb1a4a3e5fb4f", + "sha256:2c58b24e3a63efd22554c676d81b0e57f80e0a7d3a5874a7e14ce90ec40d3069", + "sha256:2d33a11f601213dcd5718109c09a52c2a1c893e7461f0be2d6febc2879ec2402", + "sha256:337a74fd2f291c607d220c793a8135273c4c2ab001b03e601c36766005f36885", + "sha256:37ff6b522a26d0538b753f0b4e8e164fdada12db6c6f00f62145d732d8a3152e", + "sha256:3d1f14f5f691f55e1b47f824ca4fdcb4b19b4323fe43cc7bb105988cad7496be", + "sha256:408673ed75594933714482501fe97e055a42996087eeca7e5d06e33218d05aa8", + "sha256:4134d3f1ba5f15027ff5c04296f13328fecd46921424084516bdb1b2548e66ff", + "sha256:4ad2f835e0ad81d1689f1b7e3fbac7b01bb8777d5a985c8962bedee0cc6d43da", + "sha256:50dff9cc21826d2977ef2d2a205504034e3a4563ca6f5db739b0d1026658e004", + 
"sha256:510cef4a3f401c246cfd8227b300828715dd055463cdca6176c2e4036df8bd4f", + "sha256:5aed7dde98403cd91d86a1115c78d8145c83078e864c1de1064f52e6feb61b20", + "sha256:69bd1a15d7ba3694631e00df8de65a8cb031911ca11f44929c97fe05eb9b6c1d", + "sha256:6bf088c1ce160f50ea40764f825ec9b72ed9da25346216b91361eef8ad1b8f8c", + "sha256:6e8c66f70fb539301e064f6478d7453e820d8a2c631da948a23384865cd95544", + "sha256:727dd1389bc5cb9827cbd1f9d40d2c2a1a0c9b32dd2261db522d22a604a6eec9", + "sha256:74a04183e6e64930b667d321524e3c5361094bb4af9083db5c301db64cd341f3", + "sha256:75e636fd3e0fb872693f23ccb8a5ff2cd578801251f3a4f6854c6a5d437d3c04", + "sha256:7761afe0126d046974a01e030ae7529ed0ca6a196de3ec6937c11df0df1bc91c", + "sha256:7888310f6214f19ab2b6df90f3f06afa3df7ef7355fc025e78a3044737fab1f5", + "sha256:7b0554af24df2bf96618dac71ddada02420f946be943b181108cac55a7a2dcd4", + "sha256:7c7b502bc34f6e32ba022b4a209638f9e097d7a9098104ae420eb8186217ebbb", + "sha256:808add66ea764ed97d44dda1ac4f2cfec4c1867d9efb16a33d158be79f32b8a4", + "sha256:831e648102c82f152e14c1a0938689dbb22480c548c8d4b8b248b3e50967b88c", + "sha256:93689632949aff41199090eff5474f3990b6823404e45d66a5d44304e9cdc467", + "sha256:96b5e6874431df16aee0c1ba237574cb6dff1dcb173798faa6a9d8b399a05d0e", + "sha256:9a54614049a18a2d6fe156e68e188da02a046a4a93cf24f373bffd977e943421", + "sha256:a138441e95562b3c078746a22f8fca8ff1c22c014f856278bdbdd89ca36cff1b", + "sha256:a647c0d4478b995c5e54615a2e5360ccedd2f85e70ab57fbe817ca613d5e63b8", + "sha256:a9c9bc489f8ab30906d7a85afac4b4944a572a7432e00698a7239f44a44e6efb", + "sha256:ad2277b185ebce47a63f4dc6302e30f05762b688f8dc3de55dbae4651872cdf3", + "sha256:b6d5e92df2b77665e07ddb2e4dbd6d644b78e4c0d2e9272a852627cdba0d75cf", + "sha256:bc431b065722a5ad1dfb4df354fb9333b7a582a5ee39a90e6ffff688d72f27a1", + "sha256:bdd0de2d64688ecae88dd8935012c4a72681e5df632af903a1dca8c5e7aa871a", + "sha256:c79698d4cd9318d9481d89a77e2d3fcaeff5486be641e60a4b49f3d2ecca4e28", + "sha256:cb6259196a589123d755380b65127ddc60f4c64b21fc3bb46ce3a6ea663659b0", 
+ "sha256:d5b87da55a08acb586bad5c3aa3b86505f559b84f39035b233d5bf844b0834b1", + "sha256:dcd7b9c7139dc8258d164b55696ecd16c04607f1cc33ba7af86613881ffe4ac8", + "sha256:dfe4c1fedfde4e2fbc009d5ad420647f7730d719786388b7de0999bf32c0d9fd", + "sha256:ea98f633d45f7e815db648fd7ff0f19e328302ac36427343e4432c84432e7ff4", + "sha256:ec52c351b35ca269cb1f8069d610fc45c5bd38c3e91f9ab4cbbf0aebc136d9c8", + "sha256:eef7592281f7c174d3d6cbfbb7ee5984a671fcd77e3fc78e973d492e9bf0eb3f", + "sha256:f07f1f00e22b231dd3d9b9208692042e29792d6bd4f6639415d2f23158a80013", + "sha256:f3fac744f9b540148fa7715a435d2283b71f68bfb6d4aae24482a890aed18b59", + "sha256:fa768eff5f9f958270b081bb33581b4b569faabf8774726b283edb06617101dc", + "sha256:fac2d65901fb0fdf20363fbd345c01958a742f2dc62a8dd4495af66e3ff502a4" + ], + "markers": "python_version >= '3.7'", + "version": "==9.2.0" + }, + "pkgutil-resolve-name": { + "hashes": [ + "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174", + "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e" + ], + "markers": "python_version < '3.9'", + "version": "==1.3.10" + }, + "protobuf": { + "hashes": [ + "sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf", + "sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f", + "sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f", + "sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7", + "sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996", + "sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067", + "sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c", + "sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7", + "sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9", + "sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c", + "sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739", + 
"sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91", + "sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c", + "sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153", + "sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9", + "sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388", + "sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e", + "sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab", + "sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde", + "sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531", + "sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8", + "sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7", + "sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20", + "sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3" + ], + "markers": "python_version >= '3.7'", + "version": "==3.20.1" + }, + "pyarrow": { + "hashes": [ + "sha256:0238998dc692efcb4e41ae74738d7c1234723271ccf520bd8312dca07d49ef8d", + "sha256:02b820ecd1da02012092c180447de449fc688d0c3f9ff8526ca301cdd60dacd0", + "sha256:1c5a073a930c632058461547e0bc572da1e724b17b6b9eb31a97da13f50cb6e0", + "sha256:29eb3e086e2b26202f3a4678316b93cfb15d0e2ba20f3ec12db8fd9cc07cde63", + "sha256:2c715eca2092273dcccf6f08437371e04d112f9354245ba2fbe6c801879450b7", + "sha256:2e753f8fcf07d8e3a0efa0c8bd51fef5c90281ffd4c5637c08ce42cd0ac297de", + "sha256:3eef8a981f45d89de403e81fb83b8119c20824caddf1404274e41a5d66c73806", + "sha256:4eebdab05afa23d5d5274b24c1cbeb1ba017d67c280f7d39fd8a8f18cbad2ec9", + "sha256:5526a3bfb404ff6d31d62ea582cf2466c7378a474a99ee04d1a9b05de5264541", + "sha256:55328348b9139c2b47450d512d716c2248fd58e2f04e2fc23a65e18726666d42", + "sha256:767cafb14278165ad539a2918c14c1b73cf20689747c21375c38e3fe62884902", + 
"sha256:7fa56cbd415cef912677270b8e41baad70cde04c6d8a8336eeb2aba85aa93706", + "sha256:7fb02bebc13ab55573d1ae9bb5002a6d20ba767bf8569b52fce5301d42495ab7", + "sha256:81a60bb291a964f63b2717fb1b28f6615ffab7e8585322bfb8a6738e6b321282", + "sha256:8ad430cee28ebc4d6661fc7315747c7a18ae2a74e67498dcb039e1c762a2fb67", + "sha256:92f3977e901db1ef5cba30d6cc1d7942b8d94b910c60f89013e8f7bb86a86eef", + "sha256:9cef618159567d5f62040f2b79b1c7b38e3885f4ffad0ec97cd2d86f88b67cef", + "sha256:a5b390bdcfb8c5b900ef543f911cdfec63e88524fafbcc15f83767202a4a2491", + "sha256:d9eb04db626fa24fdfb83c00f76679ca0d98728cdbaa0481b6402bf793a290c0", + "sha256:da3e0f319509a5881867effd7024099fb06950a0768dad0d6873668bb88cfaba", + "sha256:f11a645a41ee531c3a5edda45dea07c42267f52571f818d388971d33fc7e2d4a", + "sha256:f241bd488c2705df930eedfe304ada71191dcf67d6b98ceda0cc934fd2a8388e", + "sha256:f59bcd5217a3ae1e17870792f82b2ff92df9f3862996e2c78e156c13e56ff62e", + "sha256:f8c46bde1030d704e2796182286d1c56846552c50a39ad5bf5a20c0d8159fc35", + "sha256:fc856628acd8d281652c15b6268ec7f27ebcb015abbe99d9baad17f02adc51f1", + "sha256:fe2ce795fa1d95e4e940fe5661c3c58aee7181c730f65ac5dd8794a77228de59" + ], + "markers": "python_version >= '3.7'", + "version": "==9.0.0" + }, + "pydeck": { + "hashes": [ + "sha256:b446e810e3c615e1604f4364e0e162ba371712805fab97a6b7722200b286b466", + "sha256:f7dec070b954b88d17795a4a41fe59192843f4f4cd9ecedde4eab167d0096c40" + ], + "markers": "python_version >= '3.7'", + "version": "==0.8.0b1" + }, + "pygments": { + "hashes": [ + "sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1", + "sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42" + ], + "markers": "python_version >= '3.6'", + "version": "==2.13.0" + }, + "pympler": { + "hashes": [ + "sha256:993f1a3599ca3f4fcd7160c7545ad06310c9e12f70174ae7ae8d4e25f6c5d3fa", + "sha256:d260dda9ae781e1eab6ea15bacb84015849833ba5555f141d2d9b7b7473b307d" + ], + "markers": "python_version >= '3.6'", + "version": "==1.0.1" + }, + 
"pyparsing": { + "hashes": [ + "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb", + "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc" + ], + "markers": "python_full_version >= '3.6.8'", + "version": "==3.0.9" + }, + "pyrsistent": { + "hashes": [ + "sha256:0e3e1fcc45199df76053026a51cc59ab2ea3fc7c094c6627e93b7b44cdae2c8c", + "sha256:1b34eedd6812bf4d33814fca1b66005805d3640ce53140ab8bbb1e2651b0d9bc", + "sha256:4ed6784ceac462a7d6fcb7e9b663e93b9a6fb373b7f43594f9ff68875788e01e", + "sha256:5d45866ececf4a5fff8742c25722da6d4c9e180daa7b405dc0a2a2790d668c26", + "sha256:636ce2dc235046ccd3d8c56a7ad54e99d5c1cd0ef07d9ae847306c91d11b5fec", + "sha256:6455fc599df93d1f60e1c5c4fe471499f08d190d57eca040c0ea182301321286", + "sha256:6bc66318fb7ee012071b2792024564973ecc80e9522842eb4e17743604b5e045", + "sha256:7bfe2388663fd18bd8ce7db2c91c7400bf3e1a9e8bd7d63bf7e77d39051b85ec", + "sha256:7ec335fc998faa4febe75cc5268a9eac0478b3f681602c1f27befaf2a1abe1d8", + "sha256:914474c9f1d93080338ace89cb2acee74f4f666fb0424896fcfb8d86058bf17c", + "sha256:b568f35ad53a7b07ed9b1b2bae09eb15cdd671a5ba5d2c66caee40dbf91c68ca", + "sha256:cdfd2c361b8a8e5d9499b9082b501c452ade8bbf42aef97ea04854f4a3f43b22", + "sha256:d1b96547410f76078eaf66d282ddca2e4baae8964364abb4f4dcdde855cd123a", + "sha256:d4d61f8b993a7255ba714df3aca52700f8125289f84f704cf80916517c46eb96", + "sha256:d7a096646eab884bf8bed965bad63ea327e0d0c38989fc83c5ea7b8a87037bfc", + "sha256:df46c854f490f81210870e509818b729db4488e1f30f2a1ce1698b2295a878d1", + "sha256:e24a828f57e0c337c8d8bb9f6b12f09dfdf0273da25fda9e314f0b684b415a07", + "sha256:e4f3149fd5eb9b285d6bfb54d2e5173f6a116fe19172686797c056672689daf6", + "sha256:e92a52c166426efbe0d1ec1332ee9119b6d32fc1f0bbfd55d5c1088070e7fc1b", + "sha256:f87cc2863ef33c709e237d4b5f4502a62a00fab450c9e020892e8e2ede5847f5", + "sha256:fd8da6d0124efa2f67d86fa70c851022f87c98e205f0594e1fae044e7119a5a6" + ], + "markers": "python_version >= '3.7'", + "version": "==0.18.1" + }, + "pysocks": { 
+ "hashes": [ + "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299", + "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", + "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0" + ], + "version": "==1.7.1" + }, + "python-dateutil": { + "hashes": [ + "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86", + "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.8.2" + }, + "python-dotenv": { + "hashes": [ + "sha256:b7e3b04a59693c42c36f9ab1cc2acc46fa5df8c78e178fc33a8d4cd05c8d498f", + "sha256:d92a187be61fe482e4fd675b6d52200e7be63a12b724abbf931a40ce4fa92938" + ], + "index": "pypi", + "version": "==0.20.0" + }, + "pytz": { + "hashes": [ + "sha256:220f481bdafa09c3955dfbdddb7b57780e9a94f5127e35456a48589b9e0c0197", + "sha256:cea221417204f2d1a2aa03ddae3e867921971d0d76f14d87abb4414415bbdcf5" + ], + "version": "==2022.2.1" + }, + "pytz-deprecation-shim": { + "hashes": [ + "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6", + "sha256:af097bae1b616dde5c5744441e2ddc69e74dfdcb0c263129610d85b87445a59d" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==0.1.0.post0" + }, + "pyyaml": { + "hashes": [ + "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293", + "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b", + "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57", + "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b", + "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4", + "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07", + "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba", + 
"sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9", + "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287", + "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513", + "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0", + "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0", + "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92", + "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f", + "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2", + "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc", + "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c", + "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86", + "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4", + "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c", + "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34", + "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b", + "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c", + "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb", + "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737", + "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3", + "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d", + "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53", + "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78", + "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803", + "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a", + "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174", + "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5" 
+ ], + "markers": "python_version >= '3.6'", + "version": "==6.0" + }, + "regex": { + "hashes": [ + "sha256:02b6dc102123f5178796dcdb5a90f6e88895607fd1a1d115d8de1af8161ca2b4", + "sha256:0843cc977b9cc00eb2299b624db6481d25e7f5b093f7a7c2bb727028d4a26eda", + "sha256:085ca3dc9360c0210e0a70e5d34d66454a06077644e7679fef6358b1f053e62e", + "sha256:0a9d5a64e974bc5f160f30f76aaf993d49eeddb405676be6bf76a5a2c131e185", + "sha256:0de0ce11c0835e1117eacbfe8fa6fa98dc0e8e746b486735cb0fdebe46a02222", + "sha256:1418d3506a9582b23a27373f125ea2b0da523c581e7cf678a6f036254d134faa", + "sha256:14750172c0a616140a8f496dfef28ed24080e87d06d5838e008f959ad307a8c5", + "sha256:1b6d2c579ffdcbb3d93f63b6a7f697364594e1c1b6856958b3e61e3ca22c140a", + "sha256:1df31eaf147ecff3665ba861acb8f78221cd5501df072c9151dfa341dd24599f", + "sha256:21b6f939916aa61beea56393ebc8a9999060632ac22b8193c2cb67d6fd7cb2c3", + "sha256:2240fce3af236e4586a045c1be8bbf16c4f8831e68b7df918b72fc31a80143be", + "sha256:242f546fc5e49bb7395624ac3b4fc168bf454e11ace9804c58c4c3a90d84e38f", + "sha256:25bffa248b99b53a61b1f20fc7d19f711e38e9f0bc90d44c26670f8dc282ad7d", + "sha256:2ada67e02fa3fcca9e3b90cf24c2c6bc77f0abc126209937956aea10eeba40c7", + "sha256:2c198921afc811bc0f105c6e5150fbdebf9520c9b7d43cfc0ab156ca97f506d7", + "sha256:370b1d7aed26e29915c3fb3e72e327f194824a76cedb60c0b9f6c6af53e89d72", + "sha256:3aafbbf5076f2a48bcf31ceb42b410323daaa0ddb42544640592957bc906ace6", + "sha256:3d3d769b3d485b28d6a591b46723dbacc696e6503f48a3ef52e6fc2c90edb482", + "sha256:3d83fd6dd4263595d0e4f595d4abd54397cbed52c0147f7dd148a7b72910301e", + "sha256:45cb798095b886e4df6ff4a1f7661eb70620ccdef127e3c3e00a1aaa22d30e53", + "sha256:4bd9443f7ff6e6288dd4496215c5d903f851e55cbc09d5963587af0c6d565a0a", + "sha256:4bdfd016ab12c4075ef93f025b3cf4c8962b9b7a5e52bb7039ab64cb7755930c", + "sha256:4c6554073e3e554fbb3dff88376ada3da32ca789ea1b9e381f684d49ddb61199", + "sha256:4dad9d68574e93e1e23be53b4ecfb0f083bd5cc08cc7f1984a4ee3ebf12aa446", + 
"sha256:4e12a3c2d4781ee5d03f229c940934fa1e4ea4f4995e68ab97a2815b139e0804", + "sha256:53c9eca0d6070a8a3de42182ad26daf90ba12132eb74a2f45702332762aff84e", + "sha256:5910bb355f9517309f77101238dbacb7151ede3434a2f1fad26ecc62f13d8324", + "sha256:5c77eab46f3a2b2cd8bbe06467df783543bf7396df431eb4a144cc4b89e9fb3c", + "sha256:5d541bc430a74c787684d1ebcd205a5212a88c3de73848143e77489b2c25b911", + "sha256:5e7c8f9f8824143c219dd93cdc733c20d2c12f154034c89bcb4911db8e45bd92", + "sha256:5f14430535645712f546f1e07013507d1cc0c8abd851811dacce8c7fb584bf52", + "sha256:6059ae91667932d256d9dc03abd3512ebcade322b3a42d1b8354bd1db7f66dcc", + "sha256:61f6966371fa1cbf26c6209771a02bef80336cdaca0c0af4dfa33d51019c0b93", + "sha256:62d56a9d3c1e5a83076db4da060dad7ea35ac2f3cbd3c53ba5a51fe0caedb500", + "sha256:634f090a388351eadf1dcc1d168a190718fb68efb4b8fdc1b119cf837ca01905", + "sha256:64ecfcc386420192fbe98fdde777d993f7f2dfec9552e4f4024d3447d3a3e637", + "sha256:6af38997f178889d417851bae8fb5c00448f7405cfcab38734d771f1dd5d5973", + "sha256:6b30c8d299ba48ee919064628fd8bc296bdc6e4827d315491bea39437130d3e1", + "sha256:6f0c8807bac16984901c0573725bad786f2f004f9bd5df8476c6431097b6c5b3", + "sha256:6f62c8a59f6b8e608880c61b138ae22668184bc266b025d33200dcf2cebe0872", + "sha256:74d4aabd612d32282f3cb3ebb4436046fb840d25c754157a755bc9f66e7cd307", + "sha256:7658d2dfc1dabfb008ffe12ae47b98559e2aedd8237bee12f5aafb74d90479e3", + "sha256:777ceea2860a48e9e362a4e2a9a691782ea97bd05c24627c92e876fdd2c22e61", + "sha256:79f34d5833cd0d53ecf48bc030e4da3216bd4846224d17eeb64509be5cb098fd", + "sha256:7a52d547259495a53e61e37ffc6d5cecf8d298aeb1bc0d9b25289d65ddb31183", + "sha256:840063aa8eeb1dda07d7d7dee15648838bffef1d415f5f79061854a182a429aa", + "sha256:8e8ec94d1b1a0a297c2c69a0bf000baf9a79607ca0c084f577f811a9b447c319", + "sha256:95fb62a3980cf43e76c2fe95edab06ec70dc495b8aa660975eb9f0b2ffdae1e1", + "sha256:9668da78bcc219542467f51c2cd01894222be6aceec4b5efb806705900b794d8", + "sha256:99a7c5786de9e92ff5ffee2e8bed745f5d25495206f3f14656c379031e518334", 
+ "sha256:a1e283ad918df44bad3ccf042c2fe283c63d17617570eb91b8c370ef677b0b83", + "sha256:a25d251546acb5edb1635631c4ae0e330fa4ec7c6316c01d256728fbfb9bbff2", + "sha256:abe1adb32e2535aaa171e8b2b2d3f083f863c9974a3e6e7dae6bf4827fc8b983", + "sha256:ae85112da2d826b65aa7c7369c56ca41d9a89644312172979cbee5cf788e0b09", + "sha256:b3379a83dc63fe06538c751961f9ed730b5d7f08f96a57bbad8d52db5820df1f", + "sha256:b3c7c6c4aac19b964c1d12784aecae7f0315314640b0f41dd6f0d4e2bf439072", + "sha256:b7ddecc80e87acf12c2cf12bf3721def47188c403f04e706f104b5e71fed2f31", + "sha256:bbaf6785d3f1cd3e617b9d0fb3c5528023ef7bc7cc1356234801dc1941df8ce9", + "sha256:be6f5b453f7ed2219a9555bb6840663950b9ab1dc034216f68eac64db66633c2", + "sha256:c2b6404631b22617b5127c6de2355393ccda693ca733a098b6802e7dabb3457a", + "sha256:c4f6609f6e867a58cdf173e1cbe1f3736d25962108bd5cb01ad5a130875ff2c8", + "sha256:c76dd2c0615a28de21c97f9f6862e84faef58ff4d700196b4e395ef6a52291e4", + "sha256:c78c72f7878071a78337510ec78ab856d60b4bdcd3a95fd68b939e7cb30434b3", + "sha256:cb0c9a1476d279524538ba9a00ecec9eadcef31a6a60b2c8bd2f29f62044a559", + "sha256:ccb986e80674c929f198464bce55e995178dea26833421e2479ff04a6956afac", + "sha256:cfa62063c5eafb04e4435459ce15746b4ae6c14efeae8f16bd0e3d2895dad698", + "sha256:d13bd83284b46c304eb10de93f8a3f2c80361f91f4e8a4e1273caf83e16c4409", + "sha256:d76e585368388d99ddd2f95989e6ac80a8fe23115e93931faad99fa34550612f", + "sha256:dc32029b9cc784a529f9201289d4f841cc24a2ae3126a112cd467bc41bbc2f10", + "sha256:e0b55651db770b4b5a6c7d015f24d1a6ede307296bbdf0c47fc5f6a6adc7abee", + "sha256:e37886929ee83a5fa5c73164abada00e7f3cc1cbf3f8f6e1e8cfecae9d6cfc47", + "sha256:f7b88bc7306136b123fd1a9beed16ca02900ee31d1c36e73fa33d9e525a5562d", + "sha256:fac611bde2609a46fcbd92da7171286faa2f5c191f84d22f61cd7dc27213f51d", + "sha256:fafed60103132e74cdfbd651abe94801eb87a9765ce275b3dca9af8f3e06622a" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.8.17" + }, + "requests": { + "hashes": [ + 
"sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983", + "sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349" + ], + "markers": "python_version >= '3.7' and python_version < '4'", + "version": "==2.28.1" + }, + "rich": { + "hashes": [ + "sha256:2eb4e6894cde1e017976d2975ac210ef515d7548bc595ba20e195fb9628acdeb", + "sha256:63a5c5ce3673d3d5fbbf23cd87e11ab84b6b451436f1b7f19ec54b6bc36ed7ca" + ], + "markers": "python_version < '4' and python_full_version >= '3.6.3'", + "version": "==12.5.1" + }, + "semver": { + "hashes": [ + "sha256:ced8b23dceb22134307c1b8abfa523da14198793d9787ac838e70e29e77458d4", + "sha256:fa0fe2722ee1c3f57eac478820c3a5ae2f624af8264cbdf9000c980ff7f75e3f" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.13.0" + }, + "six": { + "hashes": [ + "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", + "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.16.0" + }, + "smmap": { + "hashes": [ + "sha256:2aba19d6a040e78d8b09de5c57e96207b09ed71d8e55ce0959eeee6c8e190d94", + "sha256:c840e62059cd3be204b0c9c9f74be2c09d5648eddd4580d9314c3ecde0b30936" + ], + "markers": "python_version >= '3.6'", + "version": "==5.0.0" + }, + "soupsieve": { + "hashes": [ + "sha256:3b2503d3c7084a42b1ebd08116e5f81aadfaea95863628c80a3b774a11b7c759", + "sha256:fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d" + ], + "markers": "python_version >= '3.6'", + "version": "==2.3.2.post1" + }, + "streamlit": { + "hashes": [ + "sha256:c56d0775feb39116ff90a8b01ee15be27212ee50abb88943607205d26d1d9923", + "sha256:f0461bebd6c1b58c38f0f602ee9bb6699f66dfe14fd2e05abc25ebe96ff4ba21" + ], + "index": "pypi", + "version": "==1.12.2" + }, + "tokenizers": { + "hashes": [ + "sha256:01abe6fbfe55e4131ca0c4c3d1a9d7ef5df424a8d536e998d2a4fc0bc57935f4", + 
"sha256:070746f86efa6c873db341e55cf17bb5e7bdd5450330ca8eca542f5c3dab2c66", + "sha256:0bf2380ad59c50222959a9b6f231339200a826fc5cb2be09ff96d8a59f65fc5e", + "sha256:2158baf80cbc09259bfd6e0e0fc4597b611e7a72ad5443dad63918a90f1dd304", + "sha256:230f51a0a82ca7b90077eaca2415f12ff9bd144607888b9c50c2ee543452322e", + "sha256:258873634406bd1d438c799993a5e44bbc0132ff055985c03c4fe30f702e9a33", + "sha256:27d93b712aa2d4346aa506ecd4ec9e94edeebeaf2d484357b482cdeffc02b5f5", + "sha256:28825dade9e52ad464164020758f9d49eb7251c32b6ae146601c506a23c67c0e", + "sha256:38625595b2fd37bfcce64ff9bfb6868c07e9a7b7f205c909d94a615ce9472287", + "sha256:3f2647cc256d6a53d18b9dcd71d377828e9f8991fbcbd6fcd8ca2ceb174552b0", + "sha256:411ebc89228f30218ffa9d9c49d414864b0df5026a47c24820431821c4360460", + "sha256:419d113e3bcc4fe20a313afc47af81e62906306b08fe1601e1443d747d46af1f", + "sha256:5188e13fc09edfe05712ca3ae5a44e7f2b0137927b1ca210d0fad90d3e58315a", + "sha256:53b5f4012ce3ffddd5b00827441b80dc7a0f6b41f4fc5248ae6d36e7d3920c6d", + "sha256:619728df2551bdfe6f96ff177f9ded958e7ed9e2af94c8d5ac2834d1eb06d112", + "sha256:62a723bd4b18bc55121f5c34cd8efd6c651f2d3b81f81dd50e5351fb65b8a617", + "sha256:664f36f0a0d409c24f2201d495161fec4d8bc93e091fbb78814eb426f29905a3", + "sha256:6a38b2019d4807d42afeff603a119094ee00f63bea2921136524c8814e9003f8", + "sha256:6a7a106d04154c2159db6cd7d042af2e2e0e53aee432f872fe6c8be45100436a", + "sha256:7c5c54080a7d5c89c990e0d478e0882dbac88926d43323a3aa236492a3c9455f", + "sha256:7d43de14b4469b57490dbaf136a31c266cb676fa22320f01f230af9219ae9034", + "sha256:7f4cb68dc538b52240d1986d2034eb0a6373be2ab5f0787d1be3ad1444ce71b7", + "sha256:8cea98f3f9577d1541b7bb0f7a3308a911751067e1d83e01485c9d3411bbf087", + "sha256:8d4339c376b695de2ad8ccaebffa75e4dc1d7857be1103d80e7925b34af8cf78", + "sha256:91906d725cb84d8ee71ce05fbb155d39d494849622b4f9349e5176a8eb01c49b", + "sha256:ae6c04b629ac2cd2f695739988cb70b9bd8d5e7f849f5b14c4510e942bee5770", + "sha256:b9779944559cb7ace6a8516e402895f239b0d9d3c833c67dbaec496310e7e206", 
+ "sha256:bdbca79726fe883c696088ea163715b2f902aec638a8e24bcf9790ff8fa45019", + "sha256:cdeba37c2fb44e1aec8a72af4cb369655b59ba313181b1b4b8183f08e759c49c", + "sha256:d737df0f8f26e093a82bfb106b6cfb510a0e9302d35834568e5b20b73ddc5a9c", + "sha256:eff5ff411f18a201eec137b7b32fcb55e0c48b372d370bd24f965f5bad471fa4", + "sha256:f1271224acafb27639c432e1ce4e7d38eab40305ba1c546e871d5c8a32f4f195", + "sha256:fde8dccb9033fa344ffce3ee1837939a50e7a210a768f1cf2059beeafa755481" + ], + "version": "==0.12.1" + }, + "toml": { + "hashes": [ + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" + ], + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==0.10.2" + }, + "toolz": { + "hashes": [ + "sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f", + "sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194" + ], + "markers": "python_version >= '3.5'", + "version": "==0.12.0" + }, + "torch": { + "hashes": [ + "sha256:03e31c37711db2cd201e02de5826de875529e45a55631d317aadce2f1ed45aa8", + "sha256:0b44601ec56f7dd44ad8afc00846051162ef9c26a8579dda0a02194327f2d55e", + "sha256:42e115dab26f60c29e298559dbec88444175528b729ae994ec4c65d56fe267dd", + "sha256:42f639501928caabb9d1d55ddd17f07cd694de146686c24489ab8c615c2871f2", + "sha256:4e1b9c14cf13fd2ab8d769529050629a0e68a6fc5cb8e84b4a3cc1dd8c4fe541", + "sha256:68104e4715a55c4bb29a85c6a8d57d820e0757da363be1ba680fa8cc5be17b52", + "sha256:69fe2cae7c39ccadd65a123793d30e0db881f1c1927945519c5c17323131437e", + "sha256:6cf6f54b43c0c30335428195589bd00e764a6d27f3b9ba637aaa8c11aaf93073", + "sha256:743784ccea0dc8f2a3fe6a536bec8c4763bd82c1352f314937cb4008d4805de1", + "sha256:8a34a2fbbaa07c921e1b203f59d3d6e00ed379f2b384445773bd14e328a5b6c8", + "sha256:976c3f997cea38ee91a0dd3c3a42322785414748d1761ef926b789dfa97c6134", + "sha256:9b356aea223772cd754edb4d9ecf2a025909b8615a7668ac7d5130f86e7ec421", 
+ "sha256:9c038662db894a23e49e385df13d47b2a777ffd56d9bcd5b832593fab0a7e286", + "sha256:a8320ba9ad87e80ca5a6a016e46ada4d1ba0c54626e135d99b2129a4541c509d", + "sha256:b5dbcca369800ce99ba7ae6dee3466607a66958afca3b740690d88168752abcf", + "sha256:bfec2843daa654f04fda23ba823af03e7b6f7650a873cdb726752d0e3718dada", + "sha256:cd26d8c5640c3a28c526d41ccdca14cf1cbca0d0f2e14e8263a7ac17194ab1d2", + "sha256:e9c8f4a311ac29fc7e8e955cfb7733deb5dbe1bdaabf5d4af2765695824b7e0d", + "sha256:f00c721f489089dc6364a01fd84906348fe02243d0af737f944fddb36003400d", + "sha256:f3b52a634e62821e747e872084ab32fbcb01b7fa7dbb7471b6218279f02a178a" + ], + "markers": "python_version >= '3.7'", + "version": "==1.12.1" + }, + "tornado": { + "hashes": [ + "sha256:1d54d13ab8414ed44de07efecb97d4ef7c39f7438cf5e976ccd356bebb1b5fca", + "sha256:20f638fd8cc85f3cbae3c732326e96addff0a15e22d80f049e00121651e82e72", + "sha256:5c87076709343557ef8032934ce5f637dbb552efa7b21d08e89ae7619ed0eb23", + "sha256:5f8c52d219d4995388119af7ccaa0bcec289535747620116a58d830e7c25d8a8", + "sha256:6fdfabffd8dfcb6cf887428849d30cf19a3ea34c2c248461e1f7d718ad30b66b", + "sha256:87dcafae3e884462f90c90ecc200defe5e580a7fbbb4365eda7c7c1eb809ebc9", + "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13", + "sha256:b8150f721c101abdef99073bf66d3903e292d851bee51910839831caba341a75", + "sha256:ba09ef14ca9893954244fd872798b4ccb2367c165946ce2dd7376aebdde8e3ac", + "sha256:d3a2f5999215a3a06a4fc218026cd84c61b8b2b40ac5296a6db1f1451ef04c1e", + "sha256:e5f923aa6a47e133d1cf87d60700889d7eae68988704e20c75fb2d65677a8e4b" + ], + "markers": "python_version >= '3.7'", + "version": "==6.2" + }, + "tqdm": { + "hashes": [ + "sha256:40be55d30e200777a307a7585aee69e4eabb46b4ec6a4b4a5f2d9f11e7d5408d", + "sha256:74a2cdefe14d11442cedf3ba4e21a3b84ff9a2dbdc6cfae2c34addb2a14a5ea6" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==4.64.0" + }, + "transformers": { + "hashes": [ + 
"sha256:07f3df80144f7f032ad5d367507445980d4aa25855a5d658cfa47ac5fff32aca", + "sha256:c03c150857e2d8f18f6dcb51e3061207522425ad57a0a6dc6cde407226a45e3e" + ], + "markers": "python_version >= '3.7'", + "version": "==4.21.2" + }, + "typing-extensions": { + "hashes": [ + "sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02", + "sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6" + ], + "markers": "python_version >= '3.7'", + "version": "==4.3.0" + }, + "tzdata": { + "hashes": [ + "sha256:21f4f0d7241572efa7f7a4fdabb052e61b55dc48274e6842697ccdf5253e5451", + "sha256:c3119520447d68ef3eb8187a55a4f44fa455f30eb1b4238fa5691ba094f2b05b" + ], + "markers": "python_version >= '3.6'", + "version": "==2022.2" + }, + "tzlocal": { + "hashes": [ + "sha256:89885494684c929d9191c57aa27502afc87a579be5cdd3225c77c463ea043745", + "sha256:ee5842fa3a795f023514ac2d801c4a81d1743bbe642e3940143326b3a00addd7" + ], + "markers": "python_version >= '3.6'", + "version": "==4.2" + }, + "urllib3": { + "hashes": [ + "sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e", + "sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5' and python_version < '4'", + "version": "==1.26.12" + }, + "validators": { + "hashes": [ + "sha256:24148ce4e64100a2d5e267233e23e7afeb55316b47d30faae7eb6e7292bc226a" + ], + "markers": "python_version >= '3.4'", + "version": "==0.20.0" + }, + "zipp": { + "hashes": [ + "sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2", + "sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009" + ], + "markers": "python_version >= '3.7'", + "version": "==3.8.1" + } + }, + "develop": {} +} diff --git a/twNLP-app/README.md b/twNLP-app/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7859afdd5e7087c336965ed62d241cd8380de879 --- /dev/null +++ b/twNLP-app/README.md 
@@ -0,0 +1,45 @@ +# **ckip-cwn-app** +中文很適合拿來探索自然語言處理管線的標記語言。這個探索過程也可反應生成語言架構多年來牽制語言理論,以及它對於自然語言處理與理解的想像影響。 + +## **Documentation** +### 1. Installation + +1. Python version + * `python == 3.7.5` + +2. Clone repository + + ```bash + git clone git@github.com:lopentu/nlp_web.git + ``` + +3. Install requirements + ```bash + cd nlp_web/assignments/twNLP-app && pip install -r requirements.txt + ``` + + +### 2. Start the app +There are two main ways to run the app: + +- run with Python + + First make sure you are in the `twNLP-app` folder, and then simply run: + + ```bash + streamlit run src/app.py + ``` + +- run with Docker + + Install Docker, and use the following command to run: + + ```bash + docker-compose up + ``` + + Then access `http://localhost` in the browser. + + +## Contact +If you have any suggestions or questions, please do not hesitate to email us at shukai@gmail.com or r07142010@g.ntu.edu.tw diff --git a/twNLP-app/deployment/nginx.conf b/twNLP-app/deployment/nginx.conf new file mode 100644 index 0000000000000000000000000000000000000000..fc238bcb6ba558803a3d32c2b82f3467492f26b6 --- /dev/null +++ b/twNLP-app/deployment/nginx.conf @@ -0,0 +1,30 @@ +worker_processes auto; + +events { + worker_connections 1024; + multi_accept on; +} + +http { + map $http_upgrade $connection_upgrade { + default upgrade; + '' close; + } + + upstream app { + server app:8501; + } + + server { + listen 80; + charset utf-8; + + location / { + proxy_pass http://app; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + auth_basic off; + } + } +} \ No newline at end of file diff --git a/twNLP-app/docker-compose.yml b/twNLP-app/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..ed07eb9ff9be3a928ebad287af264efd4a78a322 --- /dev/null +++ b/twNLP-app/docker-compose.yml @@ -0,0 +1,20 @@ +version: "3" + +services: + app: + build: ./ + ports: + - 8501:8501 + command: "streamlit run 
./src/app.py" + + nginx: + image: "nginx:stable" + depends_on: + - app + links: + - app + restart: always + ports: + - 80:80 + volumes: + - "./deployment/nginx.conf:/etc/nginx/nginx.conf" \ No newline at end of file diff --git a/twNLP-app/requirements.txt b/twNLP-app/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..4735b8f3f21cca73aa06fae5507e520230fcaaed --- /dev/null +++ b/twNLP-app/requirements.txt @@ -0,0 +1,7 @@ +aiofiles==0.8.0 +cwngraph==0.4.0 +distiltag==0.2.2 +streamlit==1.12.2 +python-dotenv==0.20.0 +cwnsensetagger==0.1.6 +ckip-transformers==0.3.2 \ No newline at end of file diff --git a/twNLP-app/src/.DS_Store b/twNLP-app/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..d0da439dffcc3253b60c3efbd08401ed8b1d1bf9 Binary files /dev/null and b/twNLP-app/src/.DS_Store differ diff --git a/twNLP-app/src/app.py b/twNLP-app/src/app.py new file mode 100644 index 0000000000000000000000000000000000000000..b45e21dd8678f03476bffd73b9921a18505f7630 --- /dev/null +++ b/twNLP-app/src/app.py @@ -0,0 +1,111 @@ +#R11142005 紀柔安 +import streamlit as st +from views.components.spinner import dowload_ckip_package, download_cwn_drivers +import pandas as pd +import requests +import bs4 +from snownlp import SnowNLP + + +def run_app(ckip_nlp_models, cwn_upgrade) -> None: + # need to download first because CWN packages will first check whether + # there is .cwn_graph folder in the root directory. 
+ download_cwn_drivers(cwn_upgrade) + dowload_ckip_package(ckip_nlp_models) + + from views.components.sidebar import visualize_side_bar + from views.containers import display_cwn, display_ckip, display_data_form + + st.title("NLP app for PTT") + st.write("這是一個針對PTT語料的 情緒分析|中文NLP管線處理🔎") + st.image("/Users/joannechi/nlpWeb/myApp/nlpweb/nlp_assignment_1/img/Mo-PTT-Logo.png", width=200) + + #menu = ["Text","Sentences"] + #choice = st.sidebar.selectbox("Menu",menu) + + + #spectra = st.file_uploader("upload your file", type={"csv", "txt"}) + #if spectra is not None: + # spectra_df = pd.read_csv(spectra) #讀取csv + # st.write(spectra_df) + + #~~web crawler~~ + st.subheader("PTT Crawler 🐛") + st.text('目前看板有:HatePolitics|Gossiping|Military|Stock') + selected = st.selectbox('請選擇看板:', + ['HatePolitics', 'Gossiping','Military','Stock']) + if selected=='HatePolitics': + URL = "https://www.ptt.cc/bbs/HatePolitics/index.html" + elif selected=='Gossiping': + URL = "https://www.ptt.cc/bbs/Gossiping/index.html" + elif selected=='Military': + URL = "https://www.ptt.cc/bbs/Military/index.html" + else: + URL = "https://www.ptt.cc/bbs/Stock/index.html" + + my_headers = {'cookie': 'over18=1;'} + response = requests.get(URL, headers = my_headers) + soup = bs4.BeautifulSoup(response.text,"html.parser") + list_results=[] + for t in soup.find_all('div','title'): + find_a=t.find('a') + find_href="https://www.ptt.cc"+find_a.get("href") if find_a is not None else None + title=t.text + results={ + "title":title, + "url":find_href + } + list_results.append(results) + my_df=pd.DataFrame(list_results) + print(my_df) + st.write(my_df) + #~~web crawler~~ + + #~~sentiment analysis~~ + st.subheader("情緒分析") + with st.form(key="nlpForm"): + raw_text=st.text_area("請輸入句子✏️") + submit_button=st.form_submit_button(label="確定") + + if submit_button: + + st.info("sentiment") + sentiment=SnowNLP(SnowNLP(raw_text).han) #轉簡體 + sentiment_han=sentiment.sentiments + st.write(sentiment_han) + + #emoji + if sentiment_han>0.5: + st.markdown("Sentiment:: 
Positive :smiley: ") + elif sentiment_han<0.5: + st.markdown("Sentiment:: Negative :angry: ") + else: + st.markdown("Sentiment:: Neutral :neutral_face: ") + + + + #with col2: + #st.info("category") + #category=SnowNLP(SnowNLP(raw_text).han) #轉簡體 + #category_han=list(category.tags) + #st.write(category_han) + + #~~sentiment analysis~~ + + st.subheader("中文 NLP 管線處理") + + input_data = display_data_form() + model, pipeline, active_visualizers = visualize_side_bar(ckip_nlp_models) + #return model_options, pipeline_options, active_visualizers + + display_factories = {"CKIP": display_ckip, "CWN": display_cwn} + + if "input_data" in st.session_state: + display_factories[pipeline]( + model, active_visualizers, st.session_state["input_data"] + ) + + +if __name__ == "__main__": + ckip_nlp_models = ["bert-base", "albert-tiny", "bert-tiny", "albert-base"] + run_app(ckip_nlp_models, cwn_upgrade=False) diff --git a/twNLP-app/src/configs/__init__.py b/twNLP-app/src/configs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ebd2cc2210f3793e424c9630ce14e0456fb7c998 --- /dev/null +++ b/twNLP-app/src/configs/__init__.py @@ -0,0 +1,10 @@ +from .ckip import ckip_path, download_ckip_drivers +from .cwn import cwn_model_path, download_cwn_models + + +__all__ = [ + "ckip_path", + "download_ckip_drivers", + "cwn_model_path", + "download_cwn_models", +] diff --git a/twNLP-app/src/configs/ckip.py b/twNLP-app/src/configs/ckip.py new file mode 100644 index 0000000000000000000000000000000000000000..59a21e59950ea1723357d05d236b16e87d89e31f --- /dev/null +++ b/twNLP-app/src/configs/ckip.py @@ -0,0 +1,36 @@ +import pickle +import asyncio +import aiofiles +from pathlib import Path + + +pkg_path = Path(__file__).resolve().parent.parent +ckip_path = pkg_path / "models" / "ckip" + + +async def write_drivers(nlp_model: str) -> None: + """The write drivers function writes the ckip drivers to pickle files asynchronously. 
+ Args: + nlp_model (str): the nlp model name + """ + from ckip_transformers.nlp import ( + CkipWordSegmenter, + CkipPosTagger, + CkipNerChunker, + ) + + drivers = ( + CkipWordSegmenter(model=nlp_model), + CkipPosTagger(model=nlp_model), + CkipNerChunker(model=nlp_model), + ) + + driver_path = ckip_path / f"{nlp_model}_drivers.pickle" + async with aiofiles.open(driver_path, mode="wb") as file: + result = pickle.dumps(drivers) + await file.write(result) + print(f"{nlp_model}_drivers.pickle done!") + + +async def download_ckip_drivers(ckip_nlp_models): + await asyncio.gather(*list(map(write_drivers, ckip_nlp_models))) diff --git a/twNLP-app/src/configs/cwn.py b/twNLP-app/src/configs/cwn.py new file mode 100644 index 0000000000000000000000000000000000000000..514d57ca6a54fbff856d5762eb5c2a60c694a85f --- /dev/null +++ b/twNLP-app/src/configs/cwn.py @@ -0,0 +1,14 @@ +from pathlib import Path +from typing import Optional + + +cwn_model_path = Path.home().resolve() / ".cwn_graph" + + +def download_cwn_models(upgrade: Optional[bool] = False): + import CwnSenseTagger, DistilTag + from CwnGraph import CwnImage + + DistilTag.download(upgrade=upgrade) + CwnSenseTagger.download(upgrade=upgrade) + CwnImage.latest() diff --git a/twNLP-app/src/context/__init__.py b/twNLP-app/src/context/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6933961beaccfbd1281371dad3cb43dca0c05a7a --- /dev/null +++ b/twNLP-app/src/context/__init__.py @@ -0,0 +1,5 @@ +from .wsg import use_WSG +from .actions import WSGKind + + +__all__ = ["use_WSG", "WSGKind"] diff --git a/twNLP-app/src/context/actions.py b/twNLP-app/src/context/actions.py new file mode 100644 index 0000000000000000000000000000000000000000..58a31ef8717a5485d65c75370acbf696d3261702 --- /dev/null +++ b/twNLP-app/src/context/actions.py @@ -0,0 +1,30 @@ +from typing import Any +from enum import Enum, auto +from types import SimpleNamespace +from dataclasses import dataclass, FrozenInstanceError + + +class 
WSGKind(Enum): + ADD_WSG = auto() + RESET = auto() + + +class FrozenSimpleNamespace(SimpleNamespace): + def __init__(self, **kwargs): + super().__init__(**kwargs) + + def __setattr__(self, name: str, value: Any) -> None: + raise FrozenInstanceError(f"cannot assign to field '{name}'") + + +@dataclass(frozen=True) +class Action: + """ + The Action object contains the payload of information. + """ + + kind: WSGKind + payload: dict + + def __post_init__(self): + super().__setattr__("payload", FrozenSimpleNamespace(**self.payload)) diff --git a/twNLP-app/src/context/reducer.py b/twNLP-app/src/context/reducer.py new file mode 100644 index 0000000000000000000000000000000000000000..ea1afe70685eee6c7a4a041494ffb79e0aa56025 --- /dev/null +++ b/twNLP-app/src/context/reducer.py @@ -0,0 +1,13 @@ +from typing import Any, Union +from .actions import WSGKind, Action + + +def reducer(state, action: Action) -> Union[str, Any]: + """The reducer function generates new states.""" + + initial_state = state if state else "" + + if action["kind"] == WSGKind.ADD_WSG: + return action["payload"] + + return initial_state diff --git a/twNLP-app/src/context/wsg.py b/twNLP-app/src/context/wsg.py new file mode 100644 index 0000000000000000000000000000000000000000..774649a019b536ce6f30b0a73b5ca6fbf8cfa85a --- /dev/null +++ b/twNLP-app/src/context/wsg.py @@ -0,0 +1,19 @@ +from .reducer import reducer +from utils.stores import Store +from typing import Callable, List, Union + +wsg_store = None + + +def return_value(): + return wsg_store.get_state() + + +wsg_store = Store(reducer) + +wsg_store.add_listener(return_value) + + +def use_WSG() -> List[Union[str, Callable]]: + """The use_WSG function contains wsg result and wsg dispatcher.""" + return [wsg_store.get_state(), wsg_store.dispatch] diff --git a/twNLP-app/src/controllers/ckip/__init__.py b/twNLP-app/src/controllers/ckip/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..22de1fcd971004bd4c8e15f13e64d080386b2860 --- /dev/null +++ b/twNLP-app/src/controllers/ckip/__init__.py @@ -0,0 +1,6 @@ +from .ner import handle_create_ner +from .pos import handle_create_pos +from .wsg import handle_create_wsg + + +__all__ = ["handle_create_ner", "handle_create_pos", "handle_create_wsg"] diff --git a/twNLP-app/src/controllers/ckip/ner.py b/twNLP-app/src/controllers/ckip/ner.py new file mode 100644 index 0000000000000000000000000000000000000000..3246cafd0cf7a86b0ec1b96bc820b69b376b37a9 --- /dev/null +++ b/twNLP-app/src/controllers/ckip/ner.py @@ -0,0 +1,32 @@ +import asyncio +from typing import List, Union +from models import connect_ckip_drivers +from ckip_transformers.nlp.util import NerToken +from utils.ckip.ner import chunk_multiple_entities + + +def is_list_of_empty_list(ner_token_list: List[Union[NerToken, None]]) -> bool: + """The is_list_of_empty_list function checks whether a list is full of empty lists. + Args: + ner_token_list (list): the result of the ner driver + Returns: + a bool + """ + return all(map(lambda value: not value, ner_token_list)) + + +def handle_create_ner(nlp_model: str, sentence_list: List[str]) -> List[str]: + """The handle_create_ner function handles the request that deals with NER. 
+ Args: + nlp_model (str): the nlp model name + sentence_list (list): a list of sentences + Returns: + a list of strings + """ + ner_driver = connect_ckip_drivers(nlp_model)[2] + ner_token_list = ner_driver(sentence_list) + + if is_list_of_empty_list(ner_token_list): + return sentence_list + + return asyncio.run(chunk_multiple_entities(zip(sentence_list, ner_token_list))) diff --git a/twNLP-app/src/controllers/ckip/pos.py b/twNLP-app/src/controllers/ckip/pos.py new file mode 100644 index 0000000000000000000000000000000000000000..9ecbc0652958156a3b4df1807fb1b395a6da3904 --- /dev/null +++ b/twNLP-app/src/controllers/ckip/pos.py @@ -0,0 +1,18 @@ +import asyncio +from typing import List +from context import use_WSG +from utils.ckip.pos import PosTagging +from utils.text import add_multiple_textsubscripts + + +def handle_create_pos(nlp_model: str, sentence_list: List[str]): + """The handle_create_pos function handles the request that deals with pos-tagging. + Args: + sentence_list (list): a list of sentences + Returns: + a list of strings + """ + + ws_result = use_WSG()[0] + segmented_result = PosTagging(nlp_model, ws_result).tag() + return asyncio.run(add_multiple_textsubscripts("pos", segmented_result)) diff --git a/twNLP-app/src/controllers/ckip/wsg.py b/twNLP-app/src/controllers/ckip/wsg.py new file mode 100644 index 0000000000000000000000000000000000000000..308f3eb58bc290c05f163bbc03b341a6e40e3234 --- /dev/null +++ b/twNLP-app/src/controllers/ckip/wsg.py @@ -0,0 +1,20 @@ +import asyncio +from typing import List +from context import use_WSG, WSGKind +from utils.ckip.wsg import WordSegmentation +from utils.text import add_multiple_textsubscripts + + +def handle_create_wsg(nlp_model: str, sentence_list: List[str]) -> List[str]: + """The handle_create_wsg function handles the request that deals with word + segmentation. 
+ Args: + sentence_list (list): a list of sentences + Returns: + a list of strings + """ + + ws_result = WordSegmentation(nlp_model, sentence_list).segment() + dispatch = use_WSG()[1] + dispatch({"kind": WSGKind.ADD_WSG, "payload": ws_result}) + return asyncio.run(add_multiple_textsubscripts("ws", ws_result)) \ No newline at end of file diff --git a/twNLP-app/src/controllers/cwn/__init__.py b/twNLP-app/src/controllers/cwn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6436dc0c0370170b9b74ad047bf9f8872543345c --- /dev/null +++ b/twNLP-app/src/controllers/cwn/__init__.py @@ -0,0 +1,4 @@ +from .cwn_sense_tag import handle_create_cwn_tags + + +__all__ = ["handle_create_cwn_tags"] diff --git a/twNLP-app/src/controllers/cwn/cwn_sense_tag.py b/twNLP-app/src/controllers/cwn/cwn_sense_tag.py new file mode 100644 index 0000000000000000000000000000000000000000..9682dd9481ac1537f51750e661e2f5a3ba19f9ac --- /dev/null +++ b/twNLP-app/src/controllers/cwn/cwn_sense_tag.py @@ -0,0 +1,25 @@ +import asyncio +from typing import List +from utils.text import add_multiple_textsubscripts +from utils.cwn import disambiguate_word_sense, create_cwn_sense_tags + + +async def create_tags(segmented_result: List[str]): + """The create_tags function runs two asynchronous operations (i.e. + `add_multiple_textsubscripts` and `create_cwn_sense_tags`). 
+ Args: + segmented_result (list) + """ + + return await asyncio.gather( + *[ + add_multiple_textsubscripts("cwn", segmented_result), + create_cwn_sense_tags(segmented_result), + ] + ) + + +def handle_create_cwn_tags(sentence_list: List[str]) -> List[str]: + segmented_result = asyncio.run(disambiguate_word_sense(sentence_list)) + span_tags, cwn_tags = asyncio.run(create_tags(segmented_result)) + return span_tags, cwn_tags \ No newline at end of file diff --git a/twNLP-app/src/models/.DS_Store b/twNLP-app/src/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f2128adf2793ae17e36efbcfcdf8d5931ed82151 Binary files /dev/null and b/twNLP-app/src/models/.DS_Store differ diff --git a/twNLP-app/src/models/__init__.py b/twNLP-app/src/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..067648579f79ee55e314ea4cddfc8d5028abedd0 --- /dev/null +++ b/twNLP-app/src/models/__init__.py @@ -0,0 +1,4 @@ +from .ckip import connect_ckip_drivers + + +__all__ = ["connect_ckip_drivers"] diff --git a/twNLP-app/src/models/ckip/__init__.py b/twNLP-app/src/models/ckip/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..067648579f79ee55e314ea4cddfc8d5028abedd0 --- /dev/null +++ b/twNLP-app/src/models/ckip/__init__.py @@ -0,0 +1,4 @@ +from .ckip import connect_ckip_drivers + + +__all__ = ["connect_ckip_drivers"] diff --git a/twNLP-app/src/models/ckip/ckip.py b/twNLP-app/src/models/ckip/ckip.py new file mode 100644 index 0000000000000000000000000000000000000000..4022005b1950e36979d570c493d45cb452cc5ffb --- /dev/null +++ b/twNLP-app/src/models/ckip/ckip.py @@ -0,0 +1,17 @@ +import pickle +from configs import ckip_path + + +def connect_ckip_drivers(nlp_model: str) -> tuple: + """The connect_ckip_drivers function connects to the ckip drivers. + + Args: + nlp_model (str): the nlp model name + Returns: + a tuple, containing CkipWordSegmenter, CkipPosTagger and CkipNerChunker. 
+ """ + + driver_path = ckip_path / f"{nlp_model}_drivers.pickle" + + with open(driver_path, "rb") as file: + return pickle.load(file) \ No newline at end of file diff --git a/twNLP-app/src/utils/ckip/ner/__init__.py b/twNLP-app/src/utils/ckip/ner/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..98ce5dbe4915da8d795b5d31cbb7599cec841962 --- /dev/null +++ b/twNLP-app/src/utils/ckip/ner/__init__.py @@ -0,0 +1,4 @@ +from .chunker import chunk_entities, chunk_multiple_entities + + +__all__ = ["chunk_entities", "chunk_multiple_entities"] diff --git a/twNLP-app/src/utils/ckip/ner/chunker.py b/twNLP-app/src/utils/ckip/ner/chunker.py new file mode 100644 index 0000000000000000000000000000000000000000..45e2f75f4145b795fc30baf771d2b52dbe2352f1 --- /dev/null +++ b/twNLP-app/src/utils/ckip/ner/chunker.py @@ -0,0 +1,13 @@ +import asyncio +from .ner import create_ner +from typing import List, Tuple, Union +from ckip_transformers.nlp.util import NerToken + + +async def chunk_entities(zip_value: Tuple[Union[str, NerToken]]) -> str: + sentence, ner_token_list = zip_value + return create_ner(sentence, ner_token_list) + + +async def chunk_multiple_entities(data: zip) -> List[str]: + return await asyncio.gather(*list(map(chunk_entities, data))) \ No newline at end of file diff --git a/twNLP-app/src/utils/ckip/ner/ner.py b/twNLP-app/src/utils/ckip/ner/ner.py new file mode 100644 index 0000000000000000000000000000000000000000..7f971d01832f39e84758d57f6e318a4276f64dec --- /dev/null +++ b/twNLP-app/src/utils/ckip/ner/ner.py @@ -0,0 +1,67 @@ +from functools import lru_cache +from ...text import create_entity_color +from typing import List, Tuple, Optional +from ckip_transformers.nlp.util import NerToken + + +def add_textsubscript(ner_token_list: List[NerToken]) -> Tuple[Tuple[str]]: + """The add_textsubscript function combines the token word and the + NER-tag, and specifies the NER-tag to be displayed as subscript. 
+ Args: + ner_token_list (NerToken): a list of NerToken + Returns: + a tuple: ( + ("傅達仁PERSON", (0, 3)) + ... + ) + """ + + combine = lambda value: ( + f"{value.word}{value.ner}", + value.idx, + ) + return tuple(map(combine, ner_token_list)) + + +@lru_cache(maxsize=None) +def modify_sentence( + span_tuple: Tuple[Tuple[str]], sentence: str, increased_len: Optional[int] = 0 +) -> str: + if len(list(span_tuple)) == 1: + span_list = list(span_tuple) + modified_word, index = span_list[0] + start_index, end_index = index + start_index += increased_len + end_index += increased_len + return "".join((sentence[:start_index], modified_word, sentence[end_index:])) + + span_list = list(span_tuple) + modified_word, index = span_list.pop(0) + span_tuple = tuple(span_list) + + start_index, end_index = index + + if increased_len: + start_index += increased_len + end_index += increased_len + + original_word = sentence[start_index:end_index] + modified_sentence = "".join( + (sentence[:start_index], modified_word, sentence[end_index:]) + ) + + index_gap = len(modified_word) - len(original_word) + return modify_sentence(span_tuple, modified_sentence, increased_len + index_gap) + + +def create_ner(sentence: str, ner_token_list: List[NerToken]) -> str: + """The replace_entities function replaces words that are recognized + as the token words with opening and closing span tags. 
+ Args: + sentence (str): the orignal sentence + ner_token_list (NerToken): a list of NerToken + Returns: + a str + """ + modified_ner_token_list = add_textsubscript(ner_token_list) + return modify_sentence(modified_ner_token_list, sentence) diff --git a/twNLP-app/src/utils/ckip/pos/__init__.py b/twNLP-app/src/utils/ckip/pos/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..30b5eea7788730079ad11214080a2d31b7d8f7b2 --- /dev/null +++ b/twNLP-app/src/utils/ckip/pos/__init__.py @@ -0,0 +1,4 @@ +from .pos import PosTagging + + +__all__ = ["PosTagging"] diff --git a/twNLP-app/src/utils/ckip/pos/pos.py b/twNLP-app/src/utils/ckip/pos/pos.py new file mode 100644 index 0000000000000000000000000000000000000000..6ba114d14f9a5e0fd65ef55c935bce6058171ac1 --- /dev/null +++ b/twNLP-app/src/utils/ckip/pos/pos.py @@ -0,0 +1,38 @@ +from typing import List, Union +from dataclasses import dataclass +from models import connect_ckip_drivers + + +@dataclass +class PosTagging: + """ + The PosTagging object marks a word in `ws_result` as corresponding to a particular part of speech. + """ + + nlp_model: str + ws_result: List[Union[List[None], List[str]]] + + def __post_init__(self) -> None: + self.pos_driver = connect_ckip_drivers(self.nlp_model)[1] + + def pack_ws_pos_sentece(self, ws_pos_pair: tuple) -> List[tuple]: + """The pack_ws_pos_sentece method packs both words and thier part-of-speech to a pair. 
+ Args: + ws_pos_pair (tuple): the pair of a word and its corresponding part-of-speech + Returns: + a list of tuples: [ + ('我', 'Nh'), + ('喜歡', 'VK'), + ('程式', 'Na') + ] + """ + + sentence_ws, sentence_pos = ws_pos_pair + assert len(sentence_ws) == len(sentence_pos) + return list( + map(lambda word_pos_pair: word_pos_pair, zip(sentence_ws, sentence_pos)) + ) + + def tag(self): + pos_pipeline = self.pos_driver(self.ws_result, use_delim=True) + return list(map(self.pack_ws_pos_sentece, zip(self.ws_result, pos_pipeline))) diff --git a/twNLP-app/src/utils/ckip/wsg/__init__.py b/twNLP-app/src/utils/ckip/wsg/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1eb6a36f81b396e0f3d1ea5ca67907f49256a825 --- /dev/null +++ b/twNLP-app/src/utils/ckip/wsg/__init__.py @@ -0,0 +1,4 @@ +from .wsg import WordSegmentation + + +__all__ = ["WordSegmentation"] diff --git a/twNLP-app/src/utils/ckip/wsg/wsg.py b/twNLP-app/src/utils/ckip/wsg/wsg.py new file mode 100644 index 0000000000000000000000000000000000000000..41f80625e071b722cacd77f1f858eb65d9897bd1 --- /dev/null +++ b/twNLP-app/src/utils/ckip/wsg/wsg.py @@ -0,0 +1,40 @@ +from typing import List, Union +from dataclasses import dataclass +from models import connect_ckip_drivers + + +@dataclass +class WordSegmentation: + """ + The WordSegmentation object divides written text in `sentence_lists` into meaningful units. + """ + + nlp_model: str + sentence_list: List[str] + + def __post_init__(self) -> None: + self.ws_driver = connect_ckip_drivers(self.nlp_model)[0] + + def remove_empty_string(self, sentence_list: List[str]) -> List[str]: + """The remove_empty_string method removes empty string in `sentence_list`. + Args: + sentence_list (list) + Returns: + a list + """ + return list(filter(lambda value: value != "", sentence_list)) + + def segment(self) -> List[Union[List[None], List[List[str]]]]: + """The segment method divides written text in `sentence_lists` into meaningful units. 
+ Returns: + a list of splitting text, an empty list otherwise. + """ + invalid_list = not self.sentence_list or all( + [value == "" for value in self.sentence_list] + ) + + if invalid_list: + return self.sentence_list + + filtered_list = self.remove_empty_string(self.sentence_list) + return self.ws_driver(filtered_list, use_delim=True) diff --git a/twNLP-app/src/utils/cwn/__init__.py b/twNLP-app/src/utils/cwn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5316e51e18da6e2d6b09433d66a5ac3f956d4d1a --- /dev/null +++ b/twNLP-app/src/utils/cwn/__init__.py @@ -0,0 +1,4 @@ +from .tagger import disambiguate_word_sense, create_cwn_sense_tags + + +__all__ = ["disambiguate_word_sense", "create_cwn_sense_tags"] diff --git a/twNLP-app/src/utils/cwn/tagger.py b/twNLP-app/src/utils/cwn/tagger.py new file mode 100644 index 0000000000000000000000000000000000000000..cb1ea997701f85b93f722293f6c6f27961acf6ba --- /dev/null +++ b/twNLP-app/src/utils/cwn/tagger.py @@ -0,0 +1,39 @@ +import asyncio +from typing import List, Tuple +from DistilTag import DistilTag +from CwnSenseTagger import senseTag + + +# -------------------------------------------------------------------- +# distil tag + +tagger = DistilTag() + + +async def tag_values(value): + """The tag_values function makes `tagger.tag` function an asynchronous function.""" + return tagger.tag(value) + + +async def disambiguate_word_sense(sentence_list: List[str]) -> List[Tuple[str]]: + """The disambiguate_word_sense function disambiuates the word sense. + Args: + sentence_list (list): a list of sentences + Returns: + a list of tuples. 
+ """ + + return await asyncio.gather(*list(map(tag_values, sentence_list))) + + +# -------------------------------------------------------------------- +# CWN sense tag + + +async def sense_value(value): + """The sense_value function makes `senseTag` function an asynchronous function.""" + return senseTag(value) + + +async def create_cwn_sense_tags(disambiguated_list: List[List[List[tuple]]]): + return await asyncio.gather(*list(map(sense_value, disambiguated_list))) diff --git a/twNLP-app/src/utils/stores/__init__.py b/twNLP-app/src/utils/stores/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d7e0771446ebd27c4e2ea12ab57e0664fe045b38 --- /dev/null +++ b/twNLP-app/src/utils/stores/__init__.py @@ -0,0 +1,4 @@ +from .store import Store + + +__all__ = ["Store"] diff --git a/twNLP-app/src/utils/stores/store.py b/twNLP-app/src/utils/stores/store.py new file mode 100644 index 0000000000000000000000000000000000000000..581e3b9bd16d5954706af642bbe92e4dbf9a8da5 --- /dev/null +++ b/twNLP-app/src/utils/stores/store.py @@ -0,0 +1,48 @@ +from dataclasses import dataclass +from typing import Any, Callable, Dict + + +@dataclass +class Store: + """ + The Store object keeps tracks of changes and generates new states via a reducer. 
+ """ + + reducer: Callable + + def __post_init__(self): + if callable(self.reducer) != True: + raise ValueError("Expecting a callable reducer function") + + self.__states = None + self.__listeners = list() + self.__reducer = self.reducer + + def dispatch(self, action: Dict[str, Any]): + if type(action) != dict: + raise ValueError("Expecting action to be of type dictionary") + + has_kind = "kind" in action + + if not has_kind: + raise ValueError("Action is expected to have an attribute 'kind'") + + currentStates = None + + if type(self.__states) == dict: + currentStates = self.__states.copy() + + self.__states = self.__reducer(currentStates, action) + self.__emitListeners() + pass + + def __emitListeners(self): + for listener in self.__listeners: + listener() + + def add_listener(self, callback): + if callable(callback): + self.__listeners.append(callback) + + def get_state(self): + return self.__states diff --git a/twNLP-app/src/utils/text/__init__.py b/twNLP-app/src/utils/text/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e51294f35aae1b9909ce64ea069b63ccf88b32ab --- /dev/null +++ b/twNLP-app/src/utils/text/__init__.py @@ -0,0 +1,5 @@ +from .textsubscript import add_multiple_textsubscripts +from .text_color import create_entity_color, create_pos_color + + +__all__ = ["add_multiple_textsubscripts", "create_entity_color", "create_pos_color"] diff --git a/twNLP-app/src/utils/text/text_color.py b/twNLP-app/src/utils/text/text_color.py new file mode 100644 index 0000000000000000000000000000000000000000..363025c9b232d4c46e092465fcfff7e9bcfc3e82 --- /dev/null +++ b/twNLP-app/src/utils/text/text_color.py @@ -0,0 +1,57 @@ +def create_pos_color(pos: str) -> str: + """The create_color function creats a text color base on the `pos` type. + Args: + pos (str): part of speech + Returns: + a str (e.g. 
'rgb(255,0,0)' or '#ff0000') + """ + + if len(pos) >= 4: + return "rgb(102, 102, 102)" + + pos_color_factories = { + "A": "rgb(21, 170, 191)", + "C": "rgb(231, 41, 138)", + "D": "rgb(117, 112, 179)", + "I": "rgb(102, 166, 30)", + "N": "rgb(27, 158, 119)", + "P": "rgb(102, 166, 30)", + "S": "rgb(166, 118, 29)", + "T": "rgb(102, 166, 30)", + "V": "rgb(217, 95, 2)", + "F": "rgb(230, 171, 2)", + } + + return pos_color_factories[pos[0]] + + +def create_entity_color(entity: str): + """The create_color function creats a text color base on the `entity` type. + Args: + entity (str): an entity type from the result of NER + Returns: + a str (e.g. 'rgb(255,0,0)' or '#ff0000') + """ + + entity_color_factories = { + "GPE": "rgb(102, 166, 30)", + "PERSON": "rgb(166, 118, 29)", + "DATE": "rgb(217, 95, 2)", + "ORG": "rgb(102, 166, 30)", + "CARDINAL": "rgb(27, 158, 119)", + "ORDINAL": "rgb(231, 41, 138)", + "NORP": "rgb(117, 112, 179)", + "LOC": "rgb(27, 158, 119)", + "TIME": "rgb(117, 112, 179)", + "FAC": "rgb(231, 41, 138)", + "MONEY": "rgb(217, 95, 2)", + "ORDINAL": "rgb(231, 41, 138)", + "EVENT": "rgb(117, 112, 179)", + "WORK_OF_ART": "rgb(231, 41, 138)", + "QUANTITY": "rgb(217, 95, 2)", + "PERCENT": "rgb(230, 171, 2)", + "LANGUAGE": "rgb(230, 171, 2)", + "PRODUCT": "rgb(27, 158, 119)", + "LAW": "rgb(166, 118, 29)", + } + return entity_color_factories[entity] diff --git a/twNLP-app/src/utils/text/textsubscript.py b/twNLP-app/src/utils/text/textsubscript.py new file mode 100644 index 0000000000000000000000000000000000000000..b4fa094aade2e0bf43876cee8695fab279abdd87 --- /dev/null +++ b/twNLP-app/src/utils/text/textsubscript.py @@ -0,0 +1,39 @@ +import asyncio +from itertools import chain +from typing import List, Union +from .text_color import create_pos_color + + +async def add_pos_textsubscript(data_list: List[str]) -> str: + + create = ( + lambda value: f"""{value[0]}({value[1]})""" + ) + return "".join(list(map(create, data_list))) + + +async def 
add_ws_textsubscript(data_list: List[str]) -> str: + create = lambda value: f"{value}" + return "".join(list(map(create, data_list))) + + +async def add_cwn_textsubscript(segmented_list) -> str: + create = ( + lambda value: f"{value[0]}{value[1]}" + ) + return "".join(list(map(create, chain(*segmented_list)))) + + +async def add_multiple_textsubscripts( + target: str, list_of_lists: List[List[Union[str, tuple]]] +) -> List[str]: + + textsubscript_factories = { + "ws": add_ws_textsubscript, + "pos": add_pos_textsubscript, + "cwn": add_cwn_textsubscript, + } + + return await asyncio.gather( + *list(map(textsubscript_factories[target], list_of_lists)) + ) diff --git a/twNLP-app/src/views/__init__.py b/twNLP-app/src/views/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/twNLP-app/src/views/components/form/__init__.py b/twNLP-app/src/views/components/form/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..44232621298e24c89ff927ae9a442a4e95c503af --- /dev/null +++ b/twNLP-app/src/views/components/form/__init__.py @@ -0,0 +1,4 @@ +from .form import form_controller + + +__all__ = ["form_controller"] diff --git a/twNLP-app/src/views/components/form/form.py b/twNLP-app/src/views/components/form/form.py new file mode 100644 index 0000000000000000000000000000000000000000..23888c070c02a0be503d2a141a1bf54181c7c71d --- /dev/null +++ b/twNLP-app/src/views/components/form/form.py @@ -0,0 +1,13 @@ +from typing import Union +from .form_components import add_text_area, add_selectbox, add_multiselect + + +def form_controller(control: str, **kwargs) -> Union[str, int]: + """The form_controller function builds a form component based on `control`.""" + form_factories = { + "text-area": add_text_area, + "select-box": add_selectbox, + "multi-select": add_multiselect, + } + + return form_factories[control](**kwargs) diff --git 
a/twNLP-app/src/views/components/form/form_components/__init__.py b/twNLP-app/src/views/components/form/form_components/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1cde5b6c4a3aa49a2f72d2b32809b2677e8f3a9f --- /dev/null +++ b/twNLP-app/src/views/components/form/form_components/__init__.py @@ -0,0 +1,6 @@ +from .text_area import add_text_area +from .select_box import add_selectbox +from .multi_select import add_multiselect + + +__all__ = ["add_text_area", "add_selectbox", "add_multiselect"] diff --git a/twNLP-app/src/views/components/form/form_components/multi_select.py b/twNLP-app/src/views/components/form/form_components/multi_select.py new file mode 100644 index 0000000000000000000000000000000000000000..4ebb68844dd354987071a6da6e7c94b5a01c70f7 --- /dev/null +++ b/twNLP-app/src/views/components/form/form_components/multi_select.py @@ -0,0 +1,14 @@ +from streamlit import sidebar +from typing import Callable, List, Optional + + +def add_multiselect( + title: str, + options: List[str], + key: Optional[str] = None, + on_change: Optional[Callable] = None, + format_func: Optional[Callable] = None +): + return sidebar.multiselect( + title, options=options, default=list(options), key=key, on_change=on_change, format_func=format_func + ) diff --git a/twNLP-app/src/views/components/form/form_components/select_box.py b/twNLP-app/src/views/components/form/form_components/select_box.py new file mode 100644 index 0000000000000000000000000000000000000000..aa0a0b3ebdd73bc7c33e42fd6c96615b288a25d0 --- /dev/null +++ b/twNLP-app/src/views/components/form/form_components/select_box.py @@ -0,0 +1,11 @@ +from streamlit import selectbox +from typing import Callable, Optional, Tuple + + +def add_selectbox( + title: str, + options: Tuple[str], + key: Optional[str] = None, + on_change: Optional[Callable] = None, +): + return selectbox(title, options=options, key=key, on_change=on_change) diff --git 
a/twNLP-app/src/views/components/form/form_components/text_area.py b/twNLP-app/src/views/components/form/form_components/text_area.py new file mode 100644 index 0000000000000000000000000000000000000000..c9134b19d4e39db9b7e337358a56bd86f3b8ec2f --- /dev/null +++ b/twNLP-app/src/views/components/form/form_components/text_area.py @@ -0,0 +1,11 @@ +from typing import Optional +from streamlit import text_area + + +def add_text_area( + title: str, + placeholder: Optional[str] = "", + height: Optional[int] = None, + key: Optional[str] = None, +): + return text_area(title, placeholder=placeholder, height=height, key=key) diff --git a/twNLP-app/src/views/components/sidebar/__init__.py b/twNLP-app/src/views/components/sidebar/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..070436e9db4005c8755981b3053a9970a249236b --- /dev/null +++ b/twNLP-app/src/views/components/sidebar/__init__.py @@ -0,0 +1,4 @@ +from .sidebar import visualize_side_bar + + +__all__ = ["visualize_side_bar"] diff --git a/twNLP-app/src/views/components/sidebar/options.py b/twNLP-app/src/views/components/sidebar/options.py new file mode 100644 index 0000000000000000000000000000000000000000..05725a449fa4f4bbca822f510f5b06ea57d85e78 --- /dev/null +++ b/twNLP-app/src/views/components/sidebar/options.py @@ -0,0 +1,3 @@ +PIPELINE_OPTIONS = ("CKIP", "CWN") +CWN_VISUALIZERS = ["DistilTag", "CwnSenseTag"] +CKIP_VISUALIZERS = [{"wsg": "斷詞系統"}, {"pos": "詞類標記"}, {"ner": "實體辨識"}] diff --git a/twNLP-app/src/views/components/sidebar/sidebar.py b/twNLP-app/src/views/components/sidebar/sidebar.py new file mode 100644 index 0000000000000000000000000000000000000000..9660b0e15af906026540f53faead5d37419400f9 --- /dev/null +++ b/twNLP-app/src/views/components/sidebar/sidebar.py @@ -0,0 +1,61 @@ +import streamlit as st +from ..form import form_controller +from typing import Dict, List, Union +from .options import PIPELINE_OPTIONS, CKIP_VISUALIZERS, CWN_VISUALIZERS + + +def remove_input_data(): + if 
"input_data" in st.session_state: + del st.session_state["input_data"] + + +def format_option(option: Union[str, Dict[str, str]]) -> str: + """The format_options function formats each option in a list of options. + If `option` is a dict, the function will extract the value from the dict. + + Args: + option (str or dict) + Returns: + a str + """ + + if isinstance(option, dict): + return list(option.values())[0] + + return option + + +def visualize_side_bar(ckip_nlp_models: List[str]): + with st.sidebar: + st.image( + "https://avatars.githubusercontent.com/u/21136511?s=200&v=4", width=100 + ) + + + pipeline_options = form_controller( + control="select-box", + title="中文 NLP 管線處理:", + options=PIPELINE_OPTIONS, + on_change=remove_input_data, + ) + + model_options = None + + if pipeline_options == "CKIP": + model_options = form_controller( + control="select-box", + title="NLP 模型:", + options=ckip_nlp_models, + key="model", + ) + + visualizers = {"CKIP": CKIP_VISUALIZERS, "CWN": CWN_VISUALIZERS} + + active_visualizers = form_controller( + control="multi-select", + title="功能:", + options=visualizers[pipeline_options], + format_func=format_option, + ) + + return model_options, pipeline_options, active_visualizers diff --git a/twNLP-app/src/views/components/spinner/__init__.py b/twNLP-app/src/views/components/spinner/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5ee6a819550c9c1bd582ab81d7fc9cd0df0a91ca --- /dev/null +++ b/twNLP-app/src/views/components/spinner/__init__.py @@ -0,0 +1,5 @@ +from .cwn import download_cwn_drivers +from .ckip import dowload_ckip_package + + +__all__ = ["download_cwn_drivers", "dowload_ckip_package"] diff --git a/twNLP-app/src/views/components/spinner/ckip.py b/twNLP-app/src/views/components/spinner/ckip.py new file mode 100644 index 0000000000000000000000000000000000000000..e3a92ba4eae35a80cb7ec38bff7817ccce8dd991 --- /dev/null +++ b/twNLP-app/src/views/components/spinner/ckip.py @@ -0,0 +1,17 @@ +import asyncio 
+import streamlit as st +from pathlib import Path +from configs import ckip_path, download_ckip_drivers + + +def dowload_ckip_package(ckip_nlp_models): + drivers = list( + map(lambda model: ckip_path / f"{model}_drivers.pickle", ckip_nlp_models) + ) + + while not all(list(map(lambda path: Path(path).exists(), drivers))): + with st.spinner("Downloading CKIP models ..."): + asyncio.run(download_ckip_drivers(ckip_nlp_models)) + + if all(list(map(lambda path: Path(path).exists(), drivers))): + break diff --git a/twNLP-app/src/views/components/spinner/cwn.py b/twNLP-app/src/views/components/spinner/cwn.py new file mode 100644 index 0000000000000000000000000000000000000000..e908594f2350eaaef23417e1f9b72be6a9d80c15 --- /dev/null +++ b/twNLP-app/src/views/components/spinner/cwn.py @@ -0,0 +1,19 @@ +import streamlit as st +from pathlib import Path +from configs import download_cwn_models, cwn_model_path + + +def download_cwn_drivers(upgrade): + cwn_drivers = [ + cwn_model_path / "cwn-graph-v.2022.08.01.pyobj", + cwn_model_path / "manifest.json", + cwn_model_path / "cwn-wsd-model", + cwn_model_path / "tagmodel", + ] + + while not all(list(map(lambda path: Path(path).exists(), cwn_drivers))): + with st.spinner("Downloading CWN models ..."): + download_cwn_models(upgrade) + + if all(list(map(lambda path: Path(path).exists(), cwn_drivers))): + break diff --git a/twNLP-app/src/views/containers/__init__.py b/twNLP-app/src/views/containers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ad951114ec4af29c4ccb88a0ac4cbb2f7f68dd88 --- /dev/null +++ b/twNLP-app/src/views/containers/__init__.py @@ -0,0 +1,7 @@ +from .cwn import display_cwn +from .ckip import display_ckip +from .data_form import display_data_form + + + +__all__ = ["display_data_form", "display_cwn", "display_ckip"] diff --git a/twNLP-app/src/views/containers/ckip/__init__.py b/twNLP-app/src/views/containers/ckip/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..6ae5aa3191b65aad8bfbd3902913facd6dbd55b7 --- /dev/null +++ b/twNLP-app/src/views/containers/ckip/__init__.py @@ -0,0 +1,4 @@ +from .ckip import display_ckip + + +__all__ = ["display_ckip"] diff --git a/twNLP-app/src/views/containers/ckip/ckip.py b/twNLP-app/src/views/containers/ckip/ckip.py new file mode 100644 index 0000000000000000000000000000000000000000..01dac71c37eb9dea026aa77f6358fba19d3b2a64 --- /dev/null +++ b/twNLP-app/src/views/containers/ckip/ckip.py @@ -0,0 +1,20 @@ +import streamlit as st +from typing import List +from ...services import request + +divider_tag = "