Spaces:
Runtime error
Runtime error
Commit
·
dae4805
1
Parent(s):
d54a6ab
chore: Add Dockerfile and requirements.txt for containerization
Browse files- .gitignore +160 -0
- Dockerfile +13 -0
- app.py +196 -0
- note.txt +23 -0
- requirements.txt +77 -0
- run.py +9 -0
.gitignore
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# poetry
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
102 |
+
#poetry.lock
|
103 |
+
|
104 |
+
# pdm
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
106 |
+
#pdm.lock
|
107 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
108 |
+
# in version control.
|
109 |
+
# https://pdm.fming.dev/#use-with-ide
|
110 |
+
.pdm.toml
|
111 |
+
|
112 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
113 |
+
__pypackages__/
|
114 |
+
|
115 |
+
# Celery stuff
|
116 |
+
celerybeat-schedule
|
117 |
+
celerybeat.pid
|
118 |
+
|
119 |
+
# SageMath parsed files
|
120 |
+
*.sage.py
|
121 |
+
|
122 |
+
# Environments
|
123 |
+
.env
|
124 |
+
.venv
|
125 |
+
env/
|
126 |
+
venv/
|
127 |
+
ENV/
|
128 |
+
env.bak/
|
129 |
+
venv.bak/
|
130 |
+
|
131 |
+
# Spyder project settings
|
132 |
+
.spyderproject
|
133 |
+
.spyproject
|
134 |
+
|
135 |
+
# Rope project settings
|
136 |
+
.ropeproject
|
137 |
+
|
138 |
+
# mkdocs documentation
|
139 |
+
/site
|
140 |
+
|
141 |
+
# mypy
|
142 |
+
.mypy_cache/
|
143 |
+
.dmypy.json
|
144 |
+
dmypy.json
|
145 |
+
|
146 |
+
# Pyre type checker
|
147 |
+
.pyre/
|
148 |
+
|
149 |
+
# pytype static type analyzer
|
150 |
+
.pytype/
|
151 |
+
|
152 |
+
# Cython debug symbols
|
153 |
+
cython_debug/
|
154 |
+
|
155 |
+
# PyCharm
|
156 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
157 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
158 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
159 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
+
#.idea/
|
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11-slim-bullseye
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
|
7 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
RUN python -m spacy download en_core_web_lg
|
10 |
+
|
11 |
+
COPY . .
|
12 |
+
|
13 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
app.py
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import spacy
|
4 |
+
import uvicorn
|
5 |
+
import docx
|
6 |
+
import requests
|
7 |
+
import spacy
|
8 |
+
from presidio_analyzer import RecognizerRegistry
|
9 |
+
from presidio_analyzer.nlp_engine import (
|
10 |
+
NlpEngine,
|
11 |
+
NlpEngineProvider,
|
12 |
+
)
|
13 |
+
# import google.generativeai as genai
|
14 |
+
from dotenv import load_dotenv
|
15 |
+
from transformers import pipeline
|
16 |
+
from presidio_analyzer import AnalyzerEngine
|
17 |
+
from presidio_anonymizer import AnonymizerEngine
|
18 |
+
|
19 |
+
from fastapi import FastAPI, Request, UploadFile, File
|
20 |
+
from fastapi import FastAPI, Request
|
21 |
+
from fastapi.responses import JSONResponse
|
22 |
+
|
23 |
+
load_dotenv()
|
24 |
+
app = FastAPI(root_path=os.environ.get("ROOT_PATH"))
|
25 |
+
# genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
26 |
+
# model = genai.GenerativeModel('gemini-pro')
|
27 |
+
HUGGINGFACE_KEY = os.environ.get("HUGGINGFACE_KEY")
|
28 |
+
# pipe = pipeline("fill-mask", model="pranavraj1103/ksp-mask-model")
|
29 |
+
|
30 |
+
|
31 |
+
def create_nlp_engine_with_spacy(
    model_path: str = "en_core_web_sm",
):
    """
    Build a spaCy-backed Presidio NLP engine plus a recognizer registry.

    :param model_path: path to model / model name.
    :return: ``(nlp_engine, registry)`` where ``registry`` has had the
        predefined recognizers loaded against ``nlp_engine``.
    """
    # Translation table from NER labels to Presidio entity names.
    entity_mapping = {
        "PER": "PERSON",
        "PERSON": "PERSON",
        "NORP": "NRP",
        "FAC": "FACILITY",
        "LOC": "LOCATION",
        "GPE": "LOCATION",
        "LOCATION": "LOCATION",
        "ORG": "ORGANIZATION",
        "ORGANIZATION": "ORGANIZATION",
        "DATE": "DATE_TIME",
        "TIME": "DATE_TIME",
    }
    ner_settings = {
        "model_to_presidio_entity_mapping": entity_mapping,
        "low_confidence_score_multiplier": 0.4,
        "low_score_entity_names": ["ORG", "ORGANIZATION"],
    }
    engine_settings = {
        "nlp_engine_name": "spacy",
        "models": [{"lang_code": "en", "model_name": model_path}],
        "ner_model_configuration": ner_settings,
    }

    provider = NlpEngineProvider(nlp_configuration=engine_settings)
    engine = provider.create_engine()

    recognizers = RecognizerRegistry()
    recognizers.load_predefined_recognizers(nlp_engine=engine)

    return engine, recognizers
|
66 |
+
|
67 |
+
nlp_engine, registry = create_nlp_engine_with_spacy()
|
68 |
+
|
69 |
+
analyzer = AnalyzerEngine(nlp_engine=nlp_engine, registry=registry)
|
70 |
+
anonymizer = AnonymizerEngine()
|
71 |
+
|
72 |
+
@app.get("/")
async def read_root():
    """Return a fixed greeting payload for the root route."""
    greeting = {"message": "Hello World"}
    return greeting
|
75 |
+
|
76 |
+
|
77 |
+
@app.get("/vocab_thresh_masking")
async def vocab_thresh_masking(text: str, threshold: int):
    """
    Mask every token whose surface form is rarer than ``threshold``.

    The text is tokenized with spaCy, occurrences of each token text are
    counted, and any token seen fewer than ``threshold`` times is replaced
    by ``[MASK]``.

    :param text: input text to mask.
    :param threshold: minimum number of occurrences a token needs in order
        to be kept verbatim.
    :return: ``(masked_text, pii_locations)`` where ``masked_text`` is the
        space-joined token sequence and ``pii_locations`` is a list of
        ``(start, end, "UNKNOWN")`` tuples built from ``token.idx`` for each
        masked token.
    """
    from collections import Counter

    # NOTE(review): the model is loaded from disk on every request; consider
    # loading it once at start-up if latency matters.
    ner_model = spacy.load("en_core_web_sm")
    doc = ner_model(text)

    # Kept for direct (non-HTTP) callers that still pass a numeric string.
    threshold = int(threshold)

    word_counts = Counter(token.text for token in doc)
    # A set gives O(1) membership tests in the masking loop below.
    frequent_words = {
        word for word, count in word_counts.items() if count >= threshold
    }

    masked_text = []
    pii_locations = []  # (start index, end index, type) tuples
    for token in doc:
        if token.text in frequent_words:
            masked_text.append(token.text)
            continue
        masked_text.append("[MASK]")
        # Potentially masked PII: record location and tentative type.
        pii_locations.append((token.idx, token.idx + len(token.text), "UNKNOWN"))
    return " ".join(masked_text), pii_locations
|
97 |
+
|
98 |
+
|
99 |
+
@app.get("/entity_tagger_masking")
async def entity_tagger_masking(text: str):
    """
    Mask tokens that spaCy tags as a person, location, organization or date.

    :param text: input text to mask.
    :return: ``(masked_text, pii_locations)`` where ``masked_text`` is the
        space-joined token sequence with ``[MASK]`` substituted for tagged
        tokens, and ``pii_locations`` is a list of ``(start, end, type)``
        tuples built from ``token.idx`` for each masked token.
    """
    # spaCy entity label -> PII type reported to the caller.  "GPE"
    # (countries, cities, states) is treated as a location alongside "LOC",
    # matching the LOC/GPE -> LOCATION mapping used for the Presidio engine
    # in this file; checking "LOC" alone leaves most place names unmasked.
    label_to_pii_type = {
        "PERSON": "PERSON",
        "LOC": "LOCATION",
        "GPE": "LOCATION",
        "ORG": "ORGANIZATION",
        "DATE": "DATE",
    }

    # NOTE(review): the model is loaded from disk on every request; consider
    # loading it once at start-up if latency matters.
    ner_model = spacy.load("en_core_web_sm")
    doc = ner_model(text)

    masked_text = []
    pii_locations = []
    for token in doc:
        pii_type = label_to_pii_type.get(token.ent_type_)
        if pii_type is None:
            masked_text.append(token.text)
            continue
        masked_text.append("[MASK]")
        pii_locations.append((token.idx, token.idx + len(token.text), pii_type))
    return " ".join(masked_text), pii_locations
|
121 |
+
|
122 |
+
|
123 |
+
@app.get("/email_and_phone")
async def identify_email_and_phone(text):
    """
    Locate e-mail addresses and phone numbers by regex and mask them.

    :param text: input text to scan.
    :return: ``(masked_text, pii_locations)``; the locations are
        ``(start, end, label)`` tuples whose offsets refer to the
        unmodified input, e-mails first and phone numbers second.
    """
    labelled_patterns = (
        (r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", "EMAIL"),
        (r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b", "PHONE NUMBER"),
    )

    # Collect every hit against the original text before anything is replaced.
    pii_locations = []
    for pattern, label in labelled_patterns:
        pii_locations.extend(
            (found.start(), found.end(), label)
            for found in re.finditer(pattern, text)
        )

    # Apply the substitutions in the same order: e-mails, then phone numbers.
    masked = text
    for pattern, _label in labelled_patterns:
        masked = re.sub(pattern, "[MASK]", masked)
    return masked, pii_locations
|
140 |
+
|
141 |
+
|
142 |
+
@app.get("/anonymize_masked_text")
async def anonymize_masked_text(masked_text: str):
    """
    Send masked text to the hosted fill-mask model and return its reply.

    The caller's ``masked_text`` is forwarded as the ``inputs`` field of the
    request; previously a hard-coded sample sentence was sent and the
    parameter was ignored.

    :param masked_text: text containing mask placeholders to be filled.
    :return: the decoded JSON body of the inference API response.
    """
    # NOTE(review): the masking endpoints in this file emit "[MASK]" while
    # the earlier hard-coded sample used "<mask>"; confirm which mask token
    # the hosted model expects before relying on the output.
    API_URL = "https://api-inference.huggingface.co/models/pranavraj1103/ksp-mask-model"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_KEY}"}

    def query(payload):
        # A timeout keeps a stalled upstream from hanging the request forever.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
        return response.json()

    return query({"inputs": masked_text})
|
160 |
+
|
161 |
+
|
162 |
+
@app.post("/parse_doc")
async def parse_doc(file: UploadFile):
    """
    Extract the plain text of an uploaded ``.txt`` or Word document.

    :param file: the uploaded file. Names ending in ``.txt`` (any letter
        case) are read as text; anything else is handed to ``docx.Document``.
    :return: the document text as a ``str``; for Word documents the
        paragraphs are joined with newlines.
    """
    # ``filename`` may be missing on the upload; treat that as "not .txt".
    filename = (file.filename or "").lower()
    if filename.endswith(".txt"):
        # Decode explicitly so both branches return ``str`` and a stray
        # non-UTF-8 byte does not fail the whole request.
        return file.file.read().decode("utf-8", errors="replace")

    doc = docx.Document(file.file)
    return "\n".join(para.text for para in doc.paragraphs)
|
171 |
+
|
172 |
+
|
173 |
+
@app.post("/presidio_mask")
async def presidio_mask(text):
    """
    Run the shared Presidio analyzer over ``text`` and list its findings.

    Findings scoring below 0.1 are dropped, and only the first finding for
    any given ``(start, end)`` span is reported.

    :param text: input text to analyze (English).
    :return: list of dicts with ``start``, ``end``, ``entity_type``,
        ``text`` (the matched slice) and ``score``.
    """
    findings = analyzer.analyze(text=text, language="en")

    reported_spans = set()
    payload = []
    for finding in findings:
        span = (finding.start, finding.end)
        if not (finding.score < 0.1) and span not in reported_spans:
            reported_spans.add(span)
            payload.append(
                {
                    "start": finding.start,
                    "end": finding.end,
                    "entity_type": finding.entity_type,
                    "text": text[finding.start:finding.end],
                    "score": finding.score,
                }
            )
    return payload
|
196 |
+
|
note.txt
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#downloading spacy model
|
2 |
+
python -m spacy download en_core_web_lg
|
3 |
+
|
4 |
+
sample_text = "My phone number is 212-555-5555, and my friend number is 9876543210"
|
5 |
+
sample_text_2 = """The text in the image is a police report from the Amengad Police Station in Bagalkot, Karnataka, India. The report is dated 10-11-2022 and is about a man named Ramasawamy. The report states that Ramasawamy is a "rowdy" and a "habitual offender" who "disturbs public peace in public places." The report also states that Ramasawamy is "under surveillance."
|
6 |
+
|
7 |
+
The report is signed by a police officer named SOMAPPA. The report is also stamped with the seal of the Amengad Police Station.
|
8 |
+
|
9 |
+
Police Report Police Station:
|
10 |
+
|
11 |
+
Amengad PS Case Number: 2022000003
|
12 |
+
|
13 |
+
Date: 10-11-2022
|
14 |
+
|
15 |
+
Subject: Ramasawamy
|
16 |
+
|
17 |
+
Details: The accused is a rowdy and a habitual offender. He disturbs public peace in public places. He is under surveillance.
|
18 |
+
|
19 |
+
Action Taken: The accused has been warned. He has been told to stop disturbing public peace.
|
20 |
+
|
21 |
+
Signature: SOMAPPA Police
|
22 |
+
|
23 |
+
Officer Seal: Amengad Police Station"""
|
requirements.txt
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
annotated-types==0.6.0
|
2 |
+
anyio==4.3.0
|
3 |
+
blis==0.7.11
|
4 |
+
catalogue==2.0.10
|
5 |
+
certifi==2024.2.2
|
6 |
+
charset-normalizer==3.3.2
|
7 |
+
click==8.1.7
|
8 |
+
cloudpathlib==0.16.0
|
9 |
+
colorama==0.4.6
|
10 |
+
confection==0.1.4
|
11 |
+
cymem==2.0.8
|
12 |
+
dnspython==2.6.1
|
13 |
+
email_validator==2.1.1
|
14 |
+
en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl#sha256=ab70aeb6172cde82508f7739f35ebc9918a3d07debeed637403c8f794ba3d3dc
|
15 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl#sha256=86cc141f63942d4b2c5fcee06630fd6f904788d2f0ab005cce45aadb8fb73889
|
16 |
+
fastapi==0.111.0
|
17 |
+
fastapi-cli==0.0.3
|
18 |
+
filelock==3.14.0
|
19 |
+
fsspec==2024.5.0
|
20 |
+
h11==0.14.0
|
21 |
+
httpcore==1.0.5
|
22 |
+
httptools==0.6.1
|
23 |
+
httpx==0.27.0
|
24 |
+
huggingface-hub==0.23.0
|
25 |
+
idna==3.7
|
26 |
+
Jinja2==3.1.4
|
27 |
+
langcodes==3.4.0
|
28 |
+
language_data==1.2.0
|
29 |
+
lxml==5.2.2
|
30 |
+
marisa-trie==1.1.1
|
31 |
+
markdown-it-py==3.0.0
|
32 |
+
MarkupSafe==2.1.5
|
33 |
+
mdurl==0.1.2
|
34 |
+
murmurhash==1.0.10
|
35 |
+
numpy==1.26.4
|
36 |
+
orjson==3.10.3
|
37 |
+
packaging==24.0
|
38 |
+
phonenumbers==8.13.37
|
39 |
+
pillow==10.3.0
|
40 |
+
preshed==3.0.9
|
41 |
+
presidio-analyzer==2.2.354
|
42 |
+
presidio-anonymizer==2.2.354
|
43 |
+
pycryptodome==3.20.0
|
44 |
+
pydantic==2.7.1
|
45 |
+
pydantic_core==2.18.2
|
46 |
+
Pygments==2.18.0
|
47 |
+
python-docx==1.1.2
|
48 |
+
python-dotenv==1.0.1
|
49 |
+
python-multipart==0.0.9
|
50 |
+
PyYAML==6.0.1
|
51 |
+
regex==2024.5.15
|
52 |
+
requests==2.31.0
|
53 |
+
requests-file==2.0.0
|
54 |
+
rich==13.7.1
|
55 |
+
safetensors==0.4.3
|
56 |
+
shellingham==1.5.4
|
57 |
+
smart-open==6.4.0
|
58 |
+
sniffio==1.3.1
|
59 |
+
spacy==3.7.4
|
60 |
+
spacy-legacy==3.0.12
|
61 |
+
spacy-loggers==1.0.5
|
62 |
+
srsly==2.4.8
|
63 |
+
starlette==0.37.2
|
64 |
+
thinc==8.2.3
|
65 |
+
tldextract==5.1.2
|
66 |
+
tokenizers==0.19.1
|
67 |
+
tqdm==4.66.4
|
68 |
+
transformers==4.40.2
|
69 |
+
typer==0.9.4
|
70 |
+
typing_extensions==4.11.0
|
71 |
+
ujson==5.10.0
|
72 |
+
urllib3==2.2.1
|
73 |
+
uvicorn==0.29.0
|
74 |
+
wasabi==1.1.2
|
75 |
+
watchfiles==0.21.0
|
76 |
+
weasel==0.3.4
|
77 |
+
websockets==12.0
|
run.py
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import argparse
|
4 |
+
from typing import List, Optional, Union
|
5 |
+
|
6 |
+
import uvicorn
|
7 |
+
|
8 |
+
if __name__ == "__main__":
    # Script entry point: serve ``app:app`` on localhost with auto-reload.
    server_options = {
        "host": "127.0.0.1",
        "port": 8000,
        "reload": True,
    }
    uvicorn.run("app:app", **server_options)
|