Upload 22 files
- .github/workflows/sync_to_hf_space.yml +18 -0
- .gitignore +2 -0
- Dockerfile +21 -0
- README.md +87 -12
- __init__.py +0 -0
- apis/__init__.py +0 -0
- apis/chat_api.py +232 -0
- apis/lang_name.json +17 -0
- apis/models/__init__.py +0 -0
- examples/__init__.py +1 -0
- examples/chat_with_openai.py +25 -0
- examples/chat_with_post.py +55 -0
- messagers/__init__.py +0 -0
- messagers/message_composer.py +128 -0
- messagers/message_outputer.py +63 -0
- models/__init__.py +0 -0
- networks/__init__.py +0 -0
- networks/message_streamer.py +97 -0
- requirements.txt +9 -0
- utils/__init__.py +69 -0
- utils/enver.py +60 -0
- utils/logger.py +269 -0
.github/workflows/sync_to_hf_space.yml
ADDED
@@ -0,0 +1,18 @@
+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  workflow_dispatch:
+
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: git push -f https://Hansimov:[email protected]/spaces/Hansimov/hf-llm-api main
.gitignore
ADDED
@@ -0,0 +1,2 @@
+secrets.json
+__pycache__
Dockerfile
ADDED
@@ -0,0 +1,21 @@
+FROM python:3.11-slim
+WORKDIR $HOME/app
+COPY . .
+RUN pip install -r requirements.txt
+VOLUME /data
+EXPOSE 23333
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+COPY --chown=user . $HOME/app
+RUN mkdir -p $HOME/app/models
+RUN chmod 777 $HOME/app/models
+ENV MODELS_PATH=$HOME/app/models
+RUN mkdir -p $HOME/app/uploads
+RUN chmod 777 $HOME/app/uploads
+
+CMD ["python", "-m", "apis.chat_api"]
README.md
CHANGED
@@ -1,12 +1,87 @@
----
-title: Selam Translate
-emoji:
-colorFrom:
-colorTo: gray
-sdk: docker
-
-
-
-
-
-
+---
+title: Selam Translate API
+emoji: ☯️
+colorFrom: gray
+colorTo: gray
+sdk: docker
+app_port: 23333
+---
+
+## Selam Translate API
+Multilingual Translation and Language Detection API.
+
+## Features
+
+✅ Implemented:
+
+- Language detection (`/detect`)
+- Translation via Google Translate (`/translate`)
+- Translation via local AI models (`/translate/ai`) using Hugging Face `transformers`
+- Docker deployment
+
+🔤 Supported languages (primary):
+
+`auto`, `en` (English), `am` (Amharic), `ar` (Arabic), `ti` (Tigrinya), `om` (Oromo), `so` (Somali), `ko` (Korean), `zh-CN` (Chinese Simplified), `zh-TW` (Chinese Traditional), `fr` (French), `it` (Italian), `ja` (Japanese), `de` (German)
+
+## Run API service
+
+### Run in Command Line
+
+**Install dependencies:**
+
+```bash
+# pipreqs . --force --mode no-pin
+pip install -r requirements.txt
+```
+
+**Run API:**
+
+```bash
+python -m apis.chat_api
+```
+
+## Run via Docker
+
+**Docker build:**
+
+```bash
+sudo docker build -t hf-llm-api:1.0 . --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
+```
+
+**Docker run:**
+
+```bash
+# no proxy
+sudo docker run -p 23333:23333 hf-llm-api:1.0
+
+# with proxy
+sudo docker run -p 23333:23333 --env http_proxy="http://<server>:<port>" hf-llm-api:1.0
+```
+
+## API Usage
+
+### Endpoints
+
+- Detect language
+
+```bash
+curl -X POST http://127.0.0.1:23333/detect \
+  -H "Content-Type: application/json" \
+  -d '{"input_text": "Hello, how are you?"}'
+```
+
+- Translate (Google)
+
+```bash
+curl -X POST http://127.0.0.1:23333/translate \
+  -H "Content-Type: application/json" \
+  -d '{"to_language": "ar", "input_text": "Hello"}'
+```
+
+- Translate (AI model)
+
+```bash
+curl -X POST http://127.0.0.1:23333/translate/ai \
+  -H "Content-Type: application/json" \
+  -d '{"model": "t5-base", "from_language": "en", "to_language": "fr", "input_text": "How are you?"}'
+```
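For client code, a minimal Python sketch of the same calls (assuming the service is reachable at `127.0.0.1:23333` as above; `requests` is already listed in `requirements.txt`):

```python
import requests

BASE_URL = "http://127.0.0.1:23333"  # assumes the service runs locally on the default port

# Detect the language of a text
detected = requests.post(f"{BASE_URL}/detect", json={"input_text": "Hello, how are you?"}).json()
print(detected)  # e.g. {'lang': 'en', 'confidence': ...}

# Translate via Google Translate
translated = requests.post(
    f"{BASE_URL}/translate",
    json={"to_language": "ar", "input_text": "Hello"},
).json()
print(translated["translate"])
```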
__init__.py
ADDED
File without changes

apis/__init__.py
ADDED
File without changes
apis/chat_api.py
ADDED
@@ -0,0 +1,232 @@
+import argparse
+import uvicorn
+import sys
+import os
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import time
+import json
+import torch
+import logging
+
+
+from fastapi import FastAPI
+from fastapi.encoders import jsonable_encoder
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel, Field
+from googletrans import Translator
+from fastapi.middleware.cors import CORSMiddleware
+
+class ChatAPIApp:
+    def __init__(self):
+        self.app = FastAPI(
+            docs_url="/",
+            title="Selam Translate API",
+            swagger_ui_parameters={"defaultModelsExpandDepth": -1},
+            version="1.0",
+        )
+        self.setup_routes()
+
+    def get_available_langs(self):
+        f = open('apis/lang_name.json', "r")
+        self.available_models = json.loads(f.read())
+        return self.available_models
+
+    class TranslateCompletionsPostItem(BaseModel):
+        from_language: str = Field(
+            default="en",
+            description="(str) `Detect`",
+        )
+        to_language: str = Field(
+            default="fa",
+            description="(str) `en`",
+        )
+        input_text: str = Field(
+            default="Hello",
+            description="(str) `Text for translate`",
+        )
+
+
+    def translate_completions(self, item: TranslateCompletionsPostItem):
+        translator = Translator()
+        f = open('apis/lang_name.json', "r")
+        available_langs = json.loads(f.read())
+        from_lang = 'en'
+        to_lang = 'en'
+        for lang_item in available_langs:
+            if item.to_language == lang_item['code']:
+                to_lang = item.to_language
+                break
+
+
+        translated = translator.translate(item.input_text, dest=to_lang)
+        item_response = {
+            "from_language": translated.src,
+            "to_language": translated.dest,
+            "text": item.input_text,
+            "translate": translated.text
+        }
+        json_compatible_item_data = jsonable_encoder(item_response)
+        return JSONResponse(content=json_compatible_item_data)
+
+    def translate_ai_completions(self, item: TranslateCompletionsPostItem):
+        translator = Translator()
+        f = open('apis/lang_name.json', "r")
+        available_langs = json.loads(f.read())
+        from_lang = 'en'
+        to_lang = 'en'
+        for lang_item in available_langs:
+            if item.to_language == lang_item['code']:
+                to_lang = item.to_language
+            if item.from_language == lang_item['code']:
+                from_lang = item.from_language
+
+        if to_lang == 'auto':
+            to_lang = 'en'
+
+        if from_lang == 'auto':
+            from_lang = translator.detect(item.input_text).lang
+
+        # Map ISO/lang codes to NLLB-200 language codes
+        nllb_code_map = {
+            'en': 'eng_Latn',
+            'am': 'amh_Ethi',
+            'ar': 'arb_Arab',
+            'ti': 'tir_Ethi',
+            'om': 'orm_Latn',
+            'so': 'som_Latn',
+            'ko': 'kor_Hang',
+            'zh-CN': 'zho_Hans',
+            'zh-TW': 'zho_Hant',
+            'fr': 'fra_Latn',
+            'de': 'deu_Latn',
+            'it': 'ita_Latn',
+            'ja': 'jpn_Jpan',
+        }
+
+        nllb_src = nllb_code_map.get(from_lang, 'eng_Latn')
+        nllb_tgt = nllb_code_map.get(to_lang, 'eng_Latn')
+
+        if torch.cuda.is_available():
+            device = torch.device("cuda:0")
+        else:
+            device = torch.device("cpu")
+            logging.warning("GPU not found, using CPU, translation will be very slow.")
+
+        time_start = time.time()
+        pretrained_model = "facebook/nllb-200-distilled-1.3B"
+        cache_dir = "models/"
+        tokenizer = AutoTokenizer.from_pretrained(pretrained_model, cache_dir=cache_dir)
+        model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model, cache_dir=cache_dir).to(device)
+        model.eval()
+
+        tokenizer.src_lang = nllb_src
+        with torch.no_grad():
+            encoded_input = tokenizer(item.input_text, return_tensors="pt").to(device)
+            generated_tokens = model.generate(
+                **encoded_input,
+                forced_bos_token_id=tokenizer.lang_code_to_id[nllb_tgt],
+            )
+            translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
+
+        time_end = time.time()
+        translated = translated_text
+        item_response = {
+            "from_language": from_lang,
+            "to_language": to_lang,
+            "text": item.input_text,
+            "translate": translated,
+            "start": str(time_start),
+            "end": str(time_end)
+        }
+        json_compatible_item_data = jsonable_encoder(item_response)
+        return JSONResponse(content=json_compatible_item_data)
+
+
+    class DetectLanguagePostItem(BaseModel):
+        input_text: str = Field(
+            default="Hello, how are you?",
+            description="(str) `Text for detection`",
+        )
+
+    def detect_language(self, item: DetectLanguagePostItem):
+        translator = Translator()
+        detected = translator.detect(item.input_text)
+
+        item_response = {
+            "lang": detected.lang,
+            "confidence": detected.confidence,
+        }
+        json_compatible_item_data = jsonable_encoder(item_response)
+        return JSONResponse(content=json_compatible_item_data)
+
+    def setup_routes(self):
+        for prefix in ["", "/v1"]:
+            self.app.get(
+                prefix + "/langs",
+                summary="Get available languages",
+            )(self.get_available_langs)
+
+            self.app.post(
+                prefix + "/translate",
+                summary="translate text",
+            )(self.translate_completions)
+
+            self.app.post(
+                prefix + "/translate/ai",
+                summary="translate text with ai",
+            )(self.translate_ai_completions)
+
+            self.app.post(
+                prefix + "/detect",
+                summary="detect language",
+            )(self.detect_language)
+
+class ArgParser(argparse.ArgumentParser):
+    def __init__(self, *args, **kwargs):
+        super(ArgParser, self).__init__(*args, **kwargs)
+
+        self.add_argument(
+            "-s",
+            "--server",
+            type=str,
+            default="0.0.0.0",
+            help="Server IP for HF LLM Chat API",
+        )
+        self.add_argument(
+            "-p",
+            "--port",
+            type=int,
+            default=23333,
+            help="Server Port for HF LLM Chat API",
+        )
+
+        self.add_argument(
+            "-d",
+            "--dev",
+            default=False,
+            action="store_true",
+            help="Run in dev mode",
+        )
+
+        self.args = self.parse_args(sys.argv[1:])
+
+
+app = ChatAPIApp().app
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+if __name__ == "__main__":
+    args = ArgParser().args
+    if args.dev:
+        uvicorn.run("__main__:app", host=args.server, port=args.port, reload=True)
+    else:
+        uvicorn.run("__main__:app", host=args.server, port=args.port, reload=False)
+
+# python -m apis.chat_api  # [Docker] production mode
+# python -m apis.chat_api -d  # [Dev] develop mode
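One caveat worth noting: `translate_ai_completions` re-loads the NLLB tokenizer and model from `models/` inside the request handler, so every `/translate/ai` call pays the full load cost. A minimal sketch of module-level caching, using a hypothetical `load_nllb` helper that is not part of this commit:

```python
from functools import lru_cache

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

@lru_cache(maxsize=1)
def load_nllb(pretrained_model="facebook/nllb-200-distilled-1.3B", cache_dir="models/"):
    # Loaded once per process; subsequent calls reuse the cached pair.
    tokenizer = AutoTokenizer.from_pretrained(pretrained_model, cache_dir=cache_dir)
    model = AutoModelForSeq2SeqLM.from_pretrained(pretrained_model, cache_dir=cache_dir)
    model.eval()
    return tokenizer, model
```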
apis/lang_name.json
ADDED
@@ -0,0 +1,17 @@
+[
+  { "code": "auto", "name": "Detect language" },
+  { "code": "en", "name": "English" },
+  { "code": "am", "name": "Amharic" },
+  { "code": "ar", "name": "Arabic" },
+  { "code": "ti", "name": "Tigrinya" },
+  { "code": "om", "name": "Oromo" },
+  { "code": "so", "name": "Somali" },
+  { "code": "ko", "name": "Korean" },
+  { "code": "zh-CN", "name": "Chinese (Simplified)" },
+  { "code": "zh-TW", "name": "Chinese (Traditional)" },
+  { "code": "fr", "name": "French" },
+  { "code": "de", "name": "German" },
+  { "code": "it", "name": "Italian" },
+  { "code": "ja", "name": "Japanese" }
+]
+
apis/models/__init__.py
ADDED
File without changes
examples/__init__.py
ADDED
@@ -0,0 +1 @@
+#source
examples/chat_with_openai.py
ADDED
@@ -0,0 +1,25 @@
+from openai import OpenAI
+
+# If running this service with proxy, you might need to unset `http(s)_proxy`.
+base_url = "http://127.0.0.1:23333"
+api_key = "sk-xxxxx"
+
+client = OpenAI(base_url=base_url, api_key=api_key)
+response = client.chat.completions.create(
+    model="mixtral-8x7b",
+    messages=[
+        {
+            "role": "user",
+            "content": "what is your model",
+        }
+    ],
+    stream=True,
+)
+
+for chunk in response:
+    if chunk.choices[0].delta.content is not None:
+        print(chunk.choices[0].delta.content, end="", flush=True)
+    elif chunk.choices[0].finish_reason == "stop":
+        print()
+    else:
+        pass
examples/chat_with_post.py
ADDED
@@ -0,0 +1,55 @@
+import ast
+import httpx
+import json
+import re
+
+# If running this service with proxy, you might need to unset `http(s)_proxy`.
+chat_api = "http://127.0.0.1:23333"
+api_key = "sk-xxxxx"
+requests_headers = {}
+requests_payload = {
+    "model": "mixtral-8x7b",
+    "messages": [
+        {
+            "role": "user",
+            "content": "what is your model",
+        }
+    ],
+    "stream": True,
+}
+
+with httpx.stream(
+    "POST",
+    chat_api + "/chat/completions",
+    headers=requests_headers,
+    json=requests_payload,
+    timeout=httpx.Timeout(connect=20, read=60, write=20, pool=None),
+) as response:
+    # https://docs.aiohttp.org/en/stable/streams.html
+    # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb
+    response_content = ""
+    for line in response.iter_lines():
+        remove_patterns = [r"^\s*data:\s*", r"^\s*\[DONE\]\s*"]
+        for pattern in remove_patterns:
+            line = re.sub(pattern, "", line).strip()
+
+        if line:
+            try:
+                line_data = json.loads(line)
+            except Exception as e:
+                try:
+                    line_data = ast.literal_eval(line)
+                except:
+                    print(f"Error: {line}")
+                    raise e
+            # print(f"line: {line_data}")
+            delta_data = line_data["choices"][0]["delta"]
+            finish_reason = line_data["choices"][0]["finish_reason"]
+            if "role" in delta_data:
+                role = delta_data["role"]
+            if "content" in delta_data:
+                delta_content = delta_data["content"]
+                response_content += delta_content
+                print(delta_content, end="", flush=True)
+            if finish_reason == "stop":
+                print()
messagers/__init__.py
ADDED
File without changes
messagers/message_composer.py
ADDED
@@ -0,0 +1,128 @@
+import re
+from pprint import pprint
+
+
+class MessageComposer:
+    """
+    models:
+    - mixtral-8x7b (mistralai/Mixtral-8x7B-Instruct-v0.1)
+    """
+
+    def __init__(self, model: str = None):
+        self.inst_roles = ["user", "system", "inst"]
+        self.answer_roles = ["assistant", "bot", "answer"]
+
+    def concat_messages_by_role(self, messages):
+        def is_same_role(role1, role2):
+            if (
+                (role1 == role2)
+                or (role1 in self.inst_roles and role2 in self.inst_roles)
+                or (role1 in self.answer_roles and role2 in self.answer_roles)
+            ):
+                return True
+            else:
+                return False
+
+        concat_messages = []
+        for message in messages:
+            role = message["role"]
+            content = message["content"]
+            if concat_messages and is_same_role(role, concat_messages[-1]["role"]):
+                concat_messages[-1]["content"] += "\n" + content
+            else:
+                if role in self.inst_roles:
+                    message["role"] = "inst"
+                elif role in self.answer_roles:
+                    message["role"] = "answer"
+                else:
+                    message["role"] = "inst"
+                concat_messages.append(message)
+        return concat_messages
+
+    def merge(self, messages) -> str:
+        # <s> [INST] Instruction [/INST] Model answer </s> [INST] Follow-up instruction [/INST]
+
+        self.messages = self.concat_messages_by_role(messages)
+        self.merged_str = ""
+        self.cached_str = ""
+        for message in self.messages:
+            role = message["role"]
+            content = message["content"]
+            if role in self.inst_roles:
+                self.cached_str = f"[INST] {content} [/INST]"
+            elif role in self.answer_roles:
+                self.merged_str += f"<s> {self.cached_str} {content} </s>\n"
+                self.cached_str = ""
+            else:
+                self.cached_str = f"[INST] {content} [/INST]"
+        if self.cached_str:
+            self.merged_str += f"{self.cached_str}"
+
+        return self.merged_str
+
+    def split(self, merged_str) -> list:
+        self.messages = []
+        self.merged_str = merged_str
+        pair_pattern = (
+            r"<s>\s*\[INST\](?P<inst>[\s\S]*?)\[/INST\](?P<answer>[\s\S]*?)</s>"
+        )
+        pair_matches = re.finditer(pair_pattern, self.merged_str, re.MULTILINE)
+        pair_matches_list = list(pair_matches)
+
+        if len(pair_matches_list) <= 0:
+            self.messages = [
+                {
+                    "role": "user",
+                    "content": self.merged_str,
+                }
+            ]
+        else:
+            for match in pair_matches_list:
+                inst = match.group("inst")
+                answer = match.group("answer")
+                self.messages.extend(
+                    [
+                        {"role": "user", "content": inst.strip()},
+                        {"role": "assistant", "content": answer.strip()},
+                    ]
+                )
+
+        inst_pattern = r"\[INST\](?P<inst>[\s\S]*?)\[/INST\]"
+        inst_matches = re.finditer(inst_pattern, self.merged_str, re.MULTILINE)
+        inst_matches_list = list(inst_matches)
+
+        if len(inst_matches_list) > len(pair_matches_list):
+            self.messages.extend(
+                [
+                    {
+                        "role": "user",
+                        "content": inst_matches_list[-1].group("inst").strip(),
+                    }
+                ]
+            )
+
+        return self.messages
+
+
+if __name__ == "__main__":
+    composer = MessageComposer()
+    messages = [
+        {
+            "role": "system",
+            "content": "You are a LLM developed by OpenAI. Your name is GPT-4.",
+        },
+        {"role": "user", "content": "Hello, who are you?"},
+        {"role": "assistant", "content": "I am a bot."},
+        # {"role": "user", "content": "What is your name?"},
+        {"role": "assistant", "content": "My name is Bing."},
+        # {"role": "user", "content": "Tell me a joke."},
+        # {"role": "assistant", "content": "What is a robot's favorite type of music?"},
+        # {
+        #     "role": "user",
+        #     "content": "How many questions have I asked? Please list them.",
+        # },
+    ]
+    merged_str = composer.merge(messages)
+    print(merged_str)
+    pprint(composer.split(merged_str))
+    # print(composer.merge(composer.split(merged_str)))
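To illustrate the Mixtral instruction format that `merge` produces, a short sketch (the expected output follows directly from the code above):

```python
from messagers.message_composer import MessageComposer

composer = MessageComposer()
messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi!"},
    {"role": "user", "content": "Bye"},
]
print(composer.merge(messages))
# <s> [INST] Hello [/INST] Hi! </s>
# [INST] Bye [/INST]
```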
messagers/message_outputer.py
ADDED
@@ -0,0 +1,63 @@
+import json
+
+
+class OpenaiStreamOutputer:
+    """
+    Create chat completion - OpenAI API Documentation
+    * https://platform.openai.com/docs/api-reference/chat/create
+    """
+
+    def data_to_string(self, data={}, content_type=""):
+        data_str = f"{json.dumps(data)}"
+
+        return data_str
+
+    def output(self, content=None, content_type="Completions") -> str:
+        data = {
+            "created": 1700000000,
+            "id": "chatcmpl-hugginface",
+            "object": "chat.completion.chunk",
+            # "content_type": content_type,
+            "model": "hugginface",
+            "choices": [],
+        }
+        if content_type == "Role":
+            data["choices"] = [
+                {
+                    "index": 0,
+                    "delta": {"role": "assistant"},
+                    "finish_reason": None,
+                }
+            ]
+        elif content_type in [
+            "Completions",
+            "InternalSearchQuery",
+            "InternalSearchResult",
+            "SuggestedResponses",
+        ]:
+            if content_type in ["InternalSearchQuery", "InternalSearchResult"]:
+                content += "\n"
+            data["choices"] = [
+                {
+                    "index": 0,
+                    "delta": {"content": content},
+                    "finish_reason": None,
+                }
+            ]
+        elif content_type == "Finished":
+            data["choices"] = [
+                {
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": "stop",
+                }
+            ]
+        else:
+            data["choices"] = [
+                {
+                    "index": 0,
+                    "delta": {},
+                    "finish_reason": None,
+                }
+            ]
+        return self.data_to_string(data, content_type)
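Each `output` call serializes a single OpenAI-style streaming chunk. For example (the field values follow from the code above):

```python
from messagers.message_outputer import OpenaiStreamOutputer

outputer = OpenaiStreamOutputer()
print(outputer.output(content="Hello", content_type="Completions"))
# {"created": 1700000000, "id": "chatcmpl-hugginface", "object": "chat.completion.chunk",
#  "model": "hugginface", "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}
```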
models/__init__.py
ADDED
File without changes

networks/__init__.py
ADDED
File without changes
networks/message_streamer.py
ADDED
@@ -0,0 +1,97 @@
+import json
+import re
+import requests
+from messagers.message_outputer import OpenaiStreamOutputer
+from utils.logger import logger
+from utils.enver import enver
+
+
+class MessageStreamer:
+    MODEL_MAP = {
+        "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1",  # 72.62, fast [Recommended]
+        "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",  # 65.71, fast
+        "openchat-3.5": "openchat/openchat_3.5",  # 61.24, fast
+        # "zephyr-7b-alpha": "HuggingFaceH4/zephyr-7b-alpha",  # 59.5, fast
+        # "zephyr-7b-beta": "HuggingFaceH4/zephyr-7b-beta",  # 61.95, slow
+        "default": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+    }
+
+    def __init__(self, model: str):
+        if model in self.MODEL_MAP.keys():
+            self.model = model
+        else:
+            self.model = "default"
+        self.model_fullname = self.MODEL_MAP[self.model]
+        self.message_outputer = OpenaiStreamOutputer()
+
+    def parse_line(self, line):
+        line = line.decode("utf-8")
+        line = re.sub(r"data:\s*", "", line)
+        data = json.loads(line)
+        content = data["token"]["text"]
+        return content
+
+    def chat(
+        self,
+        prompt: str = None,
+        temperature: float = 0.01,
+        max_new_tokens: int = 8192,
+        stream: bool = True,
+        yield_output: bool = False,
+    ):
+        # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
+        # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
+        self.request_url = (
+            f"https://api-inference.huggingface.co/models/{self.model_fullname}"
+        )
+        self.request_headers = {
+            "Content-Type": "application/json",
+        }
+        # References:
+        #   huggingface_hub/inference/_client.py:
+        #     class InferenceClient > def text_generation()
+        #   huggingface_hub/inference/_text_generation.py:
+        #     class TextGenerationRequest > param `stream`
+        # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
+        self.request_body = {
+            "inputs": prompt,
+            "parameters": {
+                "temperature": max(temperature, 0.01),  # must be positive
+                "max_new_tokens": max_new_tokens,
+                "return_full_text": False,
+            },
+            "stream": stream,
+        }
+        logger.back(self.request_url)
+        enver.set_envs(proxies=True)
+        stream = requests.post(
+            self.request_url,
+            headers=self.request_headers,
+            json=self.request_body,
+            proxies=enver.requests_proxies,
+            stream=stream,
+        )
+        status_code = stream.status_code
+        if status_code == 200:
+            logger.success(status_code)
+        else:
+            logger.err(status_code)
+
+        for line in stream.iter_lines():
+            if not line:
+                continue
+
+            content = self.parse_line(line)
+
+            if content.strip() == "</s>":
+                content_type = "Finished"
+                logger.success("\n[Finished]")
+            else:
+                content_type = "Completions"
+                logger.back(content, end="")
+
+            if yield_output:
+                output = self.message_outputer.output(
+                    content=content, content_type=content_type
+                )
+                yield output
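A minimal usage sketch combining `MessageComposer` and `MessageStreamer` (assuming the Hugging Face Inference API is reachable from the host; `yield_output=True` makes `chat` yield OpenAI-style chunk strings):

```python
from messagers.message_composer import MessageComposer
from networks.message_streamer import MessageStreamer

composer = MessageComposer()
prompt = composer.merge([{"role": "user", "content": "what is your model"}])

streamer = MessageStreamer(model="mixtral-8x7b")
for chunk in streamer.chat(prompt=prompt, temperature=0.01, stream=True, yield_output=True):
    pass  # each chunk is a JSON string produced by OpenaiStreamOutputer
```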
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+fastapi
+pydantic
+uvicorn
+googletrans==3.1.0a0
+torch
+transformers
+transformers[sentencepiece]
+requests
+termcolor
utils/__init__.py
ADDED
@@ -0,0 +1,69 @@
+import json
+import requests
+import os
+
+from pathlib import Path
+
+
+class OSEnver:
+    def __init__(self):
+        self.envs_stack = []
+        self.envs = os.environ.copy()
+
+    def store_envs(self):
+        self.envs_stack.append(self.envs)
+
+    def restore_envs(self):
+        self.envs = self.envs_stack.pop()
+        if self.global_scope:
+            os.environ = self.envs
+
+    def set_envs(self, secrets=True, proxies=None, store_envs=True):
+        # caller_info = inspect.stack()[1]
+        # logger.back(f"OS Envs is set by: {caller_info.filename}")
+
+        if store_envs:
+            self.store_envs()
+
+        if secrets:
+            secrets_path = Path(__file__).parents[1] / "secrets.json"
+            if secrets_path.exists():
+                with open(secrets_path, "r") as rf:
+                    secrets = json.load(rf)
+            else:
+                secrets = {}
+
+        if proxies:
+            for proxy_env in ["http_proxy", "https_proxy"]:
+                if isinstance(proxies, str):
+                    self.envs[proxy_env] = proxies
+                elif "http_proxy" in secrets.keys():
+                    self.envs[proxy_env] = secrets["http_proxy"]
+                elif os.getenv("http_proxy"):
+                    self.envs[proxy_env] = os.getenv("http_proxy")
+                else:
+                    continue
+
+        self.proxy = (
+            self.envs.get("all_proxy")
+            or self.envs.get("http_proxy")
+            or self.envs.get("https_proxy")
+            or None
+        )
+        self.requests_proxies = {
+            "http": self.proxy,
+            "https": self.proxy,
+        }
+
+        # https://www.proxynova.com/proxy-server-list/country-us/
+
+        print(f"Using proxy: [{self.proxy}]")
+        # r = requests.get(
+        #     "http://ifconfig.me/ip",
+        #     proxies=self.requests_proxies,
+        #     timeout=10,
+        # )
+        # print(f"[r.status_code] r.text")
+
+
+enver = OSEnver()
utils/enver.py
ADDED
@@ -0,0 +1,60 @@
+import json
+import os
+
+from pathlib import Path
+from utils.logger import logger
+
+
+class OSEnver:
+    def __init__(self):
+        self.envs_stack = []
+        self.envs = os.environ.copy()
+
+    def store_envs(self):
+        self.envs_stack.append(self.envs)
+
+    def restore_envs(self):
+        self.envs = self.envs_stack.pop()
+
+    def set_envs(self, secrets=True, proxies=None, store_envs=True):
+        # caller_info = inspect.stack()[1]
+        # logger.back(f"OS Envs is set by: {caller_info.filename}")
+
+        if store_envs:
+            self.store_envs()
+
+        if secrets:
+            secrets_path = Path(__file__).parents[1] / "secrets.json"
+            if secrets_path.exists():
+                with open(secrets_path, "r") as rf:
+                    secrets = json.load(rf)
+            else:
+                secrets = {}
+
+        if proxies:
+            for proxy_env in ["http_proxy", "https_proxy"]:
+                if isinstance(proxies, str):
+                    self.envs[proxy_env] = proxies
+                elif "http_proxy" in secrets.keys():
+                    self.envs[proxy_env] = secrets["http_proxy"]
+                elif os.getenv("http_proxy"):
+                    self.envs[proxy_env] = os.getenv("http_proxy")
+                else:
+                    continue
+
+        self.proxy = (
+            self.envs.get("all_proxy")
+            or self.envs.get("http_proxy")
+            or self.envs.get("https_proxy")
+            or None
+        )
+        self.requests_proxies = {
+            "http": self.proxy,
+            "https": self.proxy,
+        }
+
+        if self.proxy:
+            logger.note(f"Using proxy: [{self.proxy}]")
+
+
+enver = OSEnver()
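Typical use, as in `message_streamer.py`: set proxy variables (picked up from `secrets.json` or the inherited environment), pass `enver.requests_proxies` to `requests`, then restore. A minimal sketch:

```python
import requests
from utils.enver import enver

enver.set_envs(proxies=True)  # reads http_proxy from secrets.json or the environment, if any
resp = requests.get("https://huggingface.co", proxies=enver.requests_proxies)
enver.restore_envs()
```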
utils/logger.py
ADDED
@@ -0,0 +1,269 @@
+import datetime
+import functools
+import inspect
+import logging
+import os
+import shutil
+import subprocess
+from termcolor import colored
+
+
+def add_fillers(text, filler="=", fill_side="both"):
+    terminal_width = shutil.get_terminal_size().columns
+    text = text.strip()
+    text_width = len(text)
+    if text_width >= terminal_width:
+        return text
+
+    if fill_side[0].lower() == "b":
+        leading_fill_str = filler * ((terminal_width - text_width) // 2 - 1) + " "
+        trailing_fill_str = " " + filler * (
+            terminal_width - text_width - len(leading_fill_str) - 1
+        )
+    elif fill_side[0].lower() == "l":
+        leading_fill_str = filler * (terminal_width - text_width - 1) + " "
+        trailing_fill_str = ""
+    elif fill_side[0].lower() == "r":
+        leading_fill_str = ""
+        trailing_fill_str = " " + filler * (terminal_width - text_width - 1)
+    else:
+        raise ValueError("Invalid fill_side")
+
+    filled_str = f"{leading_fill_str}{text}{trailing_fill_str}"
+    return filled_str
+
+
+class OSLogger(logging.Logger):
+    LOG_METHODS = {
+        "err": ("error", "red"),
+        "warn": ("warning", "light_red"),
+        "note": ("info", "light_magenta"),
+        "mesg": ("info", "light_cyan"),
+        "file": ("info", "light_blue"),
+        "line": ("info", "white"),
+        "success": ("info", "light_green"),
+        "fail": ("info", "light_red"),
+        "back": ("debug", "light_cyan"),
+    }
+    INDENT_METHODS = [
+        "indent",
+        "set_indent",
+        "reset_indent",
+        "store_indent",
+        "restore_indent",
+        "log_indent",
+    ]
+    LEVEL_METHODS = [
+        "set_level",
+        "store_level",
+        "restore_level",
+        "quiet",
+        "enter_quiet",
+        "exit_quiet",
+    ]
+    LEVEL_NAMES = {
+        "critical": logging.CRITICAL,
+        "error": logging.ERROR,
+        "warning": logging.WARNING,
+        "info": logging.INFO,
+        "debug": logging.DEBUG,
+    }
+
+    def __init__(self, name=None, prefix=False):
+        if not name:
+            frame = inspect.stack()[1]
+            module = inspect.getmodule(frame[0])
+            name = module.__name__
+
+        super().__init__(name)
+        self.setLevel(logging.INFO)
+
+        if prefix:
+            formatter_prefix = "[%(asctime)s] - [%(name)s] - [%(levelname)s]\n"
+        else:
+            formatter_prefix = ""
+
+        self.formatter = logging.Formatter(formatter_prefix + "%(message)s")
+
+        stream_handler = logging.StreamHandler()
+        stream_handler.setLevel(logging.INFO)
+        stream_handler.setFormatter(self.formatter)
+        self.addHandler(stream_handler)
+
+        self.log_indent = 0
+        self.log_indents = []
+
+        self.log_level = "info"
+        self.log_levels = []
+
+    def indent(self, indent=2):
+        self.log_indent += indent
+
+    def set_indent(self, indent=2):
+        self.log_indent = indent
+
+    def reset_indent(self):
+        self.log_indent = 0
+
+    def store_indent(self):
+        self.log_indents.append(self.log_indent)
+
+    def restore_indent(self):
+        self.log_indent = self.log_indents.pop(-1)
+
+    def set_level(self, level):
+        self.log_level = level
+        self.setLevel(self.LEVEL_NAMES[level])
+
+    def store_level(self):
+        self.log_levels.append(self.log_level)
+
+    def restore_level(self):
+        self.log_level = self.log_levels.pop(-1)
+        self.set_level(self.log_level)
+
+    def quiet(self):
+        self.set_level("critical")
+
+    def enter_quiet(self, quiet=False):
+        if quiet:
+            self.store_level()
+            self.quiet()
+
+    def exit_quiet(self, quiet=False):
+        if quiet:
+            self.restore_level()
+
+    def log(
+        self,
+        level,
+        color,
+        msg,
+        indent=0,
+        fill=False,
+        fill_side="both",
+        end="\n",
+        *args,
+        **kwargs,
+    ):
+        if type(msg) == str:
+            msg_str = msg
+        else:
+            msg_str = repr(msg)
+            quotes = ["'", '"']
+            if msg_str[0] in quotes and msg_str[-1] in quotes:
+                msg_str = msg_str[1:-1]
+
+        indent_str = " " * (self.log_indent + indent)
+        indented_msg = "\n".join([indent_str + line for line in msg_str.split("\n")])
+
+        if fill:
+            indented_msg = add_fillers(indented_msg, fill_side=fill_side)
+
+        handler = self.handlers[0]
+        handler.terminator = end
+
+        getattr(self, level)(colored(indented_msg, color), *args, **kwargs)
+
+    def route_log(self, method, msg, *args, **kwargs):
+        level, method = method
+        functools.partial(self.log, level, method, msg)(*args, **kwargs)
+
+    def err(self, msg: str = "", *args, **kwargs):
+        self.route_log(("error", "red"), msg, *args, **kwargs)
+
+    def warn(self, msg: str = "", *args, **kwargs):
+        self.route_log(("warning", "light_red"), msg, *args, **kwargs)
+
+    def note(self, msg: str = "", *args, **kwargs):
+        self.route_log(("info", "light_magenta"), msg, *args, **kwargs)
+
+    def mesg(self, msg: str = "", *args, **kwargs):
+        self.route_log(("info", "light_cyan"), msg, *args, **kwargs)
+
+    def file(self, msg: str = "", *args, **kwargs):
+        self.route_log(("info", "light_blue"), msg, *args, **kwargs)
+
+    def line(self, msg: str = "", *args, **kwargs):
+        self.route_log(("info", "white"), msg, *args, **kwargs)
+
+    def success(self, msg: str = "", *args, **kwargs):
+        self.route_log(("info", "light_green"), msg, *args, **kwargs)
+
+    def fail(self, msg: str = "", *args, **kwargs):
+        self.route_log(("info", "light_red"), msg, *args, **kwargs)
+
+    def back(self, msg: str = "", *args, **kwargs):
+        self.route_log(("debug", "light_cyan"), msg, *args, **kwargs)
+
+
+logger = OSLogger()
+
+
+def shell_cmd(cmd, getoutput=False, showcmd=True, env=None):
+    if showcmd:
+        logger.info(colored(f"\n$ [{os.getcwd()}]", "light_blue"))
+        logger.info(colored(f"  $ {cmd}\n", "light_cyan"))
+    if getoutput:
+        output = subprocess.getoutput(cmd, env=env)
+        return output
+    else:
+        subprocess.run(cmd, shell=True, env=env)
+
+
+class Runtimer:
+    def __enter__(self):
+        self.t1, _ = self.start_time()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.t2, _ = self.end_time()
+        self.elapsed_time(self.t2 - self.t1)
+
+    def start_time(self):
+        t1 = datetime.datetime.now()
+        self.logger_time("start", t1)
+        return t1, self.time2str(t1)
+
+    def end_time(self):
+        t2 = datetime.datetime.now()
+        self.logger_time("end", t2)
+        return t2, self.time2str(t2)
+
+    def elapsed_time(self, dt=None):
+        if dt is None:
+            dt = self.t2 - self.t1
+        self.logger_time("elapsed", dt)
+        return dt, self.time2str(dt)
+
+    def logger_time(self, time_type, t):
+        time_types = {
+            "start": "Start",
+            "end": "End",
+            "elapsed": "Elapsed",
+        }
+        time_str = add_fillers(
+            colored(
+                f"{time_types[time_type]} time: [ {self.time2str(t)} ]",
+                "light_magenta",
+            ),
+            fill_side="both",
+        )
+        logger.line(time_str)
+
+    # Convert time to string
+    def time2str(self, t):
+        datetime_str_format = "%Y-%m-%d %H:%M:%S"
+        if isinstance(t, datetime.datetime):
+            return t.strftime(datetime_str_format)
+        elif isinstance(t, datetime.timedelta):
+            hours = t.seconds // 3600
+            hour_str = f"{hours} hr" if hours > 0 else ""
+            minutes = (t.seconds // 60) % 60
+            minute_str = f"{minutes:>2} min" if minutes > 0 else ""
+            seconds = t.seconds % 60
+            second_str = f"{seconds:>2} s"
+            time_str = " ".join([hour_str, minute_str, second_str]).strip()
+            return time_str
+        else:
+            return str(t)
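A short sketch of the logger and timer in use (method names as defined above):

```python
from utils.logger import logger, Runtimer

logger.note("Loading model ...")
logger.indent(2)
logger.success("done")  # printed in light green, indented two spaces
logger.reset_indent()

with Runtimer():
    sum(range(10**6))  # start, end, and elapsed times are logged around this block
```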