MORE PARAMETERS
Files changed:
- app.py +15 -12
- llm_backend.py +1 -4
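In short: app.py's hard-coded settings become environment-variable lookups with explicit defaults (model repo, chat format, cache dir, GPU options), new APP_HOST / APP_PORT / FLASK_THREADED variables make the server binding configurable, and llm_backend.py keeps the b'' end-of-chunk sentinel in the generate_tokens error path while dropping trailing blank lines.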
app.py
CHANGED
@@ -15,18 +15,21 @@ import sys
 llm = LlmBackend()
 _lock = threading.Lock()
 
-SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT'
-CONTEXT_SIZE = int(os.environ.get('CONTEXT_SIZE', '500'))
-HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR'
-USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', '').lower() == 'true'
-ENABLE_GPU = os.environ.get('ENABLE_GPU', '').lower() == 'true'
-GPU_LAYERS = int(os.environ.get('GPU_LAYERS', '0'))
-CHAT_FORMAT = os.environ.get('CHAT_FORMAT'
-REPO_NAME = os.environ.get('REPO_NAME'
-MODEL_NAME = os.environ.get('MODEL_NAME'
-DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL'
-DATA_FILENAME = os.environ.get('DATA_FILENAME'
+SYSTEM_PROMPT = os.environ.get('SYSTEM_PROMPT', default="Ты — русскоязычный автоматический ассистент. Ты максимально точно и отвечаешь на запросы пользователя, используя русский язык.")
+CONTEXT_SIZE = int(os.environ.get('CONTEXT_SIZE', default='500'))
+HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR', default='/home/user/app/.cache')
+USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', default='False').lower() == 'true'
+ENABLE_GPU = os.environ.get('ENABLE_GPU', default='False').lower() == 'true'
+GPU_LAYERS = int(os.environ.get('GPU_LAYERS', default='0'))
+CHAT_FORMAT = os.environ.get('CHAT_FORMAT', default='llama-2')
+REPO_NAME = os.environ.get('REPO_NAME', default='IlyaGusev/saiga2_7b_gguf')
+MODEL_NAME = os.environ.get('MODEL_NAME', default='model-q4_K.gguf')
+DATASET_REPO_URL = os.environ.get('DATASET_REPO_URL', default="https://huggingface.co/datasets/muryshev/saiga-chat")
+DATA_FILENAME = os.environ.get('DATA_FILENAME', default="data-saiga-cuda-release.xml")
 HF_TOKEN = os.environ.get("HF_TOKEN")
+APP_HOST = os.environ.get('APP_HOST', default='0.0.0.0')
+APP_PORT = int(os.environ.get('APP_PORT', default='7860'))
+FLASK_THREADED = os.environ.get('FLASK_THREADED', default='False').lower() == "true"
 
 # Create a lock object
 lock = threading.Lock()
@@ -174,5 +177,5 @@ if __name__ == "__main__":
     # scheduler.add_job(check_last_request_time, trigger='interval', minutes=1)
     # scheduler.start()
 
-    app.run(host=
+    app.run(host=APP_HOST, port=APP_PORT, debug=False, threaded=FLASK_THREADED)
 
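A note on the config pattern above: plain dict.get() takes no keyword arguments, but os.environ is an os._Environ, which inherits get(key, default=None) from collections.abc.Mapping, so the default= spelling in the new lines is valid (the positional form works equally well). Below is a minimal sketch of the same idiom, with a hypothetical env_flag helper (not part of the repo) for the string-to-bool conversions:

import os

# dict.get() rejects keywords, but os.environ inherits
# get(key, default=None) from collections.abc.Mapping,
# so both spellings are valid here.
context_size = int(os.environ.get('CONTEXT_SIZE', default='500'))

def env_flag(name, default='False'):
    # Hypothetical helper mirroring the diff's `.lower() == 'true'` idiom:
    # 'true'/'True'/'TRUE' enable the flag; anything else (even '1') does not.
    return os.environ.get(name, default).lower() == 'true'

enable_gpu = env_flag('ENABLE_GPU')
threaded = env_flag('FLASK_THREADED')

As for the last hunk: Flask's app.run() forwards unrecognized keyword arguments such as threaded to werkzeug.serving.run_simple, so threaded=FLASK_THREADED makes the development server handle each request in its own thread when the variable is set to 'true'.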
llm_backend.py
CHANGED
@@ -176,7 +176,4 @@ class LlmBackend:
         except Exception as e:
             log.error('generate_tokens - error')
             log.error(e)
-            yield b'' # End of chunk
-
-
-
+            yield b'' # End of chunk
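The re-added yield b'' gives stream consumers an explicit end-of-stream marker when generation fails, rather than letting the generator die mid-response. A minimal sketch of a hypothetical caller (not from the repo) consuming generate_tokens() under that convention:

def collect_response(token_stream):
    # token_stream yields byte chunks; per the diff, an empty chunk
    # signals the backend hit an exception and the stream is over.
    parts = []
    for chunk in token_stream:
        if chunk == b'':
            break
        parts.append(chunk)
    return b''.join(parts).decode('utf-8', errors='replace')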