Spaces:
Runtime error
Runtime error
Commit
·
62f21a9
1
Parent(s):
4992462
fixed config issue
Browse files
- .idea/misc.xml +1 -1
- main/config.yaml +7 -3
- main/main.py +1 -1
- main/routes.py +4 -2
.idea/misc.xml
CHANGED
@@ -3,7 +3,7 @@
|
|
3 |
<component name="Black">
|
4 |
<option name="sdkName" value="Python 3.13 (Inference-API)" />
|
5 |
</component>
|
6 |
-
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="Python 3.
|
7 |
<output url="file://$PROJECT_DIR$/out" />
|
8 |
</component>
|
9 |
</project>
|
|
|
3 |
<component name="Black">
|
4 |
<option name="sdkName" value="Python 3.13 (Inference-API)" />
|
5 |
</component>
|
6 |
+
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="Python 3.13 (Inference-API) (2)" project-jdk-type="Python SDK">
|
7 |
<output url="file://$PROJECT_DIR$/out" />
|
8 |
</component>
|
9 |
</project>
|
main/config.yaml
CHANGED
@@ -5,8 +5,8 @@ server:
|
|
5 |
max_batch_size: 1
|
6 |
|
7 |
llm_server:
|
8 |
-
host: "teamgenki-llmserver.hf.space"
|
9 |
-
port:
|
10 |
timeout: 60.0
|
11 |
api_prefix: "/api/v1" # This will be used for route prefixing
|
12 |
endpoints:
|
@@ -17,4 +17,8 @@ llm_server:
|
|
17 |
system_validate: "/system/validate"
|
18 |
model_initialize: "/model/initialize"
|
19 |
model_initialize_embedding: "/model/initialize/embedding"
|
20 |
-
model_download: "/model/download"
|
|
|
|
|
|
|
|
|
|
5 |
max_batch_size: 1
|
6 |
|
7 |
llm_server:
|
8 |
+
host: "0.0.0.0" # "teamgenki-llmserver.hf.space"
|
9 |
+
port: 8001 # Will be ignored for hf.space URLs
|
10 |
timeout: 60.0
|
11 |
api_prefix: "/api/v1" # This will be used for route prefixing
|
12 |
endpoints:
|
|
|
17 |
system_validate: "/system/validate"
|
18 |
model_initialize: "/model/initialize"
|
19 |
model_initialize_embedding: "/model/initialize/embedding"
|
20 |
+
model_download: "/model/download"
|
21 |
+
|
22 |
+
model:
|
23 |
+
defaults:
|
24 |
+
model_name: "microsoft/Phi-3.5-mini-instruct"
|
main/main.py
CHANGED
@@ -57,7 +57,7 @@ def create_app():
|
|
57 |
api = InferenceApi(config)
|
58 |
|
59 |
# Initialize router with API instance
|
60 |
-
init_router(api)
|
61 |
|
62 |
# Create LitServer instance
|
63 |
server = ls.LitServer(
|
|
|
57 |
api = InferenceApi(config)
|
58 |
|
59 |
# Initialize router with API instance
|
60 |
+
init_router(api, config)
|
61 |
|
62 |
# Create LitServer instance
|
63 |
server = ls.LitServer(
|
main/routes.py
CHANGED
@@ -18,11 +18,13 @@ from .schemas import (
|
|
18 |
router = APIRouter()
|
19 |
logger = logging.getLogger(__name__)
|
20 |
api = None
|
|
|
21 |
|
22 |
-
def init_router(inference_api: InferenceApi):
|
23 |
"""Initialize router with an already setup API instance"""
|
24 |
-
global api
|
25 |
api = inference_api
|
|
|
26 |
logger.info("Router initialized with Inference API instance")
|
27 |
|
28 |
@router.post("/generate")
|
|
|
18 |
router = APIRouter()
|
19 |
logger = logging.getLogger(__name__)
|
20 |
api = None
|
21 |
+
config = None
|
22 |
|
23 |
+
def init_router(inference_api: InferenceApi, conf):
|
24 |
"""Initialize router with an already setup API instance"""
|
25 |
+
global api, config
|
26 |
api = inference_api
|
27 |
+
config = conf
|
28 |
logger.info("Router initialized with Inference API instance")
|
29 |
|
30 |
@router.post("/generate")
|