Petro committed on
Commit
3fa3baf
1 Parent(s): 011b7b6

First model version

Browse files
Files changed (2) hide show
  1. main.py +8 -22
  2. requirements.txt +1 -3
main.py CHANGED
@@ -1,33 +1,19 @@
1
- import os
2
-
3
- from langchain.llms import CTransformers
4
  from fastapi import FastAPI
5
  from pydantic import BaseModel
6
 
7
  file_name = "zephyr-7b-beta.Q4_K_S.gguf"
8
- config = {
9
- "max_new_token": 10,
10
- "repetition_penalty": 1.1,
11
- "temperature": 0.5,
12
- "top_k": 50,
13
- "top_p": 0.9,
14
- "stream": False,
15
- "threads": 3,
16
- }
17
- llm = CTransformers(
18
- model=file_name,
19
- model_type="mistral",
20
- lib="avx2",
21
- **config
22
  )
23
- print(llm)
24
-
25
 
 
26
  class validation(BaseModel):
27
  prompt: str
28
  #Fast API
29
 
30
-
31
  app = FastAPI()
32
 
33
  @app.post("/llm_on_cpu")
@@ -36,5 +22,5 @@ async def stream(item: validation):
36
  E_INST = "</s>"
37
  user, assistant = "<|user|>", "<|assistant|>"
38
  prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
39
- print(prompt)
40
- return llm.invoke(prompt)
 
1
+ from ctransformers import AutoModelForCausalLM
 
 
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
 
5
  file_name = "zephyr-7b-beta.Q4_K_S.gguf"
6
+ llm = AutoModelForCausalLM.from_pretrained(file_name,
7
+ model_type='mistral',
8
+ max_new_tokens = 1096,
9
+ threads = 3,
 
 
 
 
 
 
 
 
 
 
10
  )
 
 
11
 
12
+ #Pydantic object
13
  class validation(BaseModel):
14
  prompt: str
15
  #Fast API
16
 
 
17
  app = FastAPI()
18
 
19
  @app.post("/llm_on_cpu")
 
22
  E_INST = "</s>"
23
  user, assistant = "<|user|>", "<|assistant|>"
24
  prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
25
+
26
+ return llm(prompt)
requirements.txt CHANGED
@@ -5,6 +5,4 @@ uvicorn
5
  requests
6
  python-dotenv
7
  ctransformers
8
- torch
9
- langchain==0.1.9
10
- ctransformers
 
5
  requests
6
  python-dotenv
7
  ctransformers
8
+ torch