saifeddinemk committed
Commit
d872e94
1 Parent(s): 5ba43ef

Fixed app v2

Files changed (1)
  1. app.py +7 -8
app.py CHANGED
@@ -3,20 +3,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 
-# Define model paths
-model_path = "/home/migel/Llama-3.1-WhiteRabbitNeo-2-8B"
+# Hugging Face model repository path
+model_name = "WhiteRabbitNeo/WhiteRabbitNeo-13B-v1"
 output_file_path = "/home/user/conversations.jsonl"
 
-# Load model and tokenizer
+# Load model and tokenizer from Hugging Face model hub
 model = AutoModelForCausalLM.from_pretrained(
-    model_path,
+    model_name,
     torch_dtype=torch.float16,
     device_map="auto",
     load_in_4bit=False,
     trust_remote_code=False,
 )
 
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
 # Initialize FastAPI app
 app = FastAPI()
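This hunk swaps a hard-coded local checkpoint path for a Hugging Face Hub repo id, so `from_pretrained` downloads and caches the weights instead of expecting them on disk. A side note not part of this commit: with `load_in_4bit=False`, a 13B model in float16 needs roughly 26 GB of GPU memory; a 4-bit quantized load via bitsandbytes is a common alternative. A minimal sketch, assuming the `bitsandbytes` package is installed (the quantization config below is an illustration, not the app's code):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "WhiteRabbitNeo/WhiteRabbitNeo-13B-v1"

# Hypothetical alternative to the float16 load in app.py: 4-bit quantization
# cuts weight memory roughly 4x relative to float16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",
)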
@@ -26,8 +26,7 @@ class PromptRequest(BaseModel):
     user_input: str
 
 def generate_text(instruction):
-    tokens = tokenizer.encode(instruction)
-    tokens = torch.LongTensor(tokens).unsqueeze(0).to("cuda")
+    tokens = tokenizer.encode(instruction, return_tensors="pt").to("cuda")
 
     instance = {
         "input_ids": tokens,
@@ -37,7 +36,7 @@ def generate_text(instruction):
         "top_k": 50,
     }
 
-    length = len(tokens[0])
+    length = tokens.size(1)
     with torch.no_grad():
         rest = model.generate(
             input_ids=tokens,
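Since `tokens` is now a 2-D tensor of shape `(1, seq_len)`, `tokens.size(1)` reads the prompt length directly where `len(tokens[0])` had to index into the batch first. The usual reason to keep `length` around, sketched below as an assumption since the rest of `generate_text` is outside this diff, is to strip the echoed prompt ids from the output of `model.generate` before decoding:

def decode_generated(rest, tokens, tokenizer):
    # `rest` is the output of model.generate(...), `tokens` the prompt tensor.
    length = tokens.size(1)            # number of prompt tokens, as in the new code
    generated_ids = rest[0][length:]   # drop the echoed prompt, keep new tokens only
    return tokenizer.decode(generated_ids, skip_special_tokens=True)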
 