OjciecTadeusz committed on
Commit
dff7757
·
verified ·
1 Parent(s): f66db79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -12,11 +12,14 @@ app = FastAPI()
12
  # Load model and tokenizer
13
  model_name = "Qwen/Qwen2.5-Coder-32B"
14
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
 
 
15
  model = AutoModelForCausalLM.from_pretrained(
16
  model_name,
17
  device_map="auto",
18
  trust_remote_code=True,
19
- torch_dtype=torch.float16
 
20
  )
21
 
22
  def format_chat_response(response_text, prompt_tokens, completion_tokens):
@@ -91,12 +94,15 @@ def chat_interface(message, history):
91
  # Add current message
92
  messages.append({"role": "user", "content": message})
93
 
 
 
 
 
94
  # Get response
95
- response = chat_completion(Request(scope={"type": "http"}, receive=None))
96
  if isinstance(response, JSONResponse):
97
- response_data = response.body.decode()
98
- response_json = json.loads(response_data)
99
- return response_json["choices"][0]["message"]["content"]
100
  return "Error generating response"
101
 
102
  interface = gr.ChatInterface(
 
12
  # Load model and tokenizer
13
  model_name = "Qwen/Qwen2.5-Coder-32B"
14
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
15
+
16
+ # Configure model loading with specific parameters
17
  model = AutoModelForCausalLM.from_pretrained(
18
  model_name,
19
  device_map="auto",
20
  trust_remote_code=True,
21
+ torch_dtype=torch.float16,
22
+ low_cpu_mem_usage=True
23
  )
24
 
25
  def format_chat_response(response_text, prompt_tokens, completion_tokens):
 
94
  # Add current message
95
  messages.append({"role": "user", "content": message})
96
 
97
+ # Create a mock request object with the messages
98
+ mock_request = Request(scope={"type": "http"}, receive=None)
99
+ mock_request.json = lambda: {"messages": messages}
100
+
101
  # Get response
102
+ response = await chat_completion(mock_request)
103
  if isinstance(response, JSONResponse):
104
+ response_data = json.loads(response.body.decode())
105
+ return response_data["choices"][0]["message"]["content"]
 
106
  return "Error generating response"
107
 
108
  interface = gr.ChatInterface(