ruslanmv committed on
Commit
ce82031
1 Parent(s): a6ddd2a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -5
app.py CHANGED
@@ -18,7 +18,6 @@ model_name = "ruslanmv/Medical-Llama3-8B"
18
  model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
19
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
20
  tokenizer.pad_token = tokenizer.eos_token
21
-
22
  @spaces.GPU
23
  def askme(symptoms, question):
24
  sys_message = '''\
@@ -28,14 +27,47 @@ def askme(symptoms, question):
28
  content = symptoms + " " + question
29
  messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": content}]
30
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
31
- inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
32
- outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
33
- response_text = tokenizer.batch_decode(outputs)[0].strip()
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Remove system messages and content
35
  #response_text = response_text.replace(sys_message, "").replace(content, "").strip()
36
  # Extract only the assistant's response
37
- return response_text
 
 
 
 
 
 
 
 
 
 
38
 
 
 
 
 
 
 
 
 
 
 
 
39
  # Example usage
40
  symptoms = '''\
41
  I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
 
18
  model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
19
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
20
  tokenizer.pad_token = tokenizer.eos_token
 
21
  @spaces.GPU
22
  def askme(symptoms, question):
23
  sys_message = '''\
 
27
  content = symptoms + " " + question
28
  messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": content}]
29
  prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
30
+ # inputs = tokenizer(prompt, return_tensors="pt").to(device) # Ensure inputs are on CUDA device
31
+ # outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
32
+ # response_text = tokenizer.batch_decode(outputs)[0].strip()
33
+
34
+
35
+
36
+ # Tokenize all prompts and batch them
37
+ tokenized_inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(device)
38
+ outputs = model.generate(**tokenized_inputs, max_new_tokens=200, use_cache=True)
39
+
40
+ # Decode responses
41
+ response_texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
42
+
43
+
44
+
45
  # Remove system messages and content
46
  #response_text = response_text.replace(sys_message, "").replace(content, "").strip()
47
  # Extract only the assistant's response
48
+ #assistant_response = response_text.split("<|im_start|>assistant")[1].strip().replace('<|im_end', '')
49
+ # Extract only the assistant's response
50
+
51
+
52
+
53
+
54
+ # Extract assistant's responses
55
+ assistant_responses = []
56
+ for response_text in response_texts:
57
+ assistant_response = response_text.split("assistant")[1].strip().replace('<|im_end', '')
58
+ assistant_responses.append(assistant_response)
59
 
60
+ return assistant_responses
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+ return assistant_response
71
  # Example usage
72
  symptoms = '''\
73
  I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,