Cioni223 committed
Commit e2221cc · verified · 1 Parent(s): e68f0e3

Update app.py

Files changed (1):
  1. app.py +10 -16
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig
 from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
 from peft import PeftModel
 import gradio as gr
-import os
+
 
 # Add this new class for custom stopping criteria
 class SentenceEndingCriteria(StoppingCriteria):
@@ -16,25 +16,24 @@ class SentenceEndingCriteria(StoppingCriteria):
         return last_token in self.end_tokens
 
 def load_model():
-    model_path = "Cioni223/mymodel"
-    token = os.environ.get("HUGGINGFACE_TOKEN")  # Ensure you set this environment variable
-
+    # Modify the model path to use the Hugging Face model ID
+    model_path = "Cioni223/mymodel"  # Replace with your actual model path on HF
     tokenizer = AutoTokenizer.from_pretrained(
         model_path,
         use_fast=False,
         padding_side="left",
         model_max_length=4096,
-        token=token
+        token=True  # Add this if your model is private
     )
 
     tokenizer.pad_token = tokenizer.eos_token
 
+    # Load merged model with quantization
     model = LlamaForCausalLM.from_pretrained(
         model_path,
         device_map="auto",
         torch_dtype=torch.float16,
-        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-        use_auth_token=token
+        quantization_config=BitsAndBytesConfig(load_in_8bit=True)
    )
 
     return model, tokenizer
@@ -48,10 +47,8 @@ def format_chat_history(history):
         formatted_history += f"<|start_header_id|>assistant<|end_header_id|>{assistant_msg}<|eot_id|>\n"
     return formatted_history
 
-def chat_response(message, history=None):
-    if history is None:
-        history = []
 
+def chat_response(message, history):
     # Format the prompt with system message and chat history
     system_prompt = """<|start_header_id|>system<|end_header_id|>You are Fred, a virtual admissions coordinator for Haven Health Management, a mental health and substance abuse treatment facility. Your role is to respond conversationally and empathetically, like a human agent, using 1-2 sentences per response while guiding the conversation effectively. Your primary goal is to understand the caller's reason for reaching out, gather their medical history, and obtain their insurance details, ensuring the conversation feels natural and supportive. Once all the information is gathered politely end the conversation and if the user is qualified tell the user a live agent will reach out soon. Note: Medicaid is not accepted as insurance.<|eot_id|>"""
 
@@ -117,10 +114,8 @@ model, tokenizer = load_model()
 print("Model loaded!")
 
 # Create Gradio interface with chat
-demo = gr.Interface(
-    fn=chat_response,  # The function to call
-    inputs=[gr.Textbox(label="Message"), gr.State()],  # Message and history as inputs
-    outputs=[gr.Textbox(label="Response"), gr.State()],  # Response and updated history as outputs
+demo = gr.ChatInterface(
+    fn=chat_response,
     title="Admissions Agent Assistant",
     description="Chat with an AI-powered admissions coordinator. The agent will maintain context of your conversation.",
     examples=[
@@ -132,6 +127,5 @@ demo = gr.Interface(
 )
 
 
-
 if __name__ == "__main__":
-    demo.launch(share=True)  # Remove s
+    demo.launch()
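
The authentication change deserves a brief note: recent transformers releases deprecate use_auth_token in favor of token, and passing token=True makes from_pretrained reuse the credential cached by `huggingface-cli login` (or the HF_TOKEN environment variable), which is why the explicit os.environ lookup could be dropped. A minimal sketch of the two equivalent forms, using a placeholder repo id:

    from transformers import AutoTokenizer

    # "your-org/your-model" is a placeholder, not the repo from this commit.
    # token=True tells from_pretrained to reuse the credential cached by
    # `huggingface-cli login` (or the HF_TOKEN environment variable).
    tok = AutoTokenizer.from_pretrained("your-org/your-model", token=True)

    # Equivalent explicit form, mirroring the pre-commit code:
    # import os
    # tok = AutoTokenizer.from_pretrained(
    #     "your-org/your-model",
    #     token=os.environ["HF_TOKEN"],
    # )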
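
The interface rewrite drops the manual history plumbing because gr.ChatInterface owns conversation state itself: it invokes its fn as fn(message, history) and renders the returned string, so the gr.Textbox/gr.State input and output pairs are no longer needed and chat_response loses its history=None default. A runnable sketch of that contract, with a placeholder reply in place of the model call:

    import gradio as gr

    # history holds the prior turns and is supplied by ChatInterface itself,
    # so no gr.State wiring is required. The reply logic is a placeholder.
    def chat_response(message, history):
        return f"You said: {message!r} (turn {len(history) + 1})"

    demo = gr.ChatInterface(
        fn=chat_response,
        title="Admissions Agent Assistant",
    )

    if __name__ == "__main__":
        demo.launch()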