Cioni223 committed
Commit e2221cc · verified · 1 Parent(s): e68f0e3

Update app.py

Files changed (1):
  1. app.py +10 -16
app.py CHANGED
@@ -3,7 +3,7 @@ from transformers import AutoTokenizer, LlamaForCausalLM, BitsAndBytesConfig
 from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
 from peft import PeftModel
 import gradio as gr
-import os
+
 
 # Add this new class for custom stopping criteria
 class SentenceEndingCriteria(StoppingCriteria):
@@ -16,25 +16,24 @@ class SentenceEndingCriteria(StoppingCriteria):
         return last_token in self.end_tokens
 
 def load_model():
-    model_path = "Cioni223/mymodel"
-    token = os.environ.get("HUGGINGFACE_TOKEN")  # Ensure you set this environment variable
-
+    # Modify the model path to use the Hugging Face model ID
+    model_path = "Cioni223/mymodel"  # Replace with your actual model path on HF
     tokenizer = AutoTokenizer.from_pretrained(
         model_path,
         use_fast=False,
         padding_side="left",
         model_max_length=4096,
-        token=token
+        token=True  # Add this if your model is private
     )
 
     tokenizer.pad_token = tokenizer.eos_token
 
+    # Load merged model with quantization
     model = LlamaForCausalLM.from_pretrained(
         model_path,
         device_map="auto",
         torch_dtype=torch.float16,
-        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-        use_auth_token=token
+        quantization_config=BitsAndBytesConfig(load_in_8bit=True)
    )
 
     return model, tokenizer
@@ -48,10 +47,8 @@ def format_chat_history(history):
         formatted_history += f"<|start_header_id|>assistant<|end_header_id|>{assistant_msg}<|eot_id|>\n"
     return formatted_history
 
-def chat_response(message, history=None):
-    if history is None:
-        history = []
 
+def chat_response(message, history):
     # Format the prompt with system message and chat history
     system_prompt = """<|start_header_id|>system<|end_header_id|>You are Fred, a virtual admissions coordinator for Haven Health Management, a mental health and substance abuse treatment facility. Your role is to respond conversationally and empathetically, like a human agent, using 1-2 sentences per response while guiding the conversation effectively. Your primary goal is to understand the caller's reason for reaching out, gather their medical history, and obtain their insurance details, ensuring the conversation feels natural and supportive. Once all the information is gathered politely end the conversation and if the user is qualified tell the user a live agent will reach out soon. Note: Medicaid is not accepted as insurance.<|eot_id|>"""
 
@@ -117,10 +114,8 @@ model, tokenizer = load_model()
 print("Model loaded!")
 
 # Create Gradio interface with chat
-demo = gr.Interface(
-    fn=chat_response,  # The function to call
-    inputs=[gr.Textbox(label="Message"), gr.State()],  # Message and history as inputs
-    outputs=[gr.Textbox(label="Response"), gr.State()],  # Response and updated history as outputs
+demo = gr.ChatInterface(
+    fn=chat_response,
     title="Admissions Agent Assistant",
     description="Chat with an AI-powered admissions coordinator. The agent will maintain context of your conversation.",
     examples=[
@@ -132,6 +127,5 @@ demo = gr.Interface(
 )
 
 
-
 if __name__ == "__main__":
-    demo.launch(share=True)  # Remove s
+    demo.launch()
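
The authentication change deserves a brief note: recent transformers releases deprecate use_auth_token in favor of token, and passing token=True makes from_pretrained reuse the credential cached by `huggingface-cli login` (or the HF_TOKEN environment variable), which is why the explicit os.environ lookup could be dropped. A minimal sketch of the two equivalent forms, using a placeholder repo id:

    from transformers import AutoTokenizer

    # "your-org/your-model" is a placeholder, not the repo from this commit.
    # token=True tells from_pretrained to reuse the credential cached by
    # `huggingface-cli login` (or the HF_TOKEN environment variable).
    tok = AutoTokenizer.from_pretrained("your-org/your-model", token=True)

    # Equivalent explicit form, mirroring the pre-commit code:
    # import os
    # tok = AutoTokenizer.from_pretrained(
    #     "your-org/your-model",
    #     token=os.environ["HF_TOKEN"],
    # )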
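
The interface rewrite drops the manual history plumbing because gr.ChatInterface owns conversation state itself: it invokes its fn as fn(message, history) and renders the returned string, so the gr.Textbox/gr.State input and output pairs are no longer needed and chat_response loses its history=None default. A runnable sketch of that contract, with a placeholder reply in place of the model call:

    import gradio as gr

    # history holds the prior turns and is supplied by ChatInterface itself,
    # so no gr.State wiring is required. The reply logic is a placeholder.
    def chat_response(message, history):
        return f"You said: {message!r} (turn {len(history) + 1})"

    demo = gr.ChatInterface(
        fn=chat_response,
        title="Admissions Agent Assistant",
    )

    if __name__ == "__main__":
        demo.launch()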