theosaurus committed
Commit · b6072e3 · 1 Parent(s): 504b277
Add GPU decorator to generate_response function and improve login feedback
app.py CHANGED

@@ -58,7 +58,6 @@ llm_model = AutoModelForCausalLM.from_pretrained(
     quantization_config=model_config,
     device_map="auto")
 
-@spaces.GPU
 def initialize_llm():
     """
     Initialize the LLM with careful memory management.
@@ -79,6 +78,7 @@ def initialize_llm():
 
     return model, tokenizer
 
+@spaces.GPU
 def generate_response(prompt:str, history: Optional[list], llm: Optional[AutoModelForCausalLM], tokenizer, max_length: int = 100) -> str:
     """
     Generate a response from the LLM model given a prompt.
@@ -111,8 +111,8 @@ demo = gr.ChatInterface(
 
 if __name__ == "__main__":
     auth = HuggingFaceLogin()
-    auth.login()
+    if auth.login():
+        print("Login successful!")
 
-    # Initialize the model and tokenizer
     llm_model, llm_tokenizer = initialize_llm()
     demo.launch()
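
Context for the decorator move (not stated in the commit itself, just how ZeroGPU Spaces behave): @spaces.GPU requests a GPU only for the duration of each decorated call, so attaching it to the per-request generate_response rather than the startup-time initialize_llm holds the GPU only while a reply is being generated. A minimal sketch of that pattern follows; the model id and generation details are placeholders, not this Space's actual code:

import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model id; the Space loads its own quantized model instead.
model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

@spaces.GPU  # GPU is allocated only while this function runs
def generate_response(prompt: str, max_length: int = 100) -> str:
    # Tokenize on whatever device the model was placed on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_length)
    return tokenizer.decode(output[0], skip_special_tokens=True)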