ans123 committed
Commit 03620de · verified · 1 Parent(s): 38cd508

Update app.py

Files changed (1): app.py +13 -12
app.py CHANGED
@@ -1,12 +1,16 @@
 import gradio as gr
 import pandas as pd
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-# Load the model and tokenizer
-model_name = "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
-tokenizer = AutoTokenizer.from_pretrained(model_name)
+from transformers import pipeline
+
+# Load the model pipeline
+model_id = "meta-llama/Llama-3.2-1B"
+pipe = pipeline(
+    "text-generation",
+    model=model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
 
 # Define the system message for the model
 system_message = (
@@ -52,13 +56,10 @@ def chat(user_input, messages):
     # Prepare the input for the model
     input_text = system_message + "\n" + "\n".join([f"{msg['role']}: {msg['content']}" for msg in messages])
 
-    # Tokenize and encode the input text
-    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-
     try:
-        # Generate a response from the model
-        outputs = model.generate(**inputs, max_length=150, num_return_sequences=1, temperature=0.7)
-        response_content = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Generate a response using the pipeline
+        response = pipe(input_text, max_length=150, num_return_sequences=1, temperature=0.7)
+        response_content = response[0]['generated_text'].split('\n')[-1].strip()  # Extract the last line of the generated text
 
     except Exception as e:
         response_content = f"Error: {str(e)}"