Spaces:

davnas
/

trialDelete

Runtime error

App Files Files Community

davnas committed on Dec 8, 2024

Commit

d58a9a4

verified ·

1 Parent(s): 5b94e9b

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -37

app.py CHANGED Viewed

@@ -1,55 +1,59 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-# Initialize the client
-client = InferenceClient(
-    model="davnas/Italian_Cousine_2.1",
-    headers={"Content-Type": "application/json"}
 )
 def respond(message, history, system_message, max_tokens, temperature, top_p):
-    # Format the prompt including history and system message
-    prompt = ""
-    # Add system message if provided
-    if system_message:
-        prompt += f"{system_message}\n"
-    # Add conversation history
-    for msg in history:
-        if isinstance(msg, list) and len(msg) == 2:
-            prompt += f"User: {msg[0]}\nAssistant: {msg[1]}\n"
     # Add current message
-    prompt += f"User: {message}\nAssistant:"
-    # Prepare parameters for text generation
-    parameters = {
-        "max_new_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "return_full_text": False
-    }
-    response = ""
-    try:
-        # Use generate_text with proper parameters
-        for token in client.text_generation(
-            prompt,
-            stream=True,
-            **parameters
-        ):
-            response += token
-            yield response
-    except Exception as e:
-        yield f"Error: {str(e)}"
 # Create the interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(
-            value="You are a helpful assistant knowledgeable about Italian cuisine.",
             label="System message"
         ),
         gr.Slider(
@@ -73,7 +77,9 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)"
         ),
-    ]
 )
 if __name__ == "__main__":

 import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+# Load model and tokenizer
+model_name = "davnas/Italian_Cousine_2.1"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype=torch.float32,  # Use float32 for CPU
+    low_cpu_mem_usage=True,
+    device_map="auto"
 )
 def respond(message, history, system_message, max_tokens, temperature, top_p):
+    """Generate one assistant reply for the Gradio chat interface.
+
+    Args:
+        message: The latest user message.
+        history: Previous turns as (user, assistant) pairs; may be empty.
+        system_message: Optional system prompt; skipped when empty.
+        max_tokens: Maximum number of new tokens to generate.
+        temperature: Sampling temperature passed to ``generate``.
+        top_p: Nucleus-sampling threshold passed to ``generate``.
+
+    Returns:
+        The decoded reply, without the prompt and without special tokens.
+    """
+    # Format the conversation; an empty system prompt is left out entirely
+    messages = []
+    if system_message:
+        messages.append({"role": "system", "content": system_message})
+    # Add history, skipping turns that have no content yet (e.g. a None reply)
+    for user_msg, assistant_msg in history or []:
+        if user_msg:
+            messages.append({"role": "user", "content": user_msg})
+        if assistant_msg:
+            messages.append({"role": "assistant", "content": assistant_msg})
     # Add current message
+    messages.append({"role": "user", "content": message})
+    # Create the prompt using the tokenizer's chat template
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    )
+    # device_map="auto" may place the model off-CPU; inputs must follow it
+    input_ids = input_ids.to(model.device)
+    # Fall back to EOS when the tokenizer defines no dedicated pad token
+    pad_token_id = tokenizer.pad_token_id
+    if pad_token_id is None:
+        pad_token_id = tokenizer.eos_token_id
+    # Generate response
+    # NOTE: generate() has no `streaming` argument (unknown kwargs raise a
+    # ValueError), so the whole reply is produced in one call.
+    with torch.no_grad():
+        output_ids = model.generate(
+            input_ids,
+            max_new_tokens=int(max_tokens),  # sliders may deliver floats
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            pad_token_id=pad_token_id
+        )
+    # Decode only the newly generated tokens (everything after the prompt)
+    prompt_length = input_ids.shape[-1]
+    response = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
+    return response
 # Create the interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(
+            value="You are a professional chef assistant who provides accurate and detailed recipes.",
             label="System message"
         ),
         gr.Slider(
             step=0.05,
             label="Top-p (nucleus sampling)"
         ),
+    ],
+    title="Italian Cuisine Chatbot",
+    description="Ask me anything about Italian cuisine or cooking!"
 )
 if __name__ == "__main__":