tell me how you fail specifically
app.py CHANGED

@@ -1,6 +1,7 @@
 import gradio as gr
 from transformers import GPT2LMHeadModel, GPT2Tokenizer
 import torch
+import time

 # Load model and tokenizer from Hugging Face Hub
 model_name = "Electricarchmage/cookbookgpt"
@@ -11,7 +12,7 @@ tokenizer = GPT2Tokenizer.from_pretrained(model_name)
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.padding_side = 'left'

-# Define the respond function
+# Define the respond function with logging for debugging
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -23,7 +24,6 @@ def respond(
     # Preparing the messages for context (the history and the new message)
     messages = [{"role": "system", "content": system_message}]

-    # Convert history to the required format with 'role' and 'content'
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
@@ -32,21 +32,28 @@ def respond(

     messages.append({"role": "user", "content": message})

     # Tokenize the input
     inputs = tokenizer([msg["content"] for msg in messages], return_tensors="pt", padding=True, truncation=True)
     attention_mask = inputs.get('attention_mask', torch.ones_like(inputs['input_ids']))
+
+    start_time = time.time()  # Start the timer

     # Generate output tokens
-    output = model.generate(
-        inputs["input_ids"],
-        attention_mask=attention_mask,
-        max_length=max_tokens + len(inputs["input_ids"][0]),
-        temperature=temperature,
-        top_p=top_p,
-        num_return_sequences=1,
-        do_sample=True,
-        no_repeat_ngram_size=2,
-    )
+    try:
+        output = model.generate(
+            inputs["input_ids"],
+            attention_mask=attention_mask,
+            max_length=max_tokens + len(inputs["input_ids"][0]),
+            temperature=temperature,
+            top_p=top_p,
+            num_return_sequences=1,
+            do_sample=True,
+            no_repeat_ngram_size=2,
+        )
+    except Exception as e:
+        return f"Error during generation: {str(e)}"
+
+    generation_time = time.time() - start_time  # Time taken for generation

     # Decode the output tokens into text
     response = tokenizer.decode(output[0], skip_special_tokens=True)
@@ -54,7 +61,8 @@ def respond(
     # Extract only the assistant's reply
     assistant_reply = response.split("Assistant:")[-1].strip()

-    return assistant_reply
+    # Add generation time in the response for debugging
+    return f"Response: {assistant_reply}\nGeneration time: {generation_time:.2f} seconds"

 # Define the Gradio interface
 demo = gr.ChatInterface(
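
For reference, the pattern this commit adds (a wall-clock timer plus a try/except around model.generate) can be exercised outside Gradio. Below is a minimal standalone sketch assuming the same Electricarchmage/cookbookgpt checkpoint; the prompt string, the 64-token budget, and the sampling values are illustrative stand-ins rather than values taken from the commit, and the torch.no_grad() context is an extra precaution, not part of the diff.

import time

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model_name = "Electricarchmage/cookbookgpt"
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

prompt = "How do I make a simple vegetable broth?"  # illustrative prompt
inputs = tokenizer(prompt, return_tensors="pt")

start_time = time.time()  # start the timer, as in the commit
try:
    with torch.no_grad():  # inference only; no gradients needed
        output = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=inputs["input_ids"].shape[1] + 64,  # prompt length + budget
            do_sample=True,
            temperature=0.7,
            top_p=0.95,
            no_repeat_ngram_size=2,
        )
except Exception as e:
    # mirror the commit's error reporting instead of crashing the app
    print(f"Error during generation: {e}")
else:
    generation_time = time.time() - start_time  # time taken for generation
    print(tokenizer.decode(output[0], skip_special_tokens=True))
    print(f"Generation time: {generation_time:.2f} seconds")

One thing the new timing does not surface: tokenizer([msg["content"] for msg in messages], ...) in respond encodes every message as a separate row of a batch, and tokenizer.decode(output[0], ...) then reads back only the first row's continuation, which belongs to the system message rather than the latest user message. If the Space "fails" by answering off-topic, that batching is a likely place to look; the usual fix is to join the history into a single prompt string before tokenizing.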
|