Spaces:

AIRRC
/

ELN-Llama-1B-base-chat

Sleeping

diabolic6045 committed on Jan 21

Commit

92db476

verified ·

1 Parent(s): 21a7b99

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,19 +10,30 @@ def generate_response(message, temperature, max_length):
     # Tokenize input
     inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
-    # Generate response
     with torch.no_grad():
-        outputs = model.generate(
             inputs["input_ids"],
             max_length=max_length,
             temperature=temperature,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
             num_return_sequences=1,
         )
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response
 # Create the Gradio interface
 demo = gr.Interface(
@@ -46,7 +57,7 @@ demo = gr.Interface(
     - Start with a clear and detailed prompt
     - Adjust temperature: Higher for creative writing, lower for factual completion
     - Adjust max length based on how much text you want to generate
-    """
 )
 if __name__ == "__main__":

     # Tokenize input
     inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=512)
+    # Initialize the generated text with the input message
+    generated_text = message
+    # Generate response token by token
     with torch.no_grad():
+        generated_ids = model.generate(
             inputs["input_ids"],
             max_length=max_length,
             temperature=temperature,
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id,
             num_return_sequences=1,
+            return_dict_in_generate=True,
+            output_scores=True,
         )
+        # Get the generated token ids (excluding the input prompt)
+        new_tokens = generated_ids.sequences[0][inputs["input_ids"].shape[1]:]
+        # Decode and yield tokens one by one
+        for i in range(len(new_tokens)):
+            next_token = tokenizer.decode(new_tokens[:i+1], skip_special_tokens=True)
+            generated_text += next_token
+            yield generated_text
 # Create the Gradio interface
 demo = gr.Interface(
     - Start with a clear and detailed prompt
     - Adjust temperature: Higher for creative writing, lower for factual completion
     - Adjust max length based on how much text you want to generate
+    """,
 )
 if __name__ == "__main__":