Wtzwho committed
Commit a1b6133
Parent: 5e835ad

Update app.py

Files changed (1)
  1. app.py +6 -7
app.py CHANGED
@@ -2,16 +2,15 @@ import os
 import gradio as gr
 from transformers import AutoTokenizer, pipeline
 
-# Initialize the model and tokenizer with environment variable for HF_TOKEN
+# Initialize the model and tokenizer
 model_name = "AIFS/Prometh-MOEM-V.01"
-hf_token = os.getenv("HF_TOKEN") # More Pythonic way to fetch environment variables
+hf_token = os.getenv("HF_TOKEN")
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
-
 text_generation_pipeline = pipeline(
     "text-generation",
     model=model_name,
-    model_kwargs={"torch_dtype": "auto", "load_in_4bit": True}, # 'auto' lets PyTorch decide the most optimal dtype
+    model_kwargs={"torch_dtype": "auto", "load_in_4bit": True},
     use_auth_token=hf_token
 )
@@ -21,7 +20,7 @@ def generate_text(user_input):
     outputs = text_generation_pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
     return outputs[0]["generated_text"]
 
-# Updated Gradio interface creation to use the latest syntax
+# Create the Gradio interface without the unsupported argument
 iface = gr.Interface(
     fn=generate_text,
     inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
@@ -30,5 +29,5 @@ iface = gr.Interface(
     description="A text generation model that understands your queries and generates concise, informative responses."
 )
 
-# Run the interface with enhanced parameters for better performance and user experience
-iface.launch(enable_queue=True) # enable_queue=True for handling high traffic
+# Launch the interface
+iface.launch() # Removed the enable_queue=True argument
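
The dropped enable_queue=True argument matches the Gradio 4.x API, where launch() no longer accepts that keyword; queueing is instead enabled by calling queue() on the interface before launching. A minimal sketch, assuming Gradio 4.x is installed:

    # Gradio 4.x: queue() replaces launch(enable_queue=True) and returns the
    # interface, so the two calls can be chained.
    iface.queue().launch()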
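A related caveat, not part of this commit: use_auth_token is deprecated in recent transformers releases in favor of token. A sketch of the equivalent calls, assuming a transformers version new enough to accept token=:

    # Newer transformers releases expect `token=` rather than the deprecated
    # `use_auth_token=` when authenticating against the Hugging Face Hub.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model_name,
        model_kwargs={"torch_dtype": "auto"},
        token=hf_token,
    )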
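Likewise, load_in_4bit=True only works with the bitsandbytes package installed and a CUDA GPU available, and newer transformers versions prefer it wrapped in a BitsAndBytesConfig passed as quantization_config. A hedged sketch under those assumptions:

    import torch
    from transformers import BitsAndBytesConfig, pipeline

    # 4-bit loading requires bitsandbytes and a CUDA device; on CPU-only
    # Spaces hardware it will fail and should be dropped.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model_name,
        model_kwargs={"quantization_config": quant_config},
        token=hf_token,
    )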