Wtzwho committed
Commit a1b6133
Parent: 5e835ad

Update app.py

Files changed (1)
  1. app.py +6 -7
app.py CHANGED
@@ -2,16 +2,15 @@ import os
 import gradio as gr
 from transformers import AutoTokenizer, pipeline
 
-# Initialize the model and tokenizer with environment variable for HF_TOKEN
+# Initialize the model and tokenizer
 model_name = "AIFS/Prometh-MOEM-V.01"
-hf_token = os.getenv("HF_TOKEN") # More Pythonic way to fetch environment variables
+hf_token = os.getenv("HF_TOKEN")
 
 tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token)
-
 text_generation_pipeline = pipeline(
     "text-generation",
     model=model_name,
-    model_kwargs={"torch_dtype": "auto", "load_in_4bit": True}, # 'auto' lets PyTorch decide the most optimal dtype
+    model_kwargs={"torch_dtype": "auto", "load_in_4bit": True},
     use_auth_token=hf_token
 )
@@ -21,7 +20,7 @@ def generate_text(user_input):
     outputs = text_generation_pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
     return outputs[0]["generated_text"]
 
-# Updated Gradio interface creation to use the latest syntax
+# Create the Gradio interface without the unsupported argument
 iface = gr.Interface(
     fn=generate_text,
     inputs=gr.Textbox(lines=2, placeholder="Type your question here..."),
@@ -30,5 +29,5 @@ iface = gr.Interface(
     description="A text generation model that understands your queries and generates concise, informative responses."
 )
 
-# Run the interface with enhanced parameters for better performance and user experience
-iface.launch(enable_queue=True) # enable_queue=True for handling high traffic
+# Launch the interface
+iface.launch() # Removed the enable_queue=True argument
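
The dropped enable_queue=True argument matches the Gradio 4.x API, where launch() no longer accepts that keyword; queueing is instead enabled by calling queue() on the interface before launching. A minimal sketch, assuming Gradio 4.x is installed:

    # Gradio 4.x: queue() replaces launch(enable_queue=True) and returns the
    # interface, so the two calls can be chained.
    iface.queue().launch()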
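A related caveat, not part of this commit: use_auth_token is deprecated in recent transformers releases in favor of token. A sketch of the equivalent calls, assuming a transformers version new enough to accept token=:

    # Newer transformers releases expect `token=` rather than the deprecated
    # `use_auth_token=` when authenticating against the Hugging Face Hub.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model_name,
        model_kwargs={"torch_dtype": "auto"},
        token=hf_token,
    )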
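Likewise, load_in_4bit=True only works with the bitsandbytes package installed and a CUDA GPU available, and newer transformers versions prefer it wrapped in a BitsAndBytesConfig passed as quantization_config. A hedged sketch under those assumptions:

    import torch
    from transformers import BitsAndBytesConfig, pipeline

    # 4-bit loading requires bitsandbytes and a CUDA device; on CPU-only
    # Spaces hardware it will fail and should be dropped.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    text_generation_pipeline = pipeline(
        "text-generation",
        model=model_name,
        model_kwargs={"quantization_config": quant_config},
        token=hf_token,
    )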