Ivan000 committed on
Commit
a1689f4
·
verified ·
1 Parent(s): 808b3ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -56
app.py CHANGED
@@ -1,78 +1,69 @@
1
  # app.py
2
  # =============
3
- # This is a complete app.py file for a text generation app using the Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF model.
4
- # The app is built using Gradio and runs on a CPU without video memory.
5
 
6
- # Imports
7
- # =======
8
- import gradio as gr
9
- from transformers import AutoModelForCausalLM, AutoTokenizer
10
- import torch
 
 
 
 
11
 
12
- # Constants
13
- # =========
14
- MODEL_NAME = "Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF"
15
- DEVICE = "cpu" # Ensure the model runs on CPU
16
 
17
- # Load Model and Tokenizer
18
- # ========================
19
- def load_model_and_tokenizer():
20
- """
21
- Load the model and tokenizer from Hugging Face.
22
- """
23
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
24
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, device_map=DEVICE)
25
- return tokenizer, model
26
 
27
- tokenizer, model = load_model_and_tokenizer()
 
 
 
 
 
 
28
 
29
- # Generate Text
30
- # =============
31
- def generate_text(prompt, max_length=100):
32
  """
33
- Generate text based on the given prompt.
34
 
35
  Args:
36
- prompt (str): The input prompt for text generation.
37
- max_length (int): The maximum length of the generated text.
38
 
39
  Returns:
40
- str: The generated text.
41
  """
42
- inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
43
- outputs = model.generate(inputs.input_ids, max_length=max_length, num_return_sequences=1)
44
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
45
- return generated_text
 
 
 
 
 
46
 
47
- # Gradio Interface
48
- # =================
49
  def gradio_interface():
50
  """
51
- Create and launch the Gradio interface.
52
  """
53
  iface = gr.Interface(
54
- fn=generate_text,
55
- inputs=[
56
- gr.inputs.Textbox(lines=2, placeholder="Enter your prompt here..."),
57
- gr.inputs.Slider(minimum=50, maximum=500, step=10, default=100, label="Max Length")
58
- ],
59
  outputs="text",
60
- title="Qwen2.5-Coder-0.5B-Instruct-GGUF Text Generation",
61
- description="Generate text using the Qwen2.5-Coder-0.5B-Instruct-GGUF model."
62
  )
63
- iface.launch()
64
 
65
- # Main
66
- # ====
67
  if __name__ == "__main__":
68
- gradio_interface()
69
-
70
- # Dependencies
71
- # =============
72
- # The following dependencies are required to run this app:
73
- # - transformers
74
- # - gradio
75
- # - torch
76
- #
77
- # You can install these dependencies using pip:
78
- # pip install transformers gradio torch
 
1
  # app.py
2
  # =============
3
+ # This is a complete app.py file for a Gradio app using the meta-llama/Llama-3.2-3B-Instruct model.
4
+ # The app allows users to input a message and receive a response from the model.
5
 
6
+ # Dependencies
7
+ # =============
8
+ # The following dependencies are required to run this app:
9
+ # - transformers
10
+ # - gradio
11
+ # - torch
12
+ #
13
+ # You can install these dependencies using pip:
14
+ # pip install transformers gradio torch
15
 
16
+ import torch
17
+ from transformers import pipeline
18
+ import gradio as gr # Import gradio
 
19
 
20
# Model configuration
model_id = "meta-llama/Llama-3.2-3B-Instruct"
device = "cpu"  # Use CPU for inference

# Initialize the text-generation pipeline once at import time so that every
# request handled by the app reuses the same loaded model.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    # Pass the configured device explicitly so the `device` setting above is
    # actually honored instead of leaving placement to automatic device mapping.
    device=device,
)
31
 
32
def generate_response(prompt):
    """
    Generate a response from the model based on the given prompt.

    Args:
        prompt (str): The input message from the user.

    Returns:
        str: The text of the assistant's reply.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant!"},
        {"role": "user", "content": prompt},
    ]
    outputs = pipe(
        messages,
        max_new_tokens=256,
    )
    # For chat-style (list of messages) input, "generated_text" holds the whole
    # conversation as a list of message dicts; the last entry is the newly
    # generated assistant message. Return only its text so the Gradio "text"
    # output shows the reply rather than the repr of a dict.
    reply = outputs[0]["generated_text"][-1]
    return reply["content"]
51
 
52
# Define the Gradio interface
def gradio_interface():
    """
    Define the Gradio interface for the app.

    Returns:
        gr.Interface: The configured (not yet launched) interface that sends
        the user's message to generate_response and shows the reply as text.
    """
    iface = gr.Interface(
        fn=generate_response,
        # Use the top-level component class: the legacy `gr.inputs` namespace
        # was deprecated in Gradio 3 and removed in Gradio 4.
        inputs=gr.Textbox(lines=2, placeholder="Enter your message here..."),
        outputs="text",
        title="Llama-3.2-3B-Instruct Chatbot",
        description="Chat with the Llama-3.2-3B-Instruct model. Enter your message and get a response!",
    )
    return iface
65
 
66
# Entry point: build the interface and start serving it, but only when this
# file is executed as a script (not when it is imported as a module).
if __name__ == "__main__":
    gradio_interface().launch()