rajeshthangaraj1 committed
Commit: d3218e7
Parent: 36186cd

Update app.py

Files changed (1): app.py (+49 -16)
app.py CHANGED
@@ -1,23 +1,56 @@
+import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
 import gradio as gr
-from transformers import pipeline
+from google.colab import userdata

-# Load the GPT-2 model
-generator = pipeline('text-generation', model='gpt2')
+
+# Set up the model and tokenizer
+MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+os.environ['HF_TOKEN'] = os.environ.get('HF_TOKEN')
+os.environ['HUGGINGFACEHUB_API_TOKEN'] = os.environ.get('HF_TOKEN')

-def generate_text(prompt):
-    # Generate text based on the input prompt
-    results = generator(prompt, max_length=100, num_return_sequences=1)
-    return results[0]['generated_text']
+# Configure quantization for CPU
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4"
+)
+
+# Load the model with quantization
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    quantization_config=quantization_config
+)
+
+# Set the device to CPU
+device = "cpu"
+# model.to(device)
+
+# Define the function for the Gradio interface
+def chat_with_phi(message):
+    conversation = [{"role": "user", "content": message}]
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+    )
+    response = pipe(conversation)
+    return response[0]['generated_text']

 # Set up the Gradio interface
-# Updated to use gr.Textbox directly instead of gr.inputs.Textbox
-interface = gr.Interface(
-    fn=generate_text,
-    inputs=gr.Textbox(lines=2, placeholder="Type something here..."),
-    outputs='text',
-    title="Simple Generative AI",
-    description="Type in a prompt and get a continuation from GPT-2!"
+app = gr.Interface(
+    fn=chat_with_phi,
+    inputs=gr.Textbox(label="Type your message:"),
+    outputs=gr.Textbox(label="Phi 3.5 Responds:"),
+    title="Phi 3.5 Text Chat",
+    description="Chat with Phi 3.5 model. Ask anything!",
+    theme="huggingface"
 )

-if __name__ == "__main__":
-    interface.launch()
+# Launch the app
+app.launch(debug=True)
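
Review note: a few details in the committed app.py are worth flagging. from google.colab import userdata is unused and fails outside Colab; the two os.environ lines copy HF_TOKEN onto itself (and raise TypeError when it is unset, since environment values must be strings); the text-generation pipeline is rebuilt inside chat_with_phi on every request; and 4-bit BitsAndBytesConfig needs a CUDA-capable bitsandbytes install, so the "CPU" comments in the commit do not apply to it. The sketch below is one possible cleanup, not the author's code: it assumes the same model ID, a GPU runtime, and a recent transformers release whose text-generation pipeline accepts chat-style message lists; max_new_tokens=256 is an illustrative choice.

import os

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

MODEL_ID = "microsoft/Phi-3.5-mini-instruct"

# Read the token once and only export the alias when it is actually set;
# assigning None to os.environ would raise TypeError.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

# 4-bit NF4 quantization; bitsandbytes requires a CUDA device for this,
# so the sketch assumes a GPU rather than the CPU mentioned in the commit.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    quantization_config=quantization_config,
    token=hf_token,
)

# Build the pipeline once at startup instead of once per request.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def chat_with_phi(message):
    conversation = [{"role": "user", "content": message}]
    output = pipe(conversation, max_new_tokens=256)
    # With chat-style input, generated_text is the full message list;
    # the last entry is the model's reply.
    return output[0]["generated_text"][-1]["content"]

app = gr.Interface(
    fn=chat_with_phi,
    inputs=gr.Textbox(label="Type your message:"),
    outputs=gr.Textbox(label="Phi 3.5 Responds:"),
    title="Phi 3.5 Text Chat",
    description="Chat with Phi 3.5 model. Ask anything!",
)

if __name__ == "__main__":
    app.launch(debug=True)

The sketch also drops theme="huggingface": that was a Gradio 3 built-in theme name and is not recognized by Gradio 4.x, where something like theme=gr.themes.Soft() would be a replacement.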