Maxmobi committed on
Commit
dc8ad76
1 Parent(s): bd4907d

Update app.py

Files changed (1)
  1. app.py +43 -32
app.py CHANGED
@@ -1,33 +1,44 @@
- >>> from llama_cpp import Llama
- >>> llm = Llama(
-       model_path="./models/7B/llama-model.gguf",
-       # n_gpu_layers=-1, # Uncomment to use GPU acceleration
-       # seed=1337, # Uncomment to set a specific seed
-       # n_ctx=2048, # Uncomment to increase the context window
  )
- >>> output = llm(
-       "Q: Name the planets in the solar system? A: ", # Prompt
-       max_tokens=32, # Generate up to 32 tokens, set to None to generate up to the end of the context window
-       stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
-       echo=True # Echo the prompt back in the output
- ) # Generate a completion, can also call create_completion
- >>> print(output)
- {
-   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
-   "object": "text_completion",
-   "created": 1679561337,
-   "model": "./models/7B/llama-model.gguf",
-   "choices": [
-     {
-       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
-       "index": 0,
-       "logprobs": None,
-       "finish_reason": "stop"
-     }
-   ],
-   "usage": {
-     "prompt_tokens": 14,
-     "completion_tokens": 28,
-     "total_tokens": 42
-   }
- }
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ model = AutoModelForCausalLM.from_pretrained(
+     "tiiuae/falcon-7b-instruct",
+     torch_dtype=torch.bfloat16,
+     trust_remote_code=True,
+     device_map="auto",
+     low_cpu_mem_usage=True,
  )
+ tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct")
+
+
+ def generate_text(input_text):
+     # Tokenize the prompt and move it to the same device as the model
+     input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
+     attention_mask = torch.ones_like(input_ids)
+
+     output = model.generate(
+         input_ids,
+         attention_mask=attention_mask,
+         max_length=200,
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=tokenizer.eos_token_id,
+         pad_token_id=tokenizer.eos_token_id,
+     )
+
+     output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+     print(output_text)
+
+     # Remove the prompt echo from the generated text
+     cleaned_output_text = output_text.replace(input_text, "")
+     return cleaned_output_text
+
+
+ text_generation_interface = gr.Interface(
+     fn=generate_text,
+     inputs=[
+         gr.Textbox(label="Input Text"),
+     ],
+     outputs=gr.Textbox(label="Generated Text"),
+     title="Falcon-7B Instruct",
+ ).launch()
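
For reference, a minimal sketch of how the new generation path could be smoke-tested without the Gradio UI. It reuses the model and tokenizer objects defined in the diff above; the prompt and parameter values here are illustrative, not part of the commit:

    # Hypothetical smoke test: run one generation directly, bypassing gr.Interface
    prompt = "Q: Name the planets in the solar system? A: "
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    out = model.generate(
        **inputs,
        max_new_tokens=64,
        do_sample=True,
        top_k=10,
        pad_token_id=tokenizer.eos_token_id,
    )
    print(tokenizer.decode(out[0], skip_special_tokens=True))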