Maxmobi committed on
Commit
b09533c
·
verified ·
1 Parent(s): 71d2a83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -49
app.py CHANGED
@@ -1,60 +1,44 @@
1
- import os
2
- import urllib.request
3
  import gradio as gr
4
- from llama_cpp import Llama
5
-
6
-
7
def download_file(file_link, filename):
    """Download ``file_link`` to ``filename`` unless the file already exists.

    The payload is first written to ``<filename>.part`` and only moved to
    ``filename`` once the transfer has finished.  Writing directly to the
    final path would leave a truncated file behind after an interrupted
    download, and every later run would then accept it as "already exists".

    Args:
        file_link: URL of the file to fetch.
        filename: Destination path on the local filesystem.

    Raises:
        OSError: If the download or the final rename fails (this includes
            ``urllib.error.URLError``).  No file is left at ``filename``
            in that case.
    """
    # Checks if the file already exists before downloading
    if os.path.isfile(filename):
        print("File already exists.")
        return

    partial_path = f"{filename}.part"
    try:
        urllib.request.urlretrieve(file_link, partial_path)
        # Same-directory rename, so the final file appears all at once.
        os.replace(partial_path, filename)
    finally:
        # Only present if the download or the rename failed part-way.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("File downloaded successfully.")
14
-
15
-
16
# Downloading the quantized Vicuna GGML weights from Hugging Face.
# Note that ``ggml_model_path`` holds the remote URL, while ``filename`` is
# the local file the weights are stored in and later loaded from.
filename = "ggml-vicuna-7b-1.1-q4_1.bin"
ggml_model_path = "https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_1.bin"

download_file(file_link=ggml_model_path, filename=filename)

# Load the downloaded weights once at import time; generate_text reuses
# this instance for every request.
# NOTE(review): n_batch=126 is an unusual value -- confirm 128 was not intended.
llm = Llama(
    model_path=filename,
    n_ctx=512,
    n_batch=126,
)
24
 
 
 
 
25
 
26
def generate_text(prompt="Who is the CEO of Apple?"):
    """Return the model's completion for ``prompt``.

    Args:
        prompt: Text passed to the module-level ``llm`` instance.

    Returns:
        The first completion choice with surrounding whitespace removed and,
        if the model repeated the prompt at the start of its answer, with
        that leading copy stripped.
    """
    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.1,
        top_p=0.5,
        echo=False,
        stop=["#"],
    )
    output_text = output["choices"][0]["text"].strip()

    # Remove Prompt Echo from Generated Text
    # Only a leading echo is dropped.  A global str.replace would also
    # delete the prompt wherever it legitimately appears inside the answer.
    if prompt and output_text.startswith(prompt):
        cleaned_output_text = output_text[len(prompt):].lstrip()
    else:
        cleaned_output_text = output_text
    return cleaned_output_text
40
 
41
 
42
description = "Vicuna-7B"

# Sample rows shown below the UI.
# NOTE(review): each row carries a second (answer) column although the
# interface declares a single text input -- confirm how the installed Gradio
# version treats the extra column.
examples = [
    ["What is the capital of France?", "The capital of France is Paris."],
    [
        "Who wrote the novel 'Pride and Prejudice'?",
        "The novel 'Pride and Prejudice' was written by Jane Austen.",
    ],
    ["What is the square root of 64?", "The square root of 64 is 8."],
]

# Text-in / text-out UI around generate_text.  ``description`` was previously
# defined but never handed to the interface; it is now passed through.
gradio_interface = gr.Interface(
    fn=generate_text,
    inputs="text",
    outputs="text",
    examples=examples,
    title="Vicuna-7B",
    description=description,
)
gradio_interface.launch()
 
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM
3
+ import torch
4
+
5
# Hub id of the checkpoint; shared by the model and tokenizer loaders so the
# two can never drift apart.
_MODEL_ID = "tiiuae/falcon-7b-instruct"

# Load the weights once at import time.  The options request bfloat16
# weights, automatic device placement and reduced CPU memory use while
# loading, and allow the checkpoint's own modelling code to run.
_model_options = {
    "torch_dtype": torch.bfloat16,
    "trust_remote_code": True,
    "device_map": "auto",
    "low_cpu_mem_usage": True,
}
model = AutoModelForCausalLM.from_pretrained(_MODEL_ID, **_model_options)
tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
 
 
 
 
 
 
 
13
 
 
14
 
15
def generate_text(input_text):
    """Generate a sampled completion for ``input_text``.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        input_text: Prompt typed by the user.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        not included).  Blank or missing input yields an empty string
        without invoking the model.
    """
    # An empty prompt tokenizes to a zero-length tensor, which generate()
    # cannot continue from; answer with nothing instead of failing.
    if not input_text or not input_text.strip():
        return ""

    # Let the tokenizer build the attention mask (correct integer dtype) and
    # move the inputs to wherever device_map="auto" placed the model.
    encoded = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[-1]

    output = model.generate(
        input_ids=input_ids,
        attention_mask=encoded["attention_mask"],
        # Budget applies to generated tokens only; max_length also counts the
        # prompt, so long prompts left little or no room for an answer.
        max_new_tokens=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        # Set explicitly so generate() does not have to guess a pad token.
        pad_token_id=tokenizer.eos_token_id,
    )

    # Remove Prompt Echo from Generated Text
    # Slicing off the prompt tokens is exact.  A string replace would miss
    # the prompt when decoding does not round-trip it, and would also delete
    # it anywhere it recurs inside the answer.
    cleaned_output_text = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    )
    print(cleaned_output_text)
    return cleaned_output_text
35
 
36
 
37
# Single-prompt web UI around generate_text.
# Both components use the top-level gr.Textbox: the output was previously
# built from the input namespace (gr.inputs), which is also deprecated.
text_generation_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Input Text"),
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Falcon-7B Instruct",
)
# Launch separately so the name above refers to the Interface itself rather
# than to whatever launch() returns.
text_generation_interface.launch()