whoami02 committed
Commit 026899d · Parent(s): 1a507e1

Upload gradio_app.py

Files changed (1)
  1. gradio_app.py +90 -0
gradio_app.py ADDED
@@ -0,0 +1,90 @@
+ import gradio as gr
+ from langchain.llms import llamacpp
+ from huggingface_hub import hf_hub_download
+ from dotenv import load_dotenv
+
+ MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
+ MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
+ # MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-GGUF"
+ # MODEL_BASENAME = "Wizard-Vicuna-7B-Uncensored.Q4_K_M.gguf"
+
+ CONTEXT_WINDOW_SIZE = 8000
+ MAX_NEW_TOKENS = 2000
+ N_BATCH = 128
+
+ # Pull environment variables (e.g. a Hugging Face token for gated repos) from .env
+ load_dotenv()
+ def load_quantized_model(model_id, model_basename):
+     """Download a quantized GGUF model from the Hub and wrap it in LlamaCpp."""
+     try:
+         model_path = hf_hub_download(
+             repo_id=model_id,
+             filename=model_basename,
+             resume_download=True,
+             cache_dir="./models",
+         )
+         kwargs = {
+             "model_path": model_path,
+             "n_ctx": CONTEXT_WINDOW_SIZE,
+             "max_tokens": MAX_NEW_TOKENS,
+             "n_batch": N_BATCH,
+             # Sampling defaults applied to every call through this wrapper.
+             "temperature": 0.1,
+             "top_p": 0.5,
+         }
+         return llamacpp.LlamaCpp(**kwargs)
+     except TypeError:
+         return None
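+
+ # A minimal sketch of the equivalent call through llama_cpp directly, without
+ # the LangChain wrapper (assumes the same model_path returned by
+ # hf_hub_download above; the raw completion comes back as a dict):
+ #
+ # from llama_cpp import Llama
+ # raw_llm = Llama(model_path=model_path, n_ctx=CONTEXT_WINDOW_SIZE, n_batch=N_BATCH)
+ # raw_out = raw_llm("Who is the CEO of Apple?", max_tokens=256, temperature=0.1, top_p=0.5)
+ # text = raw_out["choices"][0]["text"].strip()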
+
+
+ def load_model(model_id, model_basename=None):
+     # Only quantized GGUF checkpoints are supported for now.
+     if model_basename and model_basename.lower().endswith(".gguf"):
+         return load_quantized_model(model_id, model_basename)
+     print("currently only .gguf models are supported")
+     return None
+
+
+ # Downloading a GGML model from HuggingFace (legacy, commented-out example;
+ # note that download_file is not defined in this file):
+ # ggml_model_path = "https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_1.bin"
+ # filename = "ggml-vicuna-7b-1.1-q4_1.bin"
+
+ # download_file(ggml_model_path, filename)
+
+ # llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
+
+
+ # Load the model once at startup rather than on every request.
+ llm = load_model(MODEL_ID, MODEL_BASENAME)
+
+
+ def generate_text(prompt="Who is the CEO of Apple?"):
+     # LlamaCpp returns the completion as a plain string with no prompt echo,
+     # so no post-processing is needed; generation stops at the first "#".
+     output = llm(prompt, stop=["#"])
+     print(output)
+     return output
+
+
+ description = "Zephyr-beta"
+
+ # Each example row must match the interface's single text input,
+ # so it holds only the prompt.
+ examples = [
+     ["What is the capital of France?"],  # expected: Paris
+     ["Who wrote the novel 'Pride and Prejudice'?"],  # expected: Jane Austen
+     ["What is the square root of 64?"],  # expected: 8
+ ]
+
+ gradio_interface = gr.Interface(
+     fn=generate_text,
+     inputs="text",
+     outputs="text",
+     examples=examples,
+     title="Zephyr-B",
+     description=description,
+ )
+ gradio_interface.launch(share=True)
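+
+ # A minimal client-side sketch: once the app is running, it can also be
+ # queried programmatically via gradio_client (assumes gradio_client is
+ # installed; the local URL is an assumption, launch() prints the real one):
+ #
+ # from gradio_client import Client
+ # client = Client("http://127.0.0.1:7860")
+ # print(client.predict("Who is the CEO of Apple?"))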