Spaces:

whoami02
/

gradio_101

Sleeping

App Files Files Community

whoami02 committed on Dec 29, 2023

Commit

026899d

1 Parent(s): 1a507e1

Upload gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +90 -0

gradio_app.py ADDED Viewed

	@@ -0,0 +1,90 @@

+import os
+import urllib.request
+import gradio as gr
+from llama_cpp import Llama
+from langchain.llms import llamacpp
+from huggingface_hub import login, hf_hub_download
+from dotenv import load_dotenv
+MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
+MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
+# MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-GGUF"
+# MODEL_BASENAME = "Wizard-Vicuna-7B-Uncensored.Q4_K_M.gguf"
+CONTEXT_WINDOW_SIZE = 8000
+MAX_NEW_TOKENS = 2000
+N_BATCH = 128
+load_dotenv()
+def load_quantized_model(model_id, model_basename):
+    try:
+        model_path = hf_hub_download(
+            repo_id=model_id,
+            filename=model_basename,
+            resume_download=True,
+            cache_dir="./models"
+        )
+        kwargs = {
+            'model_path': model_path,
+            'c_ctx': CONTEXT_WINDOW_SIZE,
+            'max_tokens': MAX_NEW_TOKENS,
+            'n_batch': N_BATCH
+        }
+        return llamacpp.LlamaCpp(**kwargs)
+    except TypeError:
+        return None
+def load_model(model_id, model_basename=None):
+    if ".gguf" in model_basename.lower():
+        llm = load_quantized_model(model_id, model_basename)
+        return llm
+    else:
+        print("currently only .gguf models supported")
+# Downloading GGML model from HuggingFace
+# ggml_model_path = "https://huggingface.co/CRD716/ggml-vicuna-1.1-quantized/resolve/main/ggml-vicuna-7b-1.1-q4_1.bin"
+# filename = "ggml-vicuna-7b-1.1-q4_1.bin"
+# download_file(ggml_model_path, filename)
+# llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
+def generate_text(prompt="Who is the CEO of Apple?"):
+    llm = load_model(MODEL_ID, MODEL_BASENAME)
+    output = llm(
+        prompt,
+        max_tokens=256,
+        temperature=0.1,
+        top_p=0.5,
+        echo=False,
+        stop=["#"],
+    )
+    print(output)
+    return output
+    # output_text = output["choices"][0]["text"].strip()
+    # # Remove Prompt Echo from Generated Text
+    # cleaned_output_text = output_text.replace(prompt, "")
+    # return cleaned_output_text
+description = "Zephyr-beta"
+examples = [
+    ["What is the capital of France?", "The capital of France is Paris."],
+    [
+        "Who wrote the novel 'Pride and Prejudice'?",
+        "The novel 'Pride and Prejudice' was written by Jane Austen.",
+    ],
+    ["What is the square root of 64?", "The square root of 64 is 8."],
+]
+gradio_interface = gr.Interface(
+    fn=generate_text,
+    inputs="text",
+    outputs="text",
+    examples=examples,
+    title="Zephyr-B",
+)
+gradio_interface.launch(share=True)