Sohrabfx committed
Commit 96f6ff9
1 Parent(s): c39bcf7

Change llm

Files changed (1)
  1. app.py +21 -6
app.py CHANGED
@@ -1,11 +1,26 @@
+import os
+import urllib.request
 import gradio as gr
-from huggingface_hub import InferenceClient
+from llama_cpp import Llama
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("TheBloke/CodeLlama-7B-Instruct-GGML")
+
+def download_file(file_link, filename):
+    # Checks if the file already exists before downloading
+    if not os.path.isfile(filename):
+        urllib.request.urlretrieve(file_link, filename)
+        print("File downloaded successfully.")
+    else:
+        print("File already exists.")
+
+
+# Downloading GGML model from Hugging Face
+ggml_model_path = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGML/resolve/main/codellama-7b-instruct.ggmlv3.Q4_1.bin"
+filename = "codellama-7b-instruct.ggmlv3.Q4_1.bin"
+
+download_file(ggml_model_path, filename)
+
 
+llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
 
 def respond(
     message,
@@ -27,7 +42,7 @@ def respond(
 
     response = ""
 
-    for message in client.chat_completion(
+    for message in llm.create_chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
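
One caveat on the download step: Hugging Face only serves raw files from /resolve/ URLs; the otherwise similar /blob/ URLs return the web viewer's HTML page, which llama.cpp cannot load. The hub's own download helper is an alternative that also handles caching. A minimal sketch, assuming only the repo id and filename from the diff (illustrative, not part of the commit):

from huggingface_hub import hf_hub_download

# Downloads the GGML file into the local Hugging Face cache and
# returns the path to it; repeated calls reuse the cached copy.
model_path = hf_hub_download(
    repo_id="TheBloke/CodeLlama-7B-Instruct-GGML",
    filename="codellama-7b-instruct.ggmlv3.Q4_1.bin",
)

Because hf_hub_download caches locally, it gives the same skip-if-present behavior that the hand-rolled download_file check is emulating.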
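
On the generation side, note that llama-cpp-python's Llama.__call__ expects a plain prompt string, whereas create_chat_completion accepts the OpenAI-style messages list that respond builds. The hunk cuts off before the loop body, so here is a minimal sketch of consuming the stream, assuming the llm and messages objects from the diff; the chunk layout is the OpenAI-style schema llama-cpp-python emits, and max_tokens is an arbitrary illustrative value:

# Accumulate streamed deltas into the response string. The "content"
# key is absent on the role-only first chunk and the final chunk,
# hence the .get() with a default.
response = ""
for chunk in llm.create_chat_completion(
    messages,
    max_tokens=512,
    stream=True,
):
    piece = chunk["choices"][0]["delta"].get("content", "")
    response += piece

With stream=True the call returns an iterator of partial deltas rather than a single completion dict, which is what lets a Gradio chat UI render tokens as they arrive.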