Sohrabfx committed
Commit 96f6ff9
1 Parent(s): c39bcf7

Change llm

Files changed (1)
  1. app.py +21 -6
app.py CHANGED
@@ -1,11 +1,26 @@
+import os
+import urllib.request
 import gradio as gr
-from huggingface_hub import InferenceClient
+from llama_cpp import Llama
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("TheBloke/CodeLlama-7B-Instruct-GGML")
+
+def download_file(file_link, filename):
+    # Checks if the file already exists before downloading
+    if not os.path.isfile(filename):
+        urllib.request.urlretrieve(file_link, filename)
+        print("File downloaded successfully.")
+    else:
+        print("File already exists.")
+
+
+# Downloading GGML model from Hugging Face
+ggml_model_path = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGML/resolve/main/codellama-7b-instruct.ggmlv3.Q4_1.bin"
+filename = "codellama-7b-instruct.ggmlv3.Q4_1.bin"
+
+download_file(ggml_model_path, filename)
+
 
+llm = Llama(model_path=filename, n_ctx=512, n_batch=126)
 
 def respond(
     message,
@@ -27,7 +42,7 @@ def respond(
 
     response = ""
 
-    for message in client.chat_completion(
+    for message in llm.create_chat_completion(
         messages,
         max_tokens=max_tokens,
         stream=True,
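
One caveat on the download step: Hugging Face only serves raw files from /resolve/ URLs; the otherwise similar /blob/ URLs return the web viewer's HTML page, which llama.cpp cannot load. The hub's own download helper is an alternative that also handles caching. A minimal sketch, assuming only the repo id and filename from the diff (illustrative, not part of the commit):

from huggingface_hub import hf_hub_download

# Downloads the GGML file into the local Hugging Face cache and
# returns the path to it; repeated calls reuse the cached copy.
model_path = hf_hub_download(
    repo_id="TheBloke/CodeLlama-7B-Instruct-GGML",
    filename="codellama-7b-instruct.ggmlv3.Q4_1.bin",
)

Because hf_hub_download caches locally, it gives the same skip-if-present behavior that the hand-rolled download_file check is emulating.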
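
On the generation side, note that llama-cpp-python's Llama.__call__ expects a plain prompt string, whereas create_chat_completion accepts the OpenAI-style messages list that respond builds. The hunk cuts off before the loop body, so here is a minimal sketch of consuming the stream, assuming the llm and messages objects from the diff; the chunk layout is the OpenAI-style schema llama-cpp-python emits, and max_tokens is an arbitrary illustrative value:

# Accumulate streamed deltas into the response string. The "content"
# key is absent on the role-only first chunk and the final chunk,
# hence the .get() with a default.
response = ""
for chunk in llm.create_chat_completion(
    messages,
    max_tokens=512,
    stream=True,
):
    piece = chunk["choices"][0]["delta"].get("content", "")
    response += piece

With stream=True the call returns an iterator of partial deltas rather than a single completion dict, which is what lets a Gradio chat UI render tokens as they arrive.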