filipealmeida committed
Commit 9b74e4d
1 Parent(s): 551ba9b

Switched to using llama_cpp

Files changed (2)
  1. app.py +21 -11
  2. requirements.txt +1 -4
app.py CHANGED
@@ -2,12 +2,25 @@ import gradio as gr
 from transformers import pipeline
 import logging
 import re
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+import sys
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
-logging.getLogger('transformers').setLevel(logging.INFO)
 
-llama = pipeline("text-generation", model="filipealmeida/open-llama-3b-v2-pii-transform", model_kwargs={"load_in_8bit": True})
+
+model_repo="filipealmeida/open-llama-3b-v2-pii-transform"
+model_filename="ggml-model-f16.gguf"
+
+def download_model():
+    print("Downloading model...")
+    sys.stdout.flush()
+    file = hf_hub_download(
+        repo_id=model_repo, filename=model_filename
+    )
+    print("Downloaded " + file)
+    return file
 
 def generate_text(prompt, example):
     logging.debug(f"Received prompt: {prompt}")
@@ -19,21 +32,18 @@ def generate_text(prompt, example):
 
     logging.info(f"Input : {input}")
 
-    output = llama(input, max_length=70)
-    generated_text = output[0]["generated_text"]
+    output = llm(input, max_tokens=200, stop=["\n"])
+    print(output)
+    generated_text = output['choices'][0]['text']
     logging.info(f"Generated text: {generated_text}")
 
-    match = re.search("### Response:\n(.*?)\n", generated_text, re.DOTALL)
-
-    parsed_text = "ERROR"
-    if match:
-        parsed_text = match.group(1).strip()
-    else:
-        print("No matching section found.")
+    parsed_text = generated_text.split("\n")[0]
 
     logging.info(f"Parsed text: {parsed_text}")
     return parsed_text
 
+model = download_model()
+llm = Llama(model_path=model)
 
 # Create a Gradio interface
 interface = gr.Interface(
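
For reference, a minimal sketch of the generation path this commit switches to, pieced together from the hunks above: download the GGUF weights from the Hub, load them with llama_cpp, generate with the same max_tokens/stop settings, and keep only the first line of the completion. The example prompt is hypothetical; in app.py the input is built from a prompt template that does not appear in this diff.

import logging

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

logging.basicConfig(level=logging.INFO)

# Same repo and GGUF file as app.py
model_path = hf_hub_download(
    repo_id="filipealmeida/open-llama-3b-v2-pii-transform",
    filename="ggml-model-f16.gguf",
)
llm = Llama(model_path=model_path)

# Hypothetical prompt; app.py assembles its own input from the user prompt
prompt = "My name is John Doe and my phone number is 555-0100."

# Same sampling arguments as app.py: up to 200 tokens, stop at the first newline
output = llm(prompt, max_tokens=200, stop=["\n"])
generated_text = output["choices"][0]["text"]

# Keep only the first line of the completion, as app.py does
parsed_text = generated_text.split("\n")[0]
logging.info(f"Parsed text: {parsed_text}")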
requirements.txt CHANGED
@@ -1,4 +1 @@
-transformers==4.31.0
-torch==2.0.1
-bitsandbytes
-accelerate
+llama-cpp-python