rodrigomasini committed on
Commit 55ed521 · 1 Parent(s): 4305b4c

Update app_v3.py

Files changed (1)
  1. app_v3.py +21 -20
app_v3.py CHANGED
@@ -1,7 +1,6 @@
 import streamlit as st
 from transformers import AutoTokenizer, TextStreamer, pipeline
-from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
-import time
+from auto_gptq import AutoGPTQForCausalLM
 from huggingface_hub import snapshot_download
 import os
 
@@ -11,37 +10,38 @@ cwd = os.getcwd()
 
 quantized_model_dir = cwd + "/Jackson2-4bit-128g-GPTQ"
 
-# Create the cache directory if it doesn't exist
-os.makedirs(quantized_model_dir, exist_ok=True)
-
-snapshot_download(repo_id=pretrained_model_dir, local_dir=quantized_model_dir, local_dir_use_symlinks=True)
+# Check if the model directory is empty (i.e., model not downloaded yet)
+if not os.listdir(quantized_model_dir):
+    # Create the cache directory if it doesn't exist
+    os.makedirs(quantized_model_dir, exist_ok=True)
+    snapshot_download(repo_id=pretrained_model_dir, local_dir=quantized_model_dir, local_dir_use_symlinks=True)
 
 st.write(f'{os.listdir(quantized_model_dir)}')
 model_name_or_path = quantized_model_dir
 model_basename = "Jackson2-4bit-128g-GPTQ"
 
-os.environ['CUDA_VISIBLE_DEVICES']='0'
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 
 use_triton = False
 
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)
 
-model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
-    model_basename=model_basename,
-    use_safetensors=True,
-    trust_remote_code=True,
-    device="cuda:0",
-    use_triton=use_triton,
-    quantize_config=None)
-
+model = AutoGPTQForCausalLM.from_quantized(
+    model_name_or_path,
+    model_basename=model_basename,
+    use_safetensors=True,
+    trust_remote_code=True,
+    device="cuda:0",
+    use_triton=use_triton,
+    quantize_config=None
+)
 
 user_input = st.text_input("Input a phrase")
 
 prompt_template = f'USER: {user_input}\nASSISTANT:'
 
 if st.button("Generate the prompt"):
-
-    inputs_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
    streamer = TextStreamer(tokenizer)
    pipe = pipeline(
        "text-generation",
@@ -52,6 +52,7 @@ if st.button("Generate the prompt"):
        temperature=0.2,
        top_p=0.95,
        repetition_penalty=1.15
-    )
-    pipe(prompt_template)
-    st.write(pipe(prompt_template)[0]['generated_text'])
+    )
+    # You had called pipe(prompt_template) twice which was unnecessary. Just call it once.
+    output = pipe(prompt_template)
+    st.write(output[0]['generated_text'])
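A note on the new download guard: os.listdir() raises FileNotFoundError when the path does not exist, so on a fresh deployment the os.makedirs() call inside the if block is never reached. A minimal order-safe sketch, assuming the same repo layout (the pretrained_model_dir value below is a hypothetical placeholder; the real one is defined earlier in app_v3.py, outside the lines shown in this diff):

```python
import os
from huggingface_hub import snapshot_download

# Hypothetical repo id for illustration; the real pretrained_model_dir is
# defined earlier in app_v3.py, outside this diff.
pretrained_model_dir = "rodrigomasini/Jackson2-4bit-128g-GPTQ"
quantized_model_dir = os.path.join(os.getcwd(), "Jackson2-4bit-128g-GPTQ")

# Create the directory before listing it: os.listdir() raises
# FileNotFoundError on a missing path, so makedirs must run first.
os.makedirs(quantized_model_dir, exist_ok=True)

# Download only when the directory is still empty (model not fetched yet).
if not os.listdir(quantized_model_dir):
    snapshot_download(
        repo_id=pretrained_model_dir,
        local_dir=quantized_model_dir,
        local_dir_use_symlinks=True,
    )
```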
 
 
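On the generation path, the commit rightly collapses the duplicated pipe(prompt_template) call, which ran generation twice, into a single call. Note also that input_ids is computed but never used, since the pipeline tokenizes the prompt itself, and that the pipeline's wiring arguments sit in the elided lines 48-51. A minimal sketch of the single-call pattern, assuming those elided arguments pass the model, tokenizer, and streamer (max_new_tokens is a guessed value):

```python
import streamlit as st
from transformers import TextStreamer, pipeline

# `model` and `tokenizer` are assumed to be loaded as in the diff above.
user_input = st.text_input("Input a phrase")
prompt_template = f'USER: {user_input}\nASSISTANT:'

if st.button("Generate the prompt"):
    streamer = TextStreamer(tokenizer)  # prints tokens to stdout as they decode
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        streamer=streamer,
        max_new_tokens=512,  # hypothetical; the real value sits in the elided lines 48-51
        temperature=0.2,
        top_p=0.95,
        repetition_penalty=1.15,
    )
    # Run generation once and reuse the result; no manual tokenizer() call is
    # needed because the pipeline tokenizes the prompt internally.
    output = pipe(prompt_template)
    st.write(output[0]['generated_text'])
```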