Commit · 55ed521
1 Parent(s): 4305b4c

Update app_v3.py

app_v3.py CHANGED
@@ -1,7 +1,6 @@
 import streamlit as st
 from transformers import AutoTokenizer, TextStreamer, pipeline
-from auto_gptq import AutoGPTQForCausalLM
-import time
+from auto_gptq import AutoGPTQForCausalLM
 from huggingface_hub import snapshot_download
 import os
 
@@ -11,37 +10,38 @@ cwd = os.getcwd()
 
 quantized_model_dir = cwd + "/Jackson2-4bit-128g-GPTQ"
 
-#
-os.
-
-
+# Check if the model directory is empty (i.e., model not downloaded yet)
+if not os.listdir(quantized_model_dir):
+    # Create the cache directory if it doesn't exist
+    os.makedirs(quantized_model_dir, exist_ok=True)
+    snapshot_download(repo_id=pretrained_model_dir, local_dir=quantized_model_dir, local_dir_use_symlinks=True)
 
 st.write(f'{os.listdir(quantized_model_dir)}')
 model_name_or_path = quantized_model_dir
 model_basename = "Jackson2-4bit-128g-GPTQ"
 
-os.environ['CUDA_VISIBLE_DEVICES']='0'
+os.environ['CUDA_VISIBLE_DEVICES'] = '0'
 
 use_triton = False
 
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)
 
-model = AutoGPTQForCausalLM.from_quantized(
-
-
-
-
-
-
-
+model = AutoGPTQForCausalLM.from_quantized(
+    model_name_or_path,
+    model_basename=model_basename,
+    use_safetensors=True,
+    trust_remote_code=True,
+    device="cuda:0",
+    use_triton=use_triton,
+    quantize_config=None
+)
 
 user_input = st.text_input("Input a phrase")
 
 prompt_template = f'USER: {user_input}\nASSISTANT:'
 
 if st.button("Generate the prompt"):
-
-    inputs_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
     streamer = TextStreamer(tokenizer)
     pipe = pipeline(
         "text-generation",
@@ -52,6 +52,7 @@ if st.button("Generate the prompt"):
         temperature=0.2,
         top_p=0.95,
         repetition_penalty=1.15
-
-    pipe(prompt_template)
-
+    )
+    # You had called pipe(prompt_template) twice which was unnecessary. Just call it once.
+    output = pipe(prompt_template)
+    st.write(output[0]['generated_text'])
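
Review note: as committed, the new download guard calls os.listdir(quantized_model_dir) before os.makedirs(quantized_model_dir, exist_ok=True), so the very first run on a fresh Space raises FileNotFoundError because the directory does not exist yet. A minimal sketch of a safer ordering, assuming pretrained_model_dir holds the Hub repo id that the diff's elided lines define (the placeholder below is hypothetical):

import os
from huggingface_hub import snapshot_download

quantized_model_dir = os.path.join(os.getcwd(), "Jackson2-4bit-128g-GPTQ")
pretrained_model_dir = "<hub-repo-id>"  # hypothetical placeholder; the real value is set in lines the diff hides

# Create the cache directory first so the emptiness check below cannot fail.
os.makedirs(quantized_model_dir, exist_ok=True)

# An empty directory means the model has not been downloaded yet.
if not os.listdir(quantized_model_dir):
    snapshot_download(
        repo_id=pretrained_model_dir,
        local_dir=quantized_model_dir,
        local_dir_use_symlinks=True,
    )

Creating the directory up front also keeps the download idempotent across Space restarts.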
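Review note: the renamed input_ids tensor is still unused, because pipe(prompt_template) re-tokenizes the raw string on its own. The simplest fix is to delete the tokenizer call; alternatively, the tensor can drive model.generate directly. A sketch under two assumptions: the AutoGPTQ wrapper forwards generate like a plain transformers model, and max_new_tokens=512 stands in for the pipeline value hidden between the hunks:

# Sampling parameters mirror the committed pipeline settings.
output_ids = model.generate(
    input_ids=input_ids,
    streamer=streamer,          # TextStreamer prints tokens to stdout as they are generated
    do_sample=True,             # required for temperature/top_p to take effect
    max_new_tokens=512,         # assumption: the diff hides the real value
    temperature=0.2,
    top_p=0.95,
    repetition_penalty=1.15,
)
st.write(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Note that TextStreamer writes to the container log, not the Streamlit page, so the final st.write is still what the user sees.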