rodrigomasini committed
Commit 5975797 · Parent: 04f87d0

Delete app_v1.py

Files changed (1)
  1. app_v1.py +0 -74
app_v1.py DELETED
@@ -1,74 +0,0 @@
- import streamlit as st
- from transformers import AutoTokenizer
- from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
- from huggingface_hub import snapshot_download
- import os
- import torch
-
- # Clear up some memory
- #torch.cuda.empty_cache()
-
- # Try reducing the number of threads PyTorch uses
- # torch.set_num_threads(1)
-
- cwd = os.getcwd()
- cachedir = cwd + '/cache'
-
- # Check if the directory exists before creating it
- if not os.path.exists(cachedir):
-     os.mkdir(cachedir)
-
- os.environ['HF_HOME'] = cachedir
-
- local_folder = cachedir + "/model"
-
- quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
-
- # Check if the model has already been downloaded
- model_path = os.path.join(local_folder, 'pytorch_model.bin')
- if not os.path.isfile(model_path):
-     snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=False)
-
- model_basename = cachedir + "/model/Jackson2-4bit-128g-GPTQ"
-
- use_strict = False
- use_triton = False
-
- # Load tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
-
- quantize_config = BaseQuantizeConfig(
-     bits=4,
-     group_size=128,
-     desc_act=False
- )
-
- model = AutoGPTQForCausalLM.from_quantized(
-     local_folder,
-     use_safetensors=True,
-     strict=use_strict,
-     model_basename=model_basename,
-     device="cuda:0",
-     trust_remote_code=True,
-     use_triton=use_triton,
-     quantize_config=quantize_config
- )
-
- #st.write(model.hf_device_map)
- user_input = st.text_input("Input a phrase")
-
- prompt_template = f'USER: {user_input}\nASSISTANT:'
-
- # Generate output when the "Generate" button is pressed
- if st.button("Generate the prompt"):
-     inputs = tokenizer(prompt_template, return_tensors="pt")
-     outputs = model.generate(
-         input_ids=inputs.input_ids.to("cuda:0"),
-         attention_mask=inputs.attention_mask.to("cuda:0"),
-         max_length=512 + inputs.input_ids.size(-1),
-         temperature=0.1,
-         top_p=0.95,
-         repetition_penalty=1.15
-     )
-     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     st.text_area("Prompt", value=generated_text)