rodrigomasini committed
Commit 5975797 · Parent: 04f87d0

Delete app_v1.py

Files changed (1)
  1. app_v1.py +0 -74
app_v1.py DELETED
@@ -1,74 +0,0 @@
- import streamlit as st
- from transformers import AutoTokenizer
- from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
- from huggingface_hub import snapshot_download
- import os
- import torch
-
- # Clear up some memory
- #torch.cuda.empty_cache()
-
- # Try reducing the number of threads PyTorch uses
- # torch.set_num_threads(1)
-
- cwd = os.getcwd()
- cachedir = cwd + '/cache'
-
- # Check if the directory exists before creating it
- if not os.path.exists(cachedir):
-     os.mkdir(cachedir)
-
- os.environ['HF_HOME'] = cachedir
-
- local_folder = cachedir + "/model"
-
- quantized_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
-
- # Check if the model has already been downloaded
- model_path = os.path.join(local_folder, 'pytorch_model.bin')
- if not os.path.isfile(model_path):
-     snapshot_download(repo_id=quantized_model_dir, local_dir=local_folder, local_dir_use_symlinks=False)
-
- model_basename = cachedir + "/model/Jackson2-4bit-128g-GPTQ"
-
- use_strict = False
- use_triton = False
-
- # Load tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
-
- quantize_config = BaseQuantizeConfig(
-     bits=4,
-     group_size=128,
-     desc_act=False
- )
-
- model = AutoGPTQForCausalLM.from_quantized(
-     local_folder,
-     use_safetensors=True,
-     strict=use_strict,
-     model_basename=model_basename,
-     device="cuda:0",
-     trust_remote_code=True,
-     use_triton=use_triton,
-     quantize_config=quantize_config
- )
-
- #st.write(model.hf_device_map)
- user_input = st.text_input("Input a phrase")
-
- prompt_template = f'USER: {user_input}\nASSISTANT:'
-
- # Generate output when the "Generate" button is pressed
- if st.button("Generate the prompt"):
-     inputs = tokenizer(prompt_template, return_tensors="pt")
-     outputs = model.generate(
-         input_ids=inputs.input_ids.to("cuda:0"),
-         attention_mask=inputs.attention_mask.to("cuda:0"),
-         max_length=512 + inputs.input_ids.size(-1),
-         temperature=0.1,
-         top_p=0.95,
-         repetition_penalty=1.15
-     )
-     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     st.text_area("Prompt", value=generated_text)