Arturo Jiménez de los Galanes Reguillos committed on
Commit
1d2e578
·
1 Parent(s): a22e0d4

Do some refactoring

Browse files
Files changed (1) hide show
  1. app.py +3 -21
app.py CHANGED
@@ -1,15 +1,9 @@
1
  import gradio as gr
2
- import os
3
  from huggingface_hub import login
4
  from dotenv import load_dotenv
5
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
6
- import torch
7
  from threading import Thread
8
 
9
- load_dotenv()
10
- hf_token = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
11
- login(hf_token, add_to_git_credential=True)
12
-
13
  MODEL = "m-a-p/OpenCodeInterpreter-DS-33B"
14
 
15
  system_message = "You are a computer programmer that can translate python code to C++ in order to improve performance"
@@ -26,23 +20,14 @@ def messages_for(python):
26
  {"role": "user", "content": user_prompt_for(python)}
27
  ]
28
 
29
- quant_config = BitsAndBytesConfig(
30
- load_in_4bit=True,
31
- bnb_4bit_use_double_quant=True,
32
- bnb_4bit_compute_dtype=torch.bfloat16,
33
- bnb_4bit_quant_type="nf4"
34
- )
35
-
36
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
37
- tokenizer.pad_token = tokenizer.eos_token
38
  streamer = TextIteratorStreamer(tokenizer)
39
 
40
- model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", quantization_config=quant_config)
41
-
42
  cplusplus = None
43
  def translate(python):
44
- inputs = tokenizer.apply_chat_template(messages_for(python), return_tensors="pt").to("cuda")
45
- generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=80)
46
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
47
  thread.start()
48
  cplusplus = ""
@@ -50,8 +35,5 @@ def translate(python):
50
  cplusplus += chunk
51
  yield cplusplus
52
 
53
- del inputs
54
- torch.cuda.empty_cache()
55
-
56
  demo = gr.Interface(fn=translate, inputs="code", outputs="markdown")
57
  demo.launch()
 
1
  import gradio as gr
 
2
  from huggingface_hub import login
3
  from dotenv import load_dotenv
4
  from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
 
5
  from threading import Thread
6
 
 
 
 
 
7
  MODEL = "m-a-p/OpenCodeInterpreter-DS-33B"
8
 
9
  system_message = "You are a computer programmer that can translate python code to C++ in order to improve performance"
 
20
  {"role": "user", "content": user_prompt_for(python)}
21
  ]
22
 
 
 
 
 
 
 
 
23
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
24
+ model = AutoModelForCausalLM.from_pretrained(MODEL)
25
  streamer = TextIteratorStreamer(tokenizer)
26
 
 
 
27
  cplusplus = None
28
  def translate(python):
29
+ inputs = tokenizer(messages_for(python), return_tensors="pt")
30
+ generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=20)
31
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
32
  thread.start()
33
  cplusplus = ""
 
35
  cplusplus += chunk
36
  yield cplusplus
37
 
 
 
 
38
  demo = gr.Interface(fn=translate, inputs="code", outputs="markdown")
39
  demo.launch()