Create app.py
app.py ADDED
import gradio as gr
import os
import torch
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
from huggingface_hub import login

# Log in only when a token is actually set; login(token=None) would try to
# prompt interactively, which fails in a headless Space.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Reduce CUDA memory fragmentation and let ZeroGPU patch torch's device handling.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["ZERO_GPU_PATCH_TORCH_DEVICE"] = "True"

device = "cuda" if torch.cuda.is_available() else "cpu"
accelerator = Accelerator()
TOKENIZER = AutoTokenizer.from_pretrained("ByteDance-Seed/Seed-Coder-8B-Instruct")
# accelerator.prepare() places the model on the active device; at bfloat16 the
# 8B parameters take roughly 16 GB of weight memory. The tokenizer needs no
# preparation (prepare() returns non-model objects unchanged).
MODEL = accelerator.prepare(
    AutoModelForCausalLM.from_pretrained(
        "ByteDance-Seed/Seed-Coder-8B-Instruct",
        torch_dtype=torch.bfloat16,
    )
)
@spaces.GPU()
def plex(pmpt, input_text):
    # Combine the user's instruction with the code currently in the editor.
    prompt = f"{pmpt}. The code: {input_text}"
    messages = [
        {"role": "system", "content": "Acting as an expert AI Web Development and Programming model. As an expert AI Web Development and Programming model, correct/modify/create/generate/complete the user provided code based on user request, output the code without comments. Only output code without additional prefixed or suffixed messages/comments/wrapping or additional text."},
        {"role": "user", "content": prompt},
    ]
    text = TOKENIZER.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = TOKENIZER([text], return_tensors="pt", return_token_type_ids=False).to(device)
    # Generate, then keep only the tokens produced after the prompt.
    output_ids = MODEL.generate(**model_inputs, max_new_tokens=8192, do_sample=True, temperature=0.7, top_k=30, top_p=0.8)[0][model_inputs.input_ids.shape[1]:]
    # Decode once, then yield character by character so the gr.Code box updates live.
    generated_text = ""
    for new_text in TOKENIZER.decode(output_ids, skip_special_tokens=True):
        generated_text += new_text
        yield generated_text.strip()
    yield generated_text.strip()
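# NOTE: the loop above only starts "streaming" after generate() has fully
# finished, because the whole completion is decoded first and then replayed
# character by character. If true incremental streaming is wanted,
# transformers' TextIteratorStreamer can consume tokens while generate()
# runs on a worker thread. A minimal sketch of that variant, reusing the
# names above (an assumption, not part of this commit):
#
#     from threading import Thread
#     from transformers import TextIteratorStreamer
#
#     # ...build messages and model_inputs exactly as in plex(), then:
#     streamer = TextIteratorStreamer(TOKENIZER, skip_prompt=True, skip_special_tokens=True)
#     Thread(target=MODEL.generate, kwargs=dict(**model_inputs, streamer=streamer,
#         max_new_tokens=8192, do_sample=True, temperature=0.7, top_k=30, top_p=0.8)).start()
#     generated_text = ""
#     for chunk in streamer:  # yields decoded text as tokens arrive
#         generated_text += chunk
#         yield generated_text.strip()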
with gr.Blocks() as iface:
    ins = gr.Code(lines=30)
    pmpt = gr.Textbox()
    btn = gr.Button("Assist")
    btn.click(plex, [pmpt, ins], [ins])

iface.queue()
iface.launch(ssr_mode=False, share=False)
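A note on dependencies: the file imports gradio, spaces, torch, transformers, accelerate, and huggingface_hub, and this commit does not pin any of them. Assuming a Gradio-SDK ZeroGPU Space, where the platform supplies gradio and the spaces package (an assumption about the runtime, not something this commit shows), a plausible minimal requirements.txt would be:

torch
transformers
accelerate

huggingface_hub is pulled in as a dependency of transformers, so it does not need its own entry.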