Not-Grim-Refer committed
Commit: c12c1d4
1 Parent(s): 465c5b4

Update app.py

Files changed (1):
  1. app.py (+116, -99)
app.py CHANGED
@@ -1,36 +1,61 @@
-import os
+# Import necessary libraries
+from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
 import gradio as gr
 import torch
+import logging
 
-from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
-model = AutoModelForCausalLM.from_pretrained("mrm8488/falcoder-7b", trust_remote_code=True)
-
-description = """# <h1 style="text-align: center; color: white;"><span style='color: #F26207;'> Code Completion with falcoder-7b </h1>
-<span style="color: white; text-align: center;"> falcoder-7b You can click the button to generate your code.</span>"""
-
-token = os.environ["HUB_TOKEN"]
+# Set device to GPU if available, otherwise CPU
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
-PAD_TOKEN = "<|pad|>"
-EOS_TOKEN = "<|endoftext|>"
-UNK_TOKEN = "<|unk|>"
-MAX_INPUT_TOKENS = 1024  # max tokens from context
-
-REPO = "mrm8488/falcoder-7b"
-
-tokenizer = AutoTokenizer.from_pretrained(REPO, use_auth_token=token, trust_remote_code=True)
-tokenizer.truncation_side = "left"  # ensures that if we truncate, we keep the last N tokens of the prompt (L -> R)
-
-if device == "cuda":
-    model = AutoModelForCausalLM.from_pretrained(REPO, use_auth_token=token, trust_remote_code=True, low_cpu_mem_usage=True).to(device, dtype=torch.bfloat16)
-else:
-    model = AutoModelForCausalLM.from_pretrained(REPO, use_auth_token=token, trust_remote_code=True, low_cpu_mem_usage=True)
-
-model.eval()
+# Load tokenizer and model, then move the model to the selected device
+tokenizer = AutoTokenizer.from_pretrained("mrm8488/falcoder-7b", trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained("mrm8488/falcoder-7b", trust_remote_code=True).to(device)
+
+def generate_text(prompt, max_length, do_sample, temperature, top_k, top_p):
+    """
+    Generates a text completion given a prompt and the specified parameters.
+
+    :param prompt: Input prompt for text generation.
+    :type prompt: str
+    :param max_length: Maximum length of the generated text.
+    :type max_length: int
+    :param do_sample: Whether to use sampling for text generation.
+    :type do_sample: bool
+    :param temperature: Sampling temperature for text generation.
+    :type temperature: float
+    :param top_k: Value for top-k sampling.
+    :type top_k: int
+    :param top_p: Value for top-p sampling.
+    :type top_p: float
+    :return: Generated text completion.
+    :rtype: str
+    """
+    # Format the prompt
+    formatted_prompt = "\n" + prompt
+    if ',' not in prompt:
+        formatted_prompt += ','
+
+    # Tokenize the prompt and move it to the device
+    inputs = tokenizer(formatted_prompt, return_tensors="pt")
+    inputs = {key: value.to(device) for key, value in inputs.items()}
+
+    # Generate a completion with the specified parameters
+    out = model.generate(**inputs, max_length=max_length, do_sample=do_sample, temperature=temperature,
+                         no_repeat_ngram_size=3, top_k=top_k, top_p=top_p)
+    output = tokenizer.decode(out[0], skip_special_tokens=True)
+
+    # Log the generated completion
+    logger.info("Text generated: %s", output)
+
+    return output
+
+# Define Gradio interface
 custom_css = """
 .gradio-container {
     background-color: #0D1525;
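
The new loader above keeps the model in float32, while the removed branch loaded it in bfloat16 on GPU to halve memory. The same behaviour still fits the rewritten single-call structure; a minimal sketch, assuming a CUDA host and a checkpoint that supports bfloat16:

    # Optional low-memory GPU load, mirroring the removed bfloat16 path
    import torch
    from transformers import AutoModelForCausalLM

    if torch.cuda.is_available():
        model = AutoModelForCausalLM.from_pretrained(
            "mrm8488/falcoder-7b",
            trust_remote_code=True,
            low_cpu_mem_usage=True,      # avoid materialising a second copy in host RAM
            torch_dtype=torch.bfloat16,  # half the memory of float32
        ).to("cuda")
        model.eval()                     # inference only, as in the previous revision
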
@@ -46,18 +71,52 @@ custom_css = """
 """
 
 def post_processing(prompt, completion):
+    """
+    Formats a generated completion for display.
+
+    :param prompt: Input prompt for text generation.
+    :type prompt: str
+    :param completion: Generated text completion.
+    :type completion: str
+    :return: Formatted text completion.
+    :rtype: str
+    """
     return prompt + completion
-    # completion = "<span style='color: #499cd5;'>" + completion + "</span>"
-    # prompt = "<span style='color: black;'>" + prompt + "</span>"
-    # code_html = f"<hr><br><pre style='font-size: 14px'><code>{prompt}{completion}</code></pre><br><hr>"
-    # return code_html
 
-
 def code_generation(prompt, max_new_tokens, temperature=0.2, seed=42, top_p=0.9, top_k=None, use_cache=True, repetition_penalty=1.0):
-
-    # truncates the prompt to MAX_INPUT_TOKENS if it's too long
+    """
+    Generates a code completion given a prompt and the specified parameters.
+
+    :param prompt: Input prompt for code generation.
+    :type prompt: str
+    :param max_new_tokens: Maximum number of tokens to generate.
+    :type max_new_tokens: int
+    :param temperature: Sampling temperature for code generation.
+    :type temperature: float
+    :param seed: Random seed for code generation.
+    :type seed: int
+    :param top_p: Value for top-p sampling.
+    :type top_p: float
+    :param top_k: Value for top-k sampling.
+    :type top_k: int
+    :param use_cache: Whether to use the KV cache during generation.
+    :type use_cache: bool
+    :param repetition_penalty: Repetition penalty (1.0 means no penalty).
+    :type repetition_penalty: float
+    :return: Generated code completion.
+    :rtype: str
+    """
+    # Truncate the prompt if it is too long (a rough character-based guard;
+    # the tokenizer call below enforces the actual token limit)
+    MAX_INPUT_TOKENS = 2048
+    if len(prompt) > MAX_INPUT_TOKENS:
+        prompt = prompt[-MAX_INPUT_TOKENS:]
+
+    # Tokenize the prompt and move it to the device
     x = tokenizer.encode(prompt, return_tensors="pt", max_length=MAX_INPUT_TOKENS, truncation=True).to(device)
-    print("Prompt shape: ", x.shape)  # just adding to see in the space logs in prod
+    logger.info("Prompt shape: %s", x.shape)
+
+    # Generate the code completion with the specified parameters
     set_seed(seed)
     y = model.generate(x,
                        max_new_tokens=max_new_tokens,
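
A caveat on the new truncation guard above: len(prompt) counts characters, not tokens, so the real budget is enforced only by the tokenizer call. The removed revision instead kept the end of over-long prompts, token-accurately, by setting the tokenizer's truncation side; the same one-liner still works with the tokenizer loaded in this file:

    # When truncating, keep the last MAX_INPUT_TOKENS tokens rather than the first
    tokenizer.truncation_side = "left"
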
@@ -71,75 +130,33 @@ def code_generation(prompt, max_new_tokens, temperature=0.2, seed=42, top_p=0.9,
     )
     completion = tokenizer.decode(y[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
     completion = completion[len(prompt):]
+
     return post_processing(prompt, completion)
 
-demo = gr.Blocks(
-    css=custom_css
-)
-
-with demo:
-    gr.Markdown(value=description)
-    with gr.Row():
-        input_col, settings_col = gr.Column(scale=6), gr.Column(scale=6)
-        with input_col:
-            code = gr.Code(lines=28, label='Input', value="def sieve_eratosthenes(n):")
-        with settings_col:
-            with gr.Accordion("Generation Settings", open=True):
-                max_new_tokens = gr.Slider(
-                    minimum=8,
-                    maximum=128,
-                    step=1,
-                    value=48,
-                    label="Max Tokens",
-                )
-                temperature = gr.Slider(
-                    minimum=0.1,
-                    maximum=2.5,
-                    step=0.1,
-                    value=0.2,
-                    label="Temperature",
-                )
-                repetition_penalty = gr.Slider(
-                    minimum=1.0,
-                    maximum=1.9,
-                    step=0.1,
-                    value=1.0,
-                    label="Repetition Penalty. 1.0 means no penalty.",
-                )
-                seed = gr.Slider(
-                    minimum=0,
-                    maximum=1000,
-                    step=1,
-                    label="Random Seed"
-                )
-                top_p = gr.Slider(
-                    minimum=0.1,
-                    maximum=1.0,
-                    step=0.1,
-                    value=0.9,
-                    label="Top P",
-                )
-                top_k = gr.Slider(
-                    minimum=1,
-                    maximum=64,
-                    step=1,
-                    value=4,
-                    label="Top K",
-                )
-                use_cache = gr.Checkbox(
-                    label="Use Cache",
-                    value=True
-                )
-
-    with gr.Row():
-        run = gr.Button(elem_id="orange-button", value="Generate")
-
-    # with gr.Row():
-    #     # _, middle_col_row_2, _ = gr.Column(scale=1), gr.Column(scale=6), gr.Column(scale=1)
-    #     # with middle_col_row_2:
-    #     output = gr.HTML(label="Generated Code")
-
-    event = run.click(code_generation, [code, max_new_tokens, temperature, seed, top_p, top_k, use_cache, repetition_penalty], code, api_name="predict")
-
-demo.queue(max_size=40).launch()
+description = """
+### Falcoder
+
+Falcoder is a Falcon-7B model fine-tuned on code. It can be used to generate code completions given a prompt.
+
+### Text Generation
+
+Use the text generation tab to generate text completions given a prompt. You can adjust the maximum length of the generated text, whether to use sampling, the sampling temperature, and the top-k and top-p values for sampling.
+
+### Code Generation
+
+Use the code generation tab to generate code completions given a prompt. You can adjust the maximum number of tokens to generate, the sampling temperature, the random seed, the top-p and top-k values for sampling, whether to use the cache, and the repetition penalty.
+"""
+
+# Expose each generator in its own tab so its inputs match its signature
+text_demo = gr.Interface(
+    fn=generate_text,
+    inputs=[gr.Textbox(label="Prompt"),
+            gr.Slider(16, 512, value=128, step=1, label="Max Length"),
+            gr.Checkbox(value=True, label="Do Sample"),
+            gr.Slider(0.1, 2.5, value=0.7, step=0.1, label="Temperature"),
+            gr.Slider(1, 64, value=40, step=1, label="Top K"),
+            gr.Slider(0.1, 1.0, value=0.9, step=0.1, label="Top P")],
+    outputs=gr.Textbox(label="Generated Text"),
+    description=description,
+)
+code_demo = gr.Interface(
+    fn=code_generation,
+    inputs=[gr.Textbox(label="Prompt"),
+            gr.Slider(8, 128, value=48, step=1, label="Max New Tokens")],
+    outputs=gr.Textbox(label="Generated Code"),
+)
+demo = gr.TabbedInterface([text_demo, code_demo],
+                          ["Text Generation", "Code Generation"],
+                          title="Falcoder", css=custom_css)
+
+# Launch Gradio interface
+demo.launch()
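For a quick sanity check of the committed app outside Gradio, the generation call can be reproduced with plain transformers, mirroring the repo and default parameters used above (do_sample=True is made explicit here so that temperature and top_p actually take effect); a minimal sketch:

    # Standalone smoke test reproducing code_generation's call
    from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

    repo = "mrm8488/falcoder-7b"
    tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)

    set_seed(42)  # same default seed as code_generation
    x = tokenizer.encode("def sieve_eratosthenes(n):", return_tensors="pt")
    y = model.generate(x, max_new_tokens=48, do_sample=True, temperature=0.2,
                       top_p=0.9, repetition_penalty=1.0, use_cache=True)
    print(tokenizer.decode(y[0], skip_special_tokens=True))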