# Import necessary libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed
import gradio as gr
import torch
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Set device to GPU if available, otherwise CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model, and move the model to the selected device
tokenizer = AutoTokenizer.from_pretrained("mrm8488/falcoder-7b")
model = AutoModelForCausalLM.from_pretrained("mrm8488/falcoder-7b").to(device)

# Falcon-style tokenizers ship without a pad token; fall back to EOS
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token


def generate_text(prompt, max_length, do_sample, temperature, top_k, top_p):
    """
    Generates a text completion for a prompt with the specified parameters.

    :param prompt: Input prompt for text generation.
    :type prompt: str
    :param max_length: Maximum length of the generated text.
    :type max_length: int
    :param do_sample: Whether to use sampling for text generation.
    :type do_sample: bool
    :param temperature: Sampling temperature for text generation.
    :type temperature: float
    :param top_k: Value for top-k sampling.
    :type top_k: int
    :param top_p: Value for top-p sampling.
    :type top_p: float
    :return: Generated text completion.
    :rtype: str
    """
    # Format prompt: prepend a newline and ensure it contains a comma
    formatted_prompt = "\n" + prompt
    if ',' not in prompt:
        formatted_prompt += ','

    # Tokenize prompt and move the tensors to the device
    inputs = tokenizer(formatted_prompt, return_tensors='pt')
    inputs = {key: value.to(device) for key, value in inputs.items()}

    # Generate text completion using the model and specified parameters
    out = model.generate(**inputs,
                         max_length=max_length,
                         do_sample=do_sample,
                         temperature=temperature,
                         no_repeat_ngram_size=3,
                         top_k=top_k,
                         top_p=top_p)
    output = tokenizer.decode(out[0], skip_special_tokens=True)

    # Log the generated text completion
    logger.info("Text generated: %s", output)
    return output
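# Illustrative direct call (hedged): a quick way to exercise generate_text()
# without the UI. The prompt and parameter values are arbitrary examples,
# not tuned recommendations.
# sample = generate_text("Once upon a time,", max_length=64, do_sample=True,
#                        temperature=0.7, top_k=50, top_p=0.9)
# print(sample)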
# Custom CSS for the Gradio interface
custom_css = """
.gradio-container {
    background-color: #0D1525;
    color: white;
}
#orange-button {
    background: #F26207 !important;
    color: white;
}
.cm-gutters {
    border: none !important;
}
"""


def post_processing(prompt, completion):
    """
    Formats a generated completion for display.

    :param prompt: Input prompt used for generation.
    :type prompt: str
    :param completion: Generated completion.
    :type completion: str
    :return: Prompt and completion joined for display.
    :rtype: str
    """
    return prompt + completion


def code_generation(prompt, max_new_tokens, temperature=0.2, seed=42, top_p=0.9,
                    top_k=None, use_cache=True, repetition_penalty=1.0):
    """
    Generates a code completion for a prompt with the specified parameters.

    :param prompt: Input prompt for code generation.
    :type prompt: str
    :param max_new_tokens: Maximum number of tokens to generate.
    :type max_new_tokens: int
    :param temperature: Sampling temperature for code generation.
    :type temperature: float
    :param seed: Random seed for code generation.
    :type seed: int
    :param top_p: Value for top-p sampling.
    :type top_p: float
    :param top_k: Value for top-k sampling.
    :type top_k: int
    :param use_cache: Whether to use the KV cache during generation.
    :type use_cache: bool
    :param repetition_penalty: Value for the repetition penalty.
    :type repetition_penalty: float
    :return: Generated code completion.
    :rtype: str
    """
    # Rough character-based guard against very long prompts; the tokenizer call
    # below enforces the actual token limit via truncation
    MAX_INPUT_TOKENS = 2048
    if len(prompt) > MAX_INPUT_TOKENS:
        prompt = prompt[-MAX_INPUT_TOKENS:]

    # Tokenize prompt and move it to the device
    x = tokenizer.encode(prompt, return_tensors="pt",
                         max_length=MAX_INPUT_TOKENS, truncation=True).to(device)
    logger.info("Prompt shape: %s", x.shape)

    # Generate code completion using the model and specified parameters
    set_seed(seed)
    y = model.generate(x,
                       max_new_tokens=max_new_tokens,
                       temperature=temperature,
                       pad_token_id=tokenizer.pad_token_id,
                       eos_token_id=tokenizer.eos_token_id,
                       top_p=top_p,
                       top_k=top_k,
                       use_cache=use_cache,
                       repetition_penalty=repetition_penalty)
    completion = tokenizer.decode(y[0], skip_special_tokens=True,
                                  clean_up_tokenization_spaces=False)
    # Keep only the newly generated portion, then re-attach the prompt for display
    completion = completion[len(prompt):]
    return post_processing(prompt, completion)


description = """
### Falcoder
Falcoder is a Falcon-7B model fine-tuned on code. It can be used to generate code completions given a prompt.

### Text Generation
Use the Text Generation tab to generate text completions given a prompt. You can adjust the maximum length of the generated text, whether to use sampling, the sampling temperature, and the top-k and top-p values for sampling.

### Code Generation
Use the Code Generation tab to generate code completions given a prompt. You can adjust the maximum number of tokens to generate, the sampling temperature, the random seed, the top-p and top-k values for sampling, whether to use the cache, and the repetition penalty.
"""

# gr.Interface accepts a single function, so each generator gets its own
# interface with inputs matching its parameters; the two are combined into
# one tabbed app
text_interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", lines=5),
        gr.Slider(8, 1024, value=256, step=8, label="Max length"),
        gr.Checkbox(value=True, label="Use sampling"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(0, 100, value=50, step=1, label="Top-k"),
        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    outputs=gr.Textbox(label="Generated text"),
    description=description,
)

code_interface = gr.Interface(
    fn=code_generation,
    inputs=[
        gr.Textbox(label="Prompt", lines=5),
        gr.Slider(8, 1024, value=256, step=8, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.2, step=0.05, label="Temperature"),
        gr.Number(value=42, precision=0, label="Seed"),
        gr.Slider(0.0, 1.0, value=0.9, step=0.05, label="Top-p"),
    ],
    outputs=gr.Textbox(label="Generated code"),
    description=description,
)

demo = gr.TabbedInterface(
    [text_interface, code_interface],
    tab_names=["Text Generation", "Code Generation"],
    title="Falcoder",
    css=custom_css,
)

# Launch Gradio interface
demo.launch()
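# Optional smoke test (hedged, illustrative values): call code_generation()
# directly to check the model without the UI. Since demo.launch() blocks while
# serving, run this in a separate session or before launching.
# print(code_generation("def fibonacci(n):", max_new_tokens=64))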