Upload 9 files
- Figure_1.png +0 -0
- all_in_one.py +115 -0
- app.py +39 -0
- custom_dataset.jsonl +50 -0
- document.txt +92 -0
- finetune_codegen.py +129 -0
- requirements.txt +7 -0
- templates/index.html +77 -0
- test.py +34 -0
Figure_1.png
ADDED
all_in_one.py
ADDED
@@ -0,0 +1,115 @@
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, TrainerCallback
from datasets import load_dataset
import matplotlib.pyplot as plt

# Set Hugging Face token (replace with your actual token)
os.environ["HF_TOKEN"] = "hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"  # Replace with your HF_TOKEN

# Download model and tokenizer
model_name = "Salesforce/codegen-350M-multi"
local_model_path = "./codegen_model"
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=local_model_path)
# Load in full precision: float16 weights are meant for GPU inference and can
# fail during CPU training (many half-precision kernels are not implemented on CPU).
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=local_model_path)

# Set padding token (CodeGen has no dedicated pad token)
tokenizer.pad_token = tokenizer.eos_token

# Move model to CPU
device = torch.device("cpu")
model.to(device)

# Load the custom dataset shipped with this project (JSON Lines format)
dataset = load_dataset("json", data_files="custom_dataset.jsonl", split="train")

# Tokenize dataset: each training example is the prompt followed by its code
def tokenize_function(examples):
    inputs = [f"{prompt}\n{code}" for prompt, code in zip(examples["prompt"], examples["code"])]
    return tokenizer(inputs, truncation=True, padding="max_length", max_length=128)

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["prompt", "code"])

# Data collator for causal language modeling (mlm=False disables masked-LM)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./finetuned_codegen",
    overwrite_output_dir=True,
    num_train_epochs=5,  # Increased epochs for better fine-tuning
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    save_steps=500,
    save_total_limit=2,
    logging_steps=100,
    learning_rate=5e-5,
    fp16=False,  # keep False for CPU training
    no_cuda=True,
    dataloader_pin_memory=False,
)

# Custom callback to store training loss
class LossCallback(TrainerCallback):
    def __init__(self):
        self.losses = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs and "loss" in logs:
            self.losses.append(logs["loss"])

loss_callback = LossCallback()

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    callbacks=[loss_callback],
)

# Start fine-tuning
print("Starting fine-tuning...")
trainer.train()

# Save fine-tuned model
model.save_pretrained("./finetuned_codegen")
tokenizer.save_pretrained("./finetuned_codegen")

# Plot training loss
plt.plot(loss_callback.losses, label="Training Loss")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.title("Fine-Tuning Loss Curve")
plt.legend()
plt.savefig("./finetuned_codegen/loss_plot.png")
plt.show()

print("Fine-tuning completed. Model saved to ./finetuned_codegen. Loss plot saved to ./finetuned_codegen/loss_plot.png")

# Test fine-tuned model
print("\nTesting fine-tuned model...")
prompts = [
    "Write a Python program to print 'Hello, World!'",
    "Write a Python function to add two numbers.",
    "Write a Python function to subtract two numbers.",
    "Write a Python function to calculate factorial of a number",
    "Write a Python function to check if a number is prime",
    "Write a Python function to reverse a string"
]

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Prompt: {prompt}\nGenerated Code:\n{generated_code}\n{'-'*50}")
app.py
ADDED
@@ -0,0 +1,39 @@
from flask import Flask, render_template, request
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

app = Flask(__name__)

# Load fine-tuned model and tokenizer
model_path = "./finetuned_codegen"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Load in full precision: half-precision inference is unreliable on CPU
model = AutoModelForCausalLM.from_pretrained(model_path)

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

# Move model to CPU
device = torch.device("cpu")
model.to(device)

@app.route("/", methods=["GET", "POST"])
def index():
    generated_code = ""
    prompt = ""
    if request.method == "POST":
        prompt = request.form["prompt"]
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
        outputs = model.generate(
            **inputs,
            max_length=200,
            num_return_sequences=1,
            pad_token_id=tokenizer.eos_token_id,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
        generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return render_template("index.html", generated_code=generated_code, prompt=prompt)

if __name__ == "__main__":
    app.run(debug=True)
custom_dataset.jsonl
ADDED
@@ -0,0 +1,50 @@
{"prompt": "Write a Python program to print 'Hello, World!'", "code": "print('Hello, World!')"}
{"prompt": "Write a Python function to add two numbers.", "code": "def add_numbers(a, b):\n    return a + b"}
{"prompt": "Write a Python function to subtract two numbers.", "code": "def subtract_numbers(a, b):\n    return a - b"}
{"prompt": "Write a Python function to multiply two numbers.", "code": "def multiply_numbers(a, b):\n    return a * b"}
{"prompt": "Write a Python function to divide two numbers.", "code": "def divide_numbers(a, b):\n    if b == 0:\n        return 'Error: Division by zero'\n    return a / b"}
{"prompt": "Write a Python function to calculate the area of a rectangle.", "code": "def rectangle_area(length, width):\n    return length * width"}
{"prompt": "Write a Python function to calculate the circumference of a circle.", "code": "import math\ndef circle_circumference(radius):\n    return 2 * math.pi * radius"}
{"prompt": "Write a Python function to calculate the area of a circle.", "code": "import math\ndef circle_area(radius):\n    return math.pi * radius**2"}
{"prompt": "Write a Python function to convert Celsius to Fahrenheit.", "code": "def celsius_to_fahrenheit(celsius):\n    return (celsius * 9/5) + 32"}
{"prompt": "Write a Python function to convert Fahrenheit to Celsius.", "code": "def fahrenheit_to_celsius(fahrenheit):\n    return (fahrenheit - 32) * 5/9"}
{"prompt": "Write a Python function to check if a number is even.", "code": "def is_even(n):\n    return n % 2 == 0"}
{"prompt": "Write a Python function to check if a number is odd.", "code": "def is_odd(n):\n    return n % 2 != 0"}
{"prompt": "Write a Python function to find the maximum of two numbers.", "code": "def find_max(a, b):\n    if a > b:\n        return a\n    else:\n        return b"}
{"prompt": "Write a Python function to find the minimum of two numbers.", "code": "def find_min(a, b):\n    if a < b:\n        return a\n    else:\n        return b"}
{"prompt": "Write a Python function to calculate the sum of numbers in a list.", "code": "def sum_list(numbers):\n    total = 0\n    for num in numbers:\n        total += num\n    return total"}
{"prompt": "Write a Python function to calculate the average of numbers in a list.", "code": "def average_list(numbers):\n    if not numbers:\n        return 0\n    return sum(numbers) / len(numbers)"}
{"prompt": "Write a Python function to find the largest number in a list.", "code": "def find_largest(numbers):\n    if not numbers:\n        return None\n    largest = numbers[0]\n    for num in numbers:\n        if num > largest:\n            largest = num\n    return largest"}
{"prompt": "Write a Python function to find the smallest number in a list.", "code": "def find_smallest(numbers):\n    if not numbers:\n        return None\n    smallest = numbers[0]\n    for num in numbers:\n        if num < smallest:\n            smallest = num\n    return smallest"}
{"prompt": "Write a Python function to reverse a string.", "code": "def reverse_string(s):\n    return s[::-1]"}
{"prompt": "Write a Python function to check if a string is a palindrome.", "code": "def is_palindrome(s):\n    return s == s[::-1]"}
{"prompt": "Write a Python function to count the number of vowels in a string.", "code": "def count_vowels(s):\n    vowels = 'aeiouAEIOU'\n    count = 0\n    for char in s:\n        if char in vowels:\n            count += 1\n    return count"}
{"prompt": "Write a Python function to convert a string to uppercase.", "code": "def to_uppercase(s):\n    return s.upper()"}
{"prompt": "Write a Python function to convert a string to lowercase.", "code": "def to_lowercase(s):\n    return s.lower()"}
{"prompt": "Write a Python function to find the length of a string.", "code": "def string_length(s):\n    return len(s)"}
{"prompt": "Write a Python function to check if a list is empty.", "code": "def is_list_empty(lst):\n    return len(lst) == 0"}
{"prompt": "Write a Python function to append an element to a list.", "code": "def append_to_list(lst, element):\n    lst.append(element)\n    return lst"}
{"prompt": "Write a Python function to remove an element from a list.", "code": "def remove_from_list(lst, element):\n    if element in lst:\n        lst.remove(element)\n    return lst"}
{"prompt": "Write a Python function to sort a list of numbers in ascending order.", "code": "def sort_list_ascending(numbers):\n    return sorted(numbers)"}
{"prompt": "Write a Python function to sort a list of numbers in descending order.", "code": "def sort_list_descending(numbers):\n    return sorted(numbers, reverse=True)"}
{"prompt": "Write a Python function to find the index of an element in a list.", "code": "def find_index(lst, element):\n    try:\n        return lst.index(element)\n    except ValueError:\n        return -1"}
{"prompt": "Write a Python function to check if an element exists in a list.", "code": "def element_exists(lst, element):\n    return element in lst"}
{"prompt": "Write a Python function to get the first element of a list.", "code": "def get_first_element(lst):\n    if lst:\n        return lst[0]\n    else:\n        return None"}
{"prompt": "Write a Python function to get the last element of a list.", "code": "def get_last_element(lst):\n    if lst:\n        return lst[-1]\n    else:\n        return None"}
{"prompt": "Write a Python function to create a dictionary from two lists (keys and values).", "code": "def create_dictionary(keys, values):\n    return dict(zip(keys, values))"}
{"prompt": "Write a Python function to get a value from a dictionary by its key.", "code": "def get_dictionary_value(d, key):\n    return d.get(key)"}
{"prompt": "Write a Python function to add a new key-value pair to a dictionary.", "code": "def add_to_dictionary(d, key, value):\n    d[key] = value\n    return d"}
{"prompt": "Write a Python function to remove a key-value pair from a dictionary.", "code": "def remove_from_dictionary(d, key):\n    if key in d:\n        del d[key]\n    return d"}
{"prompt": "Write a Python function to check if a key exists in a dictionary.", "code": "def key_exists_in_dictionary(d, key):\n    return key in d"}
{"prompt": "Write a Python program to get user input and print it.", "code": "user_input = input('Enter something: ')\nprint('You entered:', user_input)"}
{"prompt": "Write a Python function to greet a user by name.", "code": "def greet_user(name):\n    return f'Hello, {name}!'"}
{"prompt": "Write a Python function to calculate the square of a number.", "code": "def square_number(n):\n    return n * n"}
{"prompt": "Write a Python function to calculate the cube of a number.", "code": "def cube_number(n):\n    return n ** 3"}
{"prompt": "Write a Python function to check if a year is a leap year.", "code": "def is_leap_year(year):\n    if (year % 4 == 0 and year % 100 != 0) or (year % 400 == 0):\n        return True\n    else:\n        return False"}
{"prompt": "Write a Python function to count occurrences of a character in a string.", "code": "def count_character(s, char):\n    return s.count(char)"}
{"prompt": "Write a Python function to find the absolute value of a number.", "code": "def absolute_value(n):\n    return abs(n)"}
{"prompt": "Write a Python function to generate a sequence of numbers.", "code": "def generate_sequence(start, end, step):\n    return list(range(start, end, step))"}
{"prompt": "Write a Python function to check if a list contains duplicates.", "code": "def has_duplicates(lst):\n    return len(lst) != len(set(lst))"}
{"prompt": "Write a Python function to get the current date.", "code": "from datetime import date\ndef get_current_date():\n    return date.today()"}
{"prompt": "Write a Python function to get the current time.", "code": "from datetime import datetime\ndef get_current_time():\n    return datetime.now().time()"}
{"prompt": "Write a Python function to simulate a simple coin flip (Heads or Tails).", "code": "import random\ndef coin_flip():\n    return random.choice(['Heads', 'Tails'])"}
document.txt
ADDED
@@ -0,0 +1,92 @@
Text-to-Code Generator using CodeGen-350M-Multi
=============================================

This project provides a text-to-code generator using a fine-tuned Salesforce/codegen-350M-multi
model, designed to run on low-end laptops (8GB RAM, CPU-only) for students to experiment with AI
model development. The model is fine-tuned on a custom dataset and includes a Flask web interface
for easy interaction. All resources are open-source under the Apache-2.0 license, with attribution
to the original model by Salesforce.


Do's and Setup Process
---------------------
1. **System Requirements**:
   - Laptop with at least 8GB RAM and 2GB free disk space.
   - Windows, macOS, or Linux (CPU-only, no GPU required).
   - Internet connection for initial model download.

2. **Install Python**:
   - Use Python 3.10.9. Download from https://www.python.org/downloads/release/python-3109/.
   - Verify installation: `python --version`.

3. **Clone or Download Repository**:
   - Download the project files from the Hugging Face repository:
     https://huggingface.co/remiai3/text-to-code-using-codegen-project.
   - Extract files to a folder (e.g., `text-to-code-codegen`).

4. **Set Up Virtual Environment**:
   - Open a terminal in the project folder.
   - Create a virtual environment: `python -m venv venv`.
   - Activate it:
     - Windows: `venv\Scripts\activate`
     - macOS/Linux: `source venv/bin/activate`

5. **Install Dependencies**:
   - Run: `pip install -r requirements.txt`.
   - Required libraries: torch, transformers, datasets, accelerate, protobuf, matplotlib, flask.
     NOTE: If matplotlib 3.7.2 (or any other pinned version) is incompatible with your Python
     version or with previously installed packages, remove the version pins from
     `requirements.txt` and reinstall by library name only; pip will then resolve a default
     compatible version of each library, as shown below.

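   For example, a version-free requirements file (an illustrative fallback; the pinned
   file shipped with the project is the default) would contain only:

       torch
       transformers
       datasets
       accelerate
       protobuf
       matplotlib
       flask
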
6. **Prepare Custom Dataset**:
   - Ensure the `custom_dataset.jsonl` file exists in the project folder.
   - Format: Each line is a JSON object with `prompt` (natural language) and `code` (Python code);
     a quick format check is sketched after this item.
   - Example:
     {"prompt": "Write a Python program to print 'Hello, World!'", "code": "print('Hello, World!')"}
     {"prompt": "Write a Python function to add two numbers.", "code": "def add_numbers(a, b):\n    return a + b"}

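   A minimal sketch to verify the dataset before fine-tuning (illustrative only;
   this script is not part of the repository):

       import json

       # Every line must be valid JSON with "prompt" and "code" fields.
       with open("custom_dataset.jsonl", "r", encoding="utf-8") as f:
           for i, line in enumerate(f, start=1):
               record = json.loads(line)  # raises an error on malformed JSON
               assert "prompt" in record and "code" in record, f"line {i} is missing a field"
       print("Dataset format looks valid.")
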
7. **Run the Model**:
   - Option 1: Run the full pipeline (download, fine-tune, test):
     - Update `all_in_one.py` with your Hugging Face token (`HF_TOKEN`).
     - Run: `python all_in_one.py`.
     - This downloads the model, fine-tunes it, tests it, and generates a loss plot.
   - Option 2: Test the fine-tuned model directly:
     - Run: `python test.py` to test with sample prompts.
   - Option 3: Use the web interface:
     - Run: `python app.py`.
     - Open a browser and go to `http://127.0.0.1:5000`.

8. **Using the AI Model**:
   - **Command Line Testing**: Use `test.py` to input prompts and generate Python code.
   - **Web Interface**: Use the Flask app (`app.py`) to enter prompts via a browser and view generated code.
   - Example prompts:
     - "Write a Python function to calculate factorial of a number"
     - "Write a Python function to check if a number is prime"
   - Output is saved in `./finetuned_codegen/loss_plot.png` (loss plot) and `./finetuned_codegen`
     (model weights).

9. **Model Details**:
   - Model: Salesforce/codegen-350M-multi (Apache-2.0 license).
   - Source: https://huggingface.co/Salesforce/codegen-350M-multi.
   - Fine-tuned on a custom dataset for Python code generation.
   - Attribution: This project uses the Salesforce CodeGen model, fine-tuned by remiai3 for
     educational purposes.

10. **Troubleshooting**:
    - Ensure ~2GB disk space for model weights.
    - If memory issues occur, reduce the dataset size or the batch size in `all_in_one.py`
      or `finetune_codegen.py`; see the sketch after this item.
    - Check terminal output for errors and ensure all files (`custom_dataset.jsonl`,
      `finetuned_codegen`) are in place.

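    For example, to fine-tune on a smaller subset of the data (an illustrative
    one-liner using the standard `datasets` API; it is not in the shipped scripts):

        # Add right after the dataset is loaded in the fine-tuning script:
        dataset = dataset.select(range(20))  # keep only the first 20 examples
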
11. **Contributing**:
    - Add more examples to `custom_dataset.jsonl` to improve model performance.
    - Share feedback or improvements via the Hugging Face repository:
      https://huggingface.co/remiai3.

Attribution
-----------
This project is built using the Salesforce/codegen-350M-multi model, licensed under Apache-2.0.
The fine-tuned model and resources are provided by remiai3 for free educational use to help students
learn and experiment with AI models.
finetune_codegen.py
ADDED
@@ -0,0 +1,129 @@
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments, DataCollatorForLanguageModeling, TrainerCallback
from datasets import load_dataset
import matplotlib.pyplot as plt

# Set Hugging Face token (replace with your actual token)
os.environ["HF_TOKEN"] = "hf_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"

# Recommended for download stability, if you had issues before
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "600"  # 10-minute timeout
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # enable the robust downloader (requires the hf_transfer package)

# Download model and tokenizer
model_name = "Salesforce/codegen-350M-multi"
local_model_path = "./codegen_model"

print(f"Attempting to download/load tokenizer from {model_name} to {local_model_path}...")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=local_model_path)
print("Tokenizer loaded.")

print(f"Attempting to download/load model from {model_name} to {local_model_path}...")
# torch_dtype=torch.float16 is omitted: half precision is typically for GPUs and
# can cause unexpected behavior or errors on some CPU setups.
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=local_model_path)
print("Model loaded.")

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

# Move model to CPU
device = torch.device("cpu")
model.to(device)
print(f"Model moved to {device}.")

# Load custom dataset from JSONL file
dataset_file = "custom_dataset.jsonl"
print(f"Loading dataset from {dataset_file}...")
dataset = load_dataset("json", data_files=dataset_file, split="train")
print("Dataset loaded.")
print(f"Dataset size: {len(dataset)} examples.")
print(f"First example of dataset: {dataset[0]}")  # print first example to check data format

# Tokenize dataset
def tokenize_function(examples):
    inputs = [f"{prompt}\n{code}" for prompt, code in zip(examples["prompt"], examples["code"])]
    # Reduced max_length to save memory; try 64 or even 32 if 128 is too much.
    return tokenizer(inputs, truncation=True, padding="max_length", max_length=64)

print("Tokenizing dataset...")
tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["prompt", "code"])
print("Dataset tokenized.")
print(f"First tokenized example: {tokenized_dataset[0]}")

# Data collator for language modeling
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Define training arguments
training_args = TrainingArguments(
    output_dir="./finetuned_codegen",
    overwrite_output_dir=True,
    num_train_epochs=3,
    # Aggressively reduced batch size and gradient accumulation for CPU
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,  # no accumulation, true batch size of 1
    save_steps=500,
    save_total_limit=2,
    logging_steps=10,  # log frequently to confirm training is progressing
    learning_rate=5e-5,
    fp16=False,  # keep False for CPU
    use_cpu=True,  # replaces no_cuda=True on newer transformers; with the pinned 4.31.0, use no_cuda=True instead
    dataloader_pin_memory=False,  # disable pin_memory for CPU
    report_to="none",  # disable reporting to avoid potential hangs
    gradient_checkpointing=True,  # helps with memory on CPU too
    max_grad_norm=1.0,
)

# Custom callback to store training loss
class LossCallback(TrainerCallback):
    def __init__(self):
        self.losses = []
        self.log_steps = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs and "loss" in logs:
            self.losses.append(logs["loss"])
            self.log_steps.append(state.global_step)
            print(f"Step {state.global_step}: Loss = {logs['loss']:.4f}")

loss_callback = LossCallback()

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    callbacks=[loss_callback],
)

# Start fine-tuning
print("Starting fine-tuning...")
print("WARNING: Training on CPU will be extremely slow. The 0% progress bar might take a very long time to update.")
print("Please monitor your system's RAM and CPU usage.")
trainer.train()
print("Fine-tuning finished.")

# Save fine-tuned model
model.save_pretrained("./finetuned_codegen")
tokenizer.save_pretrained("./finetuned_codegen")
print("Model fine-tuned and saved to ./finetuned_codegen.")

# Plot training loss
if loss_callback.losses:
    plt.figure(figsize=(10, 6))
    plt.plot(loss_callback.log_steps, loss_callback.losses, label="Training Loss")
    plt.xlabel("Steps")
    plt.ylabel("Loss")
    plt.title("Fine-Tuning Loss Curve")
    plt.legend()
    plt.grid(True)
    plot_path = "./finetuned_codegen/loss_plot.png"
    plt.savefig(plot_path)
    print(f"Loss plot saved to {plot_path}")
    plt.show()
else:
    print("No training losses recorded to plot.")

print("Fine-tuning script finished execution.")
requirements.txt
ADDED
@@ -0,0 +1,7 @@
torch==2.0.1
transformers==4.31.0
datasets==2.14.4
accelerate==0.21.0
protobuf==4.23.4
matplotlib==3.7.2
flask==2.3.2
templates/index.html
ADDED
@@ -0,0 +1,77 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Text-to-Code Generator</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f4f4f9;
        }
        .container {
            max-width: 800px;
            margin: auto;
            background: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        h1 {
            text-align: center;
            color: #333;
        }
        textarea {
            width: 100%;
            height: 100px;
            margin-bottom: 10px;
            padding: 10px;
            border-radius: 4px;
            border: 1px solid #ccc;
        }
        button {
            padding: 10px 20px;
            background-color: #007bff;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
        }
        button:hover {
            background-color: #0056b3;
        }
        pre {
            background-color: #f8f9fa;
            padding: 10px;
            border-radius: 4px;
            overflow-x: auto;
        }
        .attribution {
            margin-top: 20px;
            font-size: 0.9em;
            color: #555;
            text-align: center;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Text-to-Code Generator</h1>
        <form method="POST">
            <textarea name="prompt" placeholder="Enter a prompt (e.g., 'Write a Python function to calculate factorial')" required>{{ prompt }}</textarea>
            <button type="submit">Generate Code</button>
        </form>
        {% if generated_code %}
        <h2>Generated Code:</h2>
        <pre>{{ generated_code }}</pre>
        {% endif %}
        <div class="attribution">
            <p>Built with the fine-tuned <a href="https://huggingface.co/Salesforce/codegen-350M-multi" target="_blank">Salesforce/codegen-350M-multi</a> model.</p>
            <p>Developed by <a href="https://huggingface.co/remiai3" target="_blank">remiai3</a> for educational use. Licensed under Apache-2.0.</p>
            <p>Free resources for students to experiment with AI model development.</p>
        </div>
    </div>
</body>
</html>
test.py
ADDED
@@ -0,0 +1,34 @@
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load fine-tuned model and tokenizer
model_path = "./finetuned_codegen"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# Load in full precision: half-precision inference is unreliable on CPU
model = AutoModelForCausalLM.from_pretrained(model_path)

# Set padding token
tokenizer.pad_token = tokenizer.eos_token

# Move model to CPU
device = torch.device("cpu")
model.to(device)

# Test prompts (including dataset prompts; edit freely)
prompts = [
    "Write a Python program to print 'Hello, your name or any other thing!'"
]

# Generate code for each prompt
for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
    outputs = model.generate(
        **inputs,
        max_length=200,
        num_return_sequences=1,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )
    generated_code = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Prompt: {prompt}\nGenerated Code:\n{generated_code}\n{'-'*50}")