Spaces:

Zlovoblachko
/

lang_learn_app

Running

File size: 45,630 Bytes

import gradio as gr
import sqlite3
import json
import os
from datetime import datetime
import torch
import nltk
from transformers import (
    T5Tokenizer, 
    T5ForConditionalGeneration, 
    ElectraTokenizer, 
    ElectraForTokenClassification
)
import torch.nn as nn
from tqdm import tqdm

# Make sure the NLTK "punkt" tokenizer data is present before it is needed.
# nltk.data.find() is probed first; the LookupError it raises for a missing
# resource triggers a one-time download. This runs at import time.
# NOTE(review): newer NLTK releases may look for "punkt_tab" instead of
# "punkt" when sentence-tokenizing — confirm against the installed version.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

class HuggingFaceT5GEDInference:
    def __init__(self, model_name="Zlovoblachko/REAlEC_2step_model_testing", 
                 ged_model_name="Zlovoblachko/11tag-electra-grammar-stage2", device=None):
        """
        Load every model the GED-guided correction pipeline needs.

        Four components are created: the ELECTRA token-classification (GED)
        model with its tokenizer, the T5 model with its tokenizer, a second
        T5 encoder used for the GED tag sequence, and a linear gate that
        mixes the two encoder outputs.

        Args:
            model_name: HuggingFace model name/path for the T5-GED model. The
                same repository is also asked for ``ged_components.pt``.
            ged_model_name: HuggingFace model name/path for the GED model
            device: Device to run inference on (cuda/cpu). When falsy, CUDA
                is used if available, otherwise CPU.
        """
        self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        # Load the GED (error detection) model and its tokenizer.
        print(f"Loading GED model from HuggingFace: {ged_model_name}...")
        self.ged_model, self.ged_tokenizer = self._load_ged_model(ged_model_name)
        
        # Load the T5 model and tokenizer from HuggingFace.
        print(f"Loading T5 model from HuggingFace: {model_name}...")
        self.t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
        self.t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
        self.t5_model.to(self.device)
        
        # Separate encoder for the GED tag sequence. The checkpoint is loaded
        # a second time and only its encoder is kept, so this encoder does not
        # share module objects with self.t5_model.
        self.ged_encoder = T5ForConditionalGeneration.from_pretrained(model_name).encoder
        self.ged_encoder.to(self.device)
        
        # Gate: maps the concatenated (source, GED) hidden vectors, each of
        # size d_model, to a single score per position.
        encoder_hidden_size = self.t5_model.config.d_model
        self.gate = nn.Linear(2 * encoder_hidden_size, 1)
        self.gate.to(self.device)
        
        # Try to overwrite the GED encoder and gate with the weights stored in
        # ged_components.pt (keys "ged_encoder" and "gate").
        try:
            print("Loading GED components...")
            from huggingface_hub import hf_hub_download
            ged_components_path = hf_hub_download(
                repo_id=model_name,
                filename="ged_components.pt",
                cache_dir=None
            )
            ged_components = torch.load(ged_components_path, map_location=self.device)
            self.ged_encoder.load_state_dict(ged_components["ged_encoder"])
            self.gate.load_state_dict(ged_components["gate"])
            print("GED components loaded successfully!")
        except Exception as e:
            # Best effort: any failure (download, file format, key mismatch)
            # is only reported and the objects created above are kept.
            # NOTE(review): in that case the gate keeps its freshly
            # initialised nn.Linear weights, which presumably hurts output
            # quality — confirm whether this should be a hard error.
            print(f"Warning: Could not load GED components: {e}")
            print("Using default initialization for GED encoder and gate.")
        
        # Set to evaluation mode
        self.t5_model.eval()
        self.ged_encoder.eval()
        self.gate.eval()
        
    def _load_ged_model(self, model_name):
        """Fetch the ELECTRA GED tokenizer and classifier for *model_name*.

        The classifier is moved to ``self.device`` and put into eval mode.

        Returns:
            A ``(model, tokenizer)`` tuple.
        """
        ged_tokenizer = ElectraTokenizer.from_pretrained(model_name)
        ged_classifier = ElectraForTokenClassification.from_pretrained(model_name)
        for prepare in (lambda m: m.to(self.device), lambda m: m.eval()):
            prepare(ged_classifier)
        return ged_classifier, ged_tokenizer
    
    def _get_ged_predictions(self, text):
        """Get GED predictions for input text - exact same as training preprocessing"""
        inputs = self.ged_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
        with torch.no_grad():
            outputs = self.ged_model(**inputs)
            logits = outputs.logits
        predictions = torch.argmax(logits, dim=2)
        token_predictions = predictions[0].cpu().numpy().tolist()
        tokens = self.ged_tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
        
        ged_tags = []
        for token, pred in zip(tokens, token_predictions):
            if token.startswith("##") or token in ["[CLS]", "[SEP]", "[PAD]"]:
                continue
            ged_tags.append(str(pred))
        
        return " ".join(ged_tags), tokens, token_predictions
    
    def _get_error_spans(self, text):
        """Extract error spans with simplified categories for display"""
        ged_tags_str, tokens, predictions = self._get_ged_predictions(text)
        
        error_spans = []
        clean_tokens = []
        
        for token, pred in zip(tokens, predictions):
            if token.startswith("##") or token in ["[CLS]", "[SEP]", "[PAD]"]:
                continue
            clean_tokens.append(token)
            
            if pred != 0:  # 0 is correct, others are various error types
                # Simplify the 11-tag system to basic categories for user display
                if pred in [1, 2, 3, 4]:  # Various replacement/substitution errors
                    error_type = "Grammar"
                elif pred in [5, 6]:  # Missing elements
                    error_type = "Missing"
                elif pred in [7, 8]:  # Unnecessary elements
                    error_type = "Unnecessary" 
                elif pred in [9, 10]:  # Other error types
                    error_type = "Usage"
                else:
                    error_type = "Error"
                
                error_spans.append({
                    "token": token,
                    "type": error_type,
                    "position": len(clean_tokens) - 1
                })
        
        return error_spans
    
    def _get_error_spans_detailed(self, text):
        """Extract error spans with detailed second_level_tag categories"""
        ged_tags_str, tokens, predictions = self._get_ged_predictions(text)
        
        error_spans = []
        error_types = []
        clean_tokens = []
        
        # Correct id2label mapping
        id2label = {
            0: "correct",
            1: "ORTH",
            2: "FORM", 
            3: "MORPH",
            4: "DET",
            5: "POS",
            6: "VERB",
            7: "NUM",
            8: "WORD",
            9: "PUNCT",
            10: "RED",
            11: "MULTIWORD",
            12: "SPELL"
        }
        
        for token, pred in zip(tokens, predictions):
            if token.startswith("##") or token in ["[CLS]", "[SEP]", "[PAD]"]:
                continue
            clean_tokens.append(token)
            
            if pred != 0:  # 0 is correct, others are various error types
                error_type = id2label.get(pred, "OTHER")
                error_types.append(error_type)
                
                error_spans.append({
                    "token": token,
                    "type": error_type,
                    "position": len(clean_tokens) - 1
                })
        
        return error_spans, list(set(error_types))
    
    def _preprocess_inputs(self, text, max_length=128):
        """Preprocess input text exactly as during training"""
        # Get GED predictions
        ged_tags, _, _ = self._get_ged_predictions(text)
        
        # Tokenize source text (same as training)
        src_tokens = self.t5_tokenizer(
            text, 
            truncation=True, 
            max_length=max_length, 
            return_tensors="pt"
        )
        
        # Tokenize GED tags (same as training)
        ged_tokens = self.t5_tokenizer(
            ged_tags, 
            truncation=True, 
            max_length=max_length, 
            return_tensors="pt"
        )
        
        return {
            "input_ids": src_tokens.input_ids.to(self.device),
            "attention_mask": src_tokens.attention_mask.to(self.device),
            "ged_input_ids": ged_tokens.input_ids.to(self.device),
            "ged_attention_mask": ged_tokens.attention_mask.to(self.device)
        }
    
    def _forward_with_ged(self, input_ids, attention_mask, ged_input_ids, ged_attention_mask, max_length=200):
        """
        Generate output ids from a gated mix of source and GED encodings.

        The source text is encoded with the T5 encoder and the GED tag
        sequence with ``self.ged_encoder``. Both hidden-state sequences are
        cut to the shorter of the two lengths, a sigmoid gate computed from
        their concatenation blends them position by position, and the blend
        is handed to ``self.t5_model.generate`` as the encoder output.

        Args:
            input_ids / attention_mask: tokenized source text.
            ged_input_ids / ged_attention_mask: tokenized GED tag string.
            max_length: ``max_length`` passed to ``generate``.

        Returns:
            Whatever ``self.t5_model.generate`` returns (generated token ids).
        """
        # Get source encoder outputs
        src_encoder_outputs = self.t5_model.encoder(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=True
        )
        
        # Get GED encoder outputs
        ged_encoder_outputs = self.ged_encoder(
            input_ids=ged_input_ids,
            attention_mask=ged_attention_mask,
            return_dict=True
        )
        
        # Get hidden states
        src_hidden_states = src_encoder_outputs.last_hidden_state
        ged_hidden_states = ged_encoder_outputs.last_hidden_state
        
        # The two sequences can differ in length; both are truncated to the
        # shorter one and concatenated along the feature dimension (dim=2).
        min_len = min(src_hidden_states.size(1), ged_hidden_states.size(1))
        combined = torch.cat([
            src_hidden_states[:, :min_len, :],
            ged_hidden_states[:, :min_len, :]
        ], dim=2)
        
        # Gate in (0, 1): weight of the source encoding; the GED encoding
        # receives the complement.
        gate_scores = torch.sigmoid(self.gate(combined))
        combined_hidden = (
            gate_scores * src_hidden_states[:, :min_len, :] +
            (1 - gate_scores) * ged_hidden_states[:, :min_len, :]
        )
        
        # Overwrite the source encoder output in place with the blend.
        src_encoder_outputs.last_hidden_state = combined_hidden
        
        # Deterministic decoding: no sampling, single beam.
        # NOTE(review): no attention mask is passed to generate(); presumably
        # fine for a single unpadded sequence — confirm before batching.
        decoder_outputs = self.t5_model.generate(
            encoder_outputs=src_encoder_outputs,
            max_length=max_length,
            do_sample=False,
            num_beams=1
        )
        
        return decoder_outputs
    
    def correct_text(self, text, max_length=200):
        """
        Correct grammatical errors in input text
        
        Args:
            text: Input text to correct
            max_length: Maximum length for generation
            
        Returns:
            Corrected text as string
        """
        # Preprocess inputs exactly as training
        inputs = self._preprocess_inputs(text)
        
        # Generate correction using GED-enhanced model
        with torch.no_grad():
            generated_ids = self._forward_with_ged(
                input_ids=inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                ged_input_ids=inputs["ged_input_ids"],
                ged_attention_mask=inputs["ged_attention_mask"],
                max_length=max_length
            )
        
        # Decode output
        corrected_text = self.t5_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        return corrected_text
    
    def analyze_text(self, text):
        """Enhanced analysis method for Gradio integration"""
        if not text.strip():
            return "Model not available or empty text", ""

        try:
            # Get corrected text
            corrected_text = self.correct_text(text)

            # Get error spans (use the original method for display)
            error_spans = self._get_error_spans(text)
        
            # Generate HTML output
            html_output = self.generate_html_analysis(text, corrected_text, error_spans)
        
            return corrected_text, html_output
        
        except Exception as e:
            return f"Error during analysis: {str(e)}", ""
    
    def generate_html_analysis(self, original, corrected, error_spans):
        """Generate enhanced HTML analysis output"""
        # Create highlighted original text
        highlighted_original = original
        if error_spans:
            # Sort by position in reverse to avoid index shifting
            sorted_spans = sorted(error_spans, key=lambda x: x['position'], reverse=True)
            
            # Simple highlighting - in a more sophisticated version, you'd map token positions to character positions
            for span in sorted_spans:
                token = span['token']
                error_type = span['type']
                
                # Color coding for different error types
                color_map = {
                    "Grammar": "#ffebee",      # Light red
                    "Missing": "#e8f5e8",      # Light green
                    "Unnecessary": "#fff3e0",   # Light orange
                    "Usage": "#e3f2fd"         # Light blue
                }
                
                color = color_map.get(error_type, "#f5f5f5")
                
                # Simple token replacement (basic highlighting)
                if token in highlighted_original:
                    highlighted_original = highlighted_original.replace(
                        token, 
                        f"<span style='background-color: {color}; padding: 1px 3px; border-radius: 3px; margin: 0 1px;' title='{error_type}'>{token}</span>",
                        1
                    )
        
        html = f"""
        <div style='font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #f9f9f9;'>
            <h3 style='color: #333; margin-top: 0;'>Grammar Analysis Results</h3>
            
            <div style='margin: 15px 0;'>
                <h4 style='color: #555;'>Original Text with Error Highlighting:</h4>
                <div style='padding: 10px; background-color: #fff; border: 1px solid #ddd; border-radius: 4px;'>{highlighted_original}</div>
            </div>
            
            <div style='margin: 15px 0;'>
                <h4 style='color: #28a745;'>Corrected Text:</h4>
                <p style='padding: 10px; background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 4px;'>{corrected}</p>
            </div>
            
            <div style='margin: 15px 0;'>
                <h4 style='color: #333;'>Error Summary:</h4>
                <p style='color: #666;'>Found {len(error_spans)} potential issues</p>
                
                <div style='margin-top: 10px;'>
                    <span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #ffebee; border-radius: 12px; font-size: 12px;'>Grammar</span>
                    <span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #e8f5e8; border-radius: 12px; font-size: 12px;'>Missing</span>
                    <span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #fff3e0; border-radius: 12px; font-size: 12px;'>Unnecessary</span>
                    <span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #e3f2fd; border-radius: 12px; font-size: 12px;'>Usage</span>
                </div>
            </div>
        </div>
        """
        return html
    
def clear_and_reload_database():
    """Delete every row of ``sentence_database`` and reload it from disk.

    The connection is closed even if the ``DELETE`` fails (for example when
    the table does not exist yet); the error itself still propagates. The
    reload is done by ``load_sentence_database()`` with its default path.
    """
    conn = sqlite3.connect('language_app.db')
    try:
        conn.execute("DELETE FROM sentence_database")
        conn.commit()
        print("Cleared existing sentence database")
    finally:
        conn.close()

    # Reload
    load_sentence_database()

# Initialize SQLite database for storing submissions and exercises
def init_database():
    """Create the application's tables in ``language_app.db`` when missing.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    repeatedly is harmless.
    """
    schema = (
        # Users
        '''CREATE TABLE IF NOT EXISTS users (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        username TEXT UNIQUE NOT NULL,
        email TEXT UNIQUE NOT NULL,
        role TEXT NOT NULL,
        password_hash TEXT NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''',
        # Writing tasks
        '''CREATE TABLE IF NOT EXISTS tasks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        description TEXT NOT NULL,
        image_url TEXT,
        creator_id INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''',
        # Student submissions and their analysis
        '''CREATE TABLE IF NOT EXISTS submissions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        task_id INTEGER,
        student_name TEXT NOT NULL,
        content TEXT NOT NULL,
        analysis_result TEXT,
        analysis_html TEXT,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''',
        # Generated exercises
        '''CREATE TABLE IF NOT EXISTS exercises (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        title TEXT NOT NULL,
        instructions TEXT NOT NULL,
        sentences TEXT NOT NULL,
        image_url TEXT,
        submission_id INTEGER,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''',
        # Attempts at exercises
        '''CREATE TABLE IF NOT EXISTS exercise_attempts (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        exercise_id INTEGER,
        student_name TEXT NOT NULL,
        responses TEXT NOT NULL,
        score REAL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )''',
        # Reference sentences with their error annotations
        '''CREATE TABLE IF NOT EXISTS sentence_database (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        text TEXT NOT NULL,
        tags TEXT NOT NULL,
        error_types TEXT NOT NULL
    )''',
    )

    connection = sqlite3.connect('language_app.db')
    cursor = connection.cursor()
    for statement in schema:
        cursor.execute(statement)
    connection.commit()
    connection.close()


def load_sentence_database(jsonl_file_path='sentencewise_full.jsonl'):
    """Fill the ``sentence_database`` table from a JSONL file.

    Each non-empty line must be a JSON object with ``text`` and ``tags``;
    every tag's ``second_level_tag`` contributes one error type. A sentence is
    stored only if it has at least one error type. Malformed lines are
    reported and skipped. Nothing is loaded when the table already has rows.

    Problems opening or reading the file are printed, not raised, and the
    database connection is always closed.

    Args:
        jsonl_file_path: Path of the JSONL file to import.
    """
    print(f"Debug: Attempting to load from: {jsonl_file_path}")
    print(f"Debug: Current working directory: {os.getcwd()}")
    print(f"Debug: File exists: {os.path.exists(jsonl_file_path)}")

    conn = sqlite3.connect('language_app.db')
    try:
        c = conn.cursor()

        # Create sentence database table
        c.execute('''CREATE TABLE IF NOT EXISTS sentence_database (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        text TEXT NOT NULL,
        tags TEXT NOT NULL,
        error_types TEXT NOT NULL
    )''')

        # Check if data already loaded
        c.execute("SELECT COUNT(*) FROM sentence_database")
        current_count = c.fetchone()[0]
        if current_count > 0:
            print(f"Sentence database already loaded with {current_count} sentences")
            return

        lines_processed = 0
        line_num = 0  # stays 0 for an empty file, so the summary still works

        try:
            print(f"Debug: Opening file {jsonl_file_path}")
            with open(jsonl_file_path, 'r', encoding='utf-8') as f:
                for line_num, line in enumerate(f, 1):
                    try:
                        line = line.strip()
                        if not line:  # Skip empty lines
                            continue

                        data = json.loads(line)
                        text = data.get('text', '')
                        tags = data.get('tags', [])

                        if not text or not tags:
                            print(f"Debug: Skipping line {line_num} - missing text or tags")
                            continue

                        # Distinct second_level_tag values, first-seen order.
                        error_types = list(dict.fromkeys(
                            second_level
                            for second_level in (tag.get('second_level_tag', '') for tag in tags)
                            if second_level
                        ))

                        # Debug: Print first few entries
                        if line_num <= 3:
                            print(f"Debug line {line_num}: text='{text[:50]}...', error_types={error_types}")
                            print(f"Debug: Raw tags for line {line_num}: {tags}")

                        if error_types:  # Only insert if we have error types
                            c.execute("""INSERT INTO sentence_database (text, tags, error_types) 
                                    VALUES (?, ?, ?)""",
                                      (text, json.dumps(tags), json.dumps(error_types)))
                            lines_processed += 1

                        if line_num % 1000 == 0:
                            print(f"Processed {line_num} lines, inserted {lines_processed} sentences...")

                    except json.JSONDecodeError as e:
                        print(f"JSON decode error on line {line_num}: {e}")
                        print(f"Line content: {line[:100]}...")
                        continue
                    except Exception as e:
                        # One bad record must not abort the whole import.
                        print(f"Error processing line {line_num}: {e}")
                        continue

            conn.commit()
            print(f"Successfully loaded sentence database with {lines_processed} sentences from {line_num} total lines")

        except FileNotFoundError:
            print(f"Error: {jsonl_file_path} not found in {os.getcwd()}")
            print("Available files:")
            try:
                for name in os.listdir('.'):
                    if name.endswith('.jsonl') or name.endswith('.json'):
                        print(f"  - {name}")
            except OSError:
                print("  Could not list files")
        except Exception as e:
            print(f"Error loading sentence database: {e}")
    finally:
        conn.close()

def find_similar_sentences(error_types, limit=5):
    """Pick stored sentences whose error types overlap with *error_types*.

    For each requested error type up to *limit* random rows are fetched;
    the combined results are then de-duplicated by sentence text (first
    occurrence wins) and cut off once *limit* sentences were collected.

    Returns:
        List of ``{'text': ..., 'tags': ...}`` dicts; empty when
        *error_types* is empty.
    """
    if not error_types:
        return []

    query = """SELECT text, tags FROM sentence_database 
                     WHERE error_types LIKE ? 
                     ORDER BY RANDOM() 
                     LIMIT ?"""

    db = sqlite3.connect('language_app.db')
    cursor = db.cursor()

    candidates = []
    for kind in error_types:
        # error_types is stored as a JSON list, so match the quoted name.
        cursor.execute(query, (f'%"{kind}"%', limit))
        candidates.extend(
            {'text': sentence, 'tags': json.loads(raw_tags)}
            for sentence, raw_tags in cursor.fetchall()
        )

    db.close()

    # Keep the first occurrence of every text, stopping at the limit.
    chosen = {}
    for candidate in candidates:
        if candidate['text'] in chosen:
            continue
        chosen[candidate['text']] = candidate
        if len(chosen) >= limit:
            break

    return list(chosen.values())


# Start-up sequence, executed when the module is loaded. Order matters:
# 1. init_database() creates the tables (including sentence_database);
# 2. clear_and_reload_database() deletes all rows of sentence_database and
#    reloads them from the JSONL file, so the table is rebuilt on every start;
# 3. the grammar checker is constructed, which loads the models.
init_database()
print("Clearing and loading sentence database...")
clear_and_reload_database()
print("Initializing enhanced grammar checker...")
grammar_checker = HuggingFaceT5GEDInference()
print("Grammar checker initialized successfully!")

# Gradio Interface Functions
def analyze_student_writing(text, student_name, task_title="General Writing Task"):
    """Analyze a student's text and record the submission.

    The task row for *task_title* is looked up and only created when it does
    not exist yet. (``tasks.title`` has no UNIQUE constraint, so the previous
    ``INSERT OR IGNORE`` added a duplicate task on every submission.)

    Args:
        text: The student's writing; ``None`` or blank input is rejected.
        student_name: Name stored with the submission; ``None`` or blank
            input is rejected.
        task_title: Title of the task the submission belongs to.

    Returns:
        ``(corrected_text, html_analysis)`` from the grammar checker, or a
        ``(message, "")`` pair when an input is missing.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", ""

    if not student_name or not student_name.strip():
        return "Please enter your name.", ""

    # Analyze text with enhanced model
    corrected_text, html_analysis = grammar_checker.analyze_text(text)

    analysis_data = {
        "corrected_text": corrected_text,
        "original_text": text,
        "html_output": html_analysis
    }

    # Store in database; the connection is closed even if a statement fails.
    conn = sqlite3.connect('language_app.db')
    try:
        c = conn.cursor()

        # Reuse the existing task with this title, creating it only once.
        c.execute("SELECT id FROM tasks WHERE title = ?", (task_title,))
        row = c.fetchone()
        if row is None:
            c.execute("INSERT INTO tasks (title, description) VALUES (?, ?)",
                      (task_title, f"Writing task: {task_title}"))
            task_id = c.lastrowid
        else:
            task_id = row[0]

        c.execute("""INSERT INTO submissions (task_id, student_name, content, analysis_result, analysis_html) 
                 VALUES (?, ?, ?, ?, ?)""",
                  (task_id, student_name, text, json.dumps(analysis_data), html_analysis))
        conn.commit()
    finally:
        conn.close()

    return corrected_text, html_analysis


def create_exercise_from_text(text, exercise_title="Grammar Exercise"):
    """Create and store a correction exercise built from *text*.

    Every sentence of *text* in which the grammar checker detects errors
    becomes an exercise item. Up to five sentences with the same error types
    are added from the sentence database. The exercise is saved to the
    ``exercises`` table and an HTML preview is produced.

    All text placed in the HTML is escaped. Error types are listed once each,
    in first-seen order, and database sentences no longer carry empty or
    repeated error types.

    Args:
        text: Source text; ``None`` or blank input is rejected.
        exercise_title: Title stored with the exercise.

    Returns:
        ``(status_message, exercise_html)``; the HTML is ``""`` when no
        exercise could be created.
    """
    from html import escape

    if not text or not text.strip():
        return "Please enter text to create an exercise.", ""

    # Analyze text sentence by sentence to extract error types
    exercise_sentences = []
    all_error_types = []

    for sentence in nltk.sent_tokenize(text):
        _, error_types = grammar_checker._get_error_spans_detailed(sentence)
        if not error_types:  # sentence looks correct, nothing to practise
            continue

        corrected, _ = grammar_checker.analyze_text(sentence)
        exercise_sentences.append({
            "original": sentence.strip(),
            "corrected": corrected.strip(),
            "error_types": error_types
        })
        all_error_types.extend(error_types)

    if not exercise_sentences:
        return "No errors found in the text. Cannot create exercise.", ""

    # Find similar sentences from database
    unique_error_types = list(dict.fromkeys(all_error_types))
    similar_sentences = find_similar_sentences(unique_error_types, limit=5)

    # Combine original sentences with similar ones from database
    all_exercise_sentences = list(exercise_sentences)
    for similar in similar_sentences:
        corrected, _ = grammar_checker.analyze_text(similar['text'])
        tag_types = (tag.get('second_level_tag', '') for tag in similar['tags'])
        all_exercise_sentences.append({
            "original": similar['text'],
            "corrected": corrected,
            # Drop empty values and repeats so the hint reads cleanly.
            "error_types": list(dict.fromkeys(kind for kind in tag_types if kind))
        })

    # Store exercise in database; close the connection even on failure.
    conn = sqlite3.connect('language_app.db')
    try:
        c = conn.cursor()
        c.execute("""INSERT INTO exercises (title, instructions, sentences) 
                 VALUES (?, ?, ?)""",
                  (exercise_title,
                   "Correct the grammatical errors in the following sentences:",
                   json.dumps(all_exercise_sentences)))
        exercise_id = c.lastrowid
        conn.commit()
    finally:
        conn.close()

    # Generate exercise HTML
    html_parts = [f"""
    <div style='font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;'>
        <h3>{escape(str(exercise_title))}</h3>
        <p><strong>Exercise ID: {exercise_id}</strong></p>
        <p><strong>Instructions:</strong> Correct the grammatical errors in the following sentences:</p>
        <p><em>Error types found: {escape(', '.join(unique_error_types))}</em></p>
        <ol>
    """]

    for sentence_data in all_exercise_sentences:
        item_types = sentence_data.get('error_types') or []
        error_info = f" (Error types: {escape(', '.join(item_types))})" if item_types else ""
        html_parts.append(
            f"<li style='margin: 10px 0; padding: 10px; background-color: #f8f9fa; border-radius: 4px;'>{escape(sentence_data['original'])}{error_info}</li>"
        )

    html_parts.append("</ol></div>")
    exercise_html = "".join(html_parts)

    return f"Exercise created with {len(all_exercise_sentences)} sentences ({len(exercise_sentences)} original + {len(similar_sentences)} from database)! Exercise ID: {exercise_id}", exercise_html


def _render_attempt_feedback(score, correct_count, total, detailed_results):
    """Build the HTML score report for one exercise attempt (text is escaped)."""
    from html import escape

    score_color = "#28a745" if score >= 70 else "#ffc107" if score >= 50 else "#dc3545"

    if score >= 90:
        badge = """<span style='background-color: #28a745; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>🏆 Excellent Work!</span>"""
    elif score >= 70:
        badge = """<span style='background-color: #17a2b8; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>👍 Good Job!</span>"""
    elif score >= 50:
        badge = """<span style='background-color: #ffc107; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>📚 Keep Practicing!</span>"""
    else:
        badge = """<span style='background-color: #dc3545; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>💪 Try Again!</span>"""

    parts = [f"""
    <div style='font-family: Arial, sans-serif; max-width: 1000px; margin: 0 auto;'>
        <!-- Header Section -->
        <div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; border-radius: 10px 10px 0 0; text-align: center;'>
            <h2 style='margin: 0; font-size: 28px;'>📊 Exercise Results</h2>
            <div style='margin-top: 15px; font-size: 48px; font-weight: bold; color: {score_color};'>{score:.1f}%</div>
            <p style='margin: 10px 0 0 0; font-size: 18px; opacity: 0.9;'>{correct_count} out of {total} sentences correct</p>
        </div>
        
        <!-- Performance Badge -->
        <div style='background-color: #f8f9fa; padding: 20px; text-align: center; border-left: 1px solid #ddd; border-right: 1px solid #ddd;'>
    """, badge, """
        </div>
        
        <!-- Detailed Results -->
        <div style='background-color: white; border: 1px solid #ddd; border-radius: 0 0 10px 10px;'>
    """]

    for result in detailed_results:
        # Determine colors and icons
        if result['is_correct']:
            border_color = "#28a745"
            icon = "✅"
            status_bg = "#d4edda"
            status_text = "Correct!"
        else:
            border_color = "#dc3545"
            icon = "❌"
            status_bg = "#f8d7da"
            status_text = "Needs Improvement"

        parts.append(f"""
        <div style='border-left: 4px solid {border_color}; margin: 20px; padding: 20px; background-color: #fafafa; border-radius: 8px;'>
            <div style='display: flex; align-items: center; margin-bottom: 15px;'>
                <span style='font-size: 24px; margin-right: 10px;'>{icon}</span>
                <h4 style='margin: 0; color: #333;'>Sentence {result['sentence_num']}</h4>
                <span style='margin-left: auto; background-color: {status_bg}; padding: 4px 12px; border-radius: 12px; font-size: 12px; font-weight: bold;'>{status_text}</span>
            </div>
            
            <div style='margin-bottom: 15px;'>
                <div style='margin-bottom: 10px;'>
                    <strong style='color: #6c757d;'>📝 Original:</strong>
                    <div style='background-color: #e9ecef; padding: 10px; border-radius: 6px; margin-top: 5px; font-style: italic;'>{escape(str(result['original']))}</div>
                </div>
                
                <div style='margin-bottom: 10px;'>
                    <strong style='color: #007bff;'>✏️ Your Answer:</strong>
                    <div style='background-color: #e7f3ff; padding: 10px; border-radius: 6px; margin-top: 5px;'>{escape(str(result['student_response']))}</div>
                </div>
        """)

        # Only show model analysis if there were errors in student's response.
        # analysis_html is already HTML produced by the checker, so it is
        # embedded as-is.
        if not result['is_correct'] and result['analysis_html']:
            parts.append(f"""
                <div style='margin-top: 15px; padding: 15px; background-color: #fff3cd; border-radius: 6px; border-left: 3px solid #ffc107;'>
                    <strong style='color: #856404;'>🔍 Grammar Analysis of Your Response:</strong>
                    <div style='margin-top: 10px; font-size: 14px;'>
                        {result['analysis_html']}
                    </div>
                </div>
            """)

        parts.append("""
            </div>
        </div>
        """)

    parts.append("""
        </div>
        
        <!-- Footer -->
        <div style='text-align: center; margin-top: 30px; color: #6c757d; font-size: 14px;'>
            <p>💡 <strong>Tip:</strong> Review the grammar analysis above to understand common error patterns and improve your writing!</p>
        </div>
    </div>
    """)

    return "".join(parts)


def attempt_exercise(exercise_id, student_responses, student_name):
    """Score a student's answers to a stored exercise and save the attempt.

    A response counts as correct when the grammar checker returns it
    unchanged (ignoring surrounding whitespace), i.e. the model sees nothing
    left to correct.

    Args:
        exercise_id: Id of the exercise; anything ``int()`` accepts.
        student_responses: One answer per line; blank lines are ignored.
        student_name: Name stored with the attempt.

    Returns:
        ``("Score: NN.N%", feedback_html)`` on success, otherwise a
        ``(message, "")`` pair describing what is missing or invalid.
    """
    if not student_name or not student_name.strip():
        return "Please enter your name.", ""

    try:
        exercise_id = int(exercise_id)
    except (TypeError, ValueError, OverflowError):
        return "Please enter a valid exercise ID.", ""

    # The connection is closed on every path, including the early returns.
    conn = sqlite3.connect('language_app.db')
    try:
        c = conn.cursor()

        c.execute("SELECT sentences FROM exercises WHERE id = ?", (exercise_id,))
        result = c.fetchone()

        if not result:
            return "Exercise not found.", ""

        exercise_sentences = json.loads(result[0])
        if not exercise_sentences:
            # Nothing to score; also avoids dividing by zero below.
            return "This exercise has no sentences to correct.", ""

        # Parse student responses
        responses = [r.strip() for r in (student_responses or "").split('\n') if r.strip()]

        if len(responses) != len(exercise_sentences):
            return f"Please provide exactly {len(exercise_sentences)} responses (one per line).", ""

        # Calculate score using enhanced analysis
        correct_count = 0
        detailed_results = []

        for i, (sentence_data, response) in enumerate(zip(exercise_sentences, responses), 1):
            # Use the model to check if the response is correct
            response_corrected, response_analysis = grammar_checker.analyze_text(response)
            is_correct = response_corrected.strip() == response.strip()  # No further corrections needed

            if is_correct:
                correct_count += 1

            detailed_results.append({
                'sentence_num': i,
                'original': sentence_data['original'],
                'student_response': response,
                'expected': sentence_data['corrected'],
                'model_correction': response_corrected,
                'is_correct': is_correct,
                'analysis_html': response_analysis
            })

        score = (correct_count / len(exercise_sentences)) * 100

        # Store attempt
        attempt_data = {
            "responses": responses,
            "score": score,
            "detailed_results": detailed_results
        }

        c.execute("""INSERT INTO exercise_attempts (exercise_id, student_name, responses, score) 
                 VALUES (?, ?, ?, ?)""",
                  (exercise_id, student_name, json.dumps(attempt_data), score))

        conn.commit()
    finally:
        conn.close()

    feedback_html = _render_attempt_feedback(
        score, correct_count, len(exercise_sentences), detailed_results
    )
    return f"Score: {score:.1f}%", feedback_html


def preview_exercise(exercise_id):
    """Preview an exercise before attempting it.

    Args:
        exercise_id: Exercise identifier as entered by the user. Normally a
            string coming from a text box; ``None`` and integers are
            tolerated as well.

    Returns:
        A ``(status_message, preview_html)`` tuple. ``preview_html`` is an
        empty string when the ID is missing, not an integer, or does not
        match a row in the ``exercises`` table.

    Raises:
        sqlite3.Error: If the database query fails.
        json.JSONDecodeError: If the stored ``sentences`` column is not
            valid JSON.
    """
    # Local import so the block does not depend on a file-level import.
    from html import escape

    raw_id = "" if exercise_id is None else str(exercise_id).strip()
    if not raw_id:
        return "Please enter an exercise ID.", ""

    try:
        exercise_id = int(raw_id)
    except ValueError:
        return "Please enter a valid exercise ID.", ""

    # Get exercise from database. The connection is closed in ``finally`` so
    # that neither the "not found" path nor a failing query leaks it.
    conn = sqlite3.connect('language_app.db')
    try:
        row = conn.execute(
            "SELECT title, instructions, sentences FROM exercises WHERE id = ?",
            (exercise_id,),
        ).fetchone()
    finally:
        conn.close()

    if not row:
        return "Exercise not found.", ""

    title, instructions, sentences_json = row
    exercise_sentences = json.loads(sentences_json)

    # Everything read from the database originates from user input, so it is
    # escaped before being embedded in the HTML preview.
    safe_title = escape(str(title))
    safe_instructions = escape(str(instructions))

    header_html = f"""
    <div style='font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto;'>
        <!-- Header -->
        <div style='background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%); color: white; padding: 25px; border-radius: 10px 10px 0 0; text-align: center;'>
            <h2 style='margin: 0; font-size: 24px;'>📋 {safe_title}</h2>
            <p style='margin: 10px 0 0 0; font-size: 16px; opacity: 0.9;'>Exercise ID: {exercise_id}</p>
        </div>
        
        <!-- Instructions -->
        <div style='background-color: #e8f5e9; padding: 20px; border-left: 1px solid #ddd; border-right: 1px solid #ddd;'>
            <h3 style='margin: 0 0 10px 0; color: #2e7d32;'>📝 Instructions:</h3>
            <p style='margin: 0; font-size: 16px; line-height: 1.5;'>{safe_instructions}</p>
            <p style='margin: 10px 0 0 0; font-size: 14px; color: #666; font-style: italic;'>
                💡 Tip: Read each sentence carefully and identify grammatical errors before writing your corrections.
            </p>
        </div>
        
        <!-- Sentences -->
        <div style='background-color: white; border: 1px solid #ddd; border-radius: 0 0 10px 10px; padding: 20px;'>
            <h3 style='margin: 0 0 20px 0; color: #333;'>📚 Sentences to Correct ({len(exercise_sentences)} total):</h3>
            <ol style='padding-left: 20px;'>
    """

    sentence_items = []
    for sentence_data in exercise_sentences:
        original = escape(str(sentence_data['original']))
        error_types = sentence_data.get('error_types', [])

        # Add error type hints if available
        error_hint = ""
        if error_types:
            focus = ', '.join(escape(str(error_type)) for error_type in error_types)
            error_hint = f"<br><small style='color: #666; font-style: italic;'>💡 Focus on: {focus}</small>"

        sentence_items.append(f"""
            <li style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 6px; border-left: 3px solid #4CAF50;'>
                <div style='font-size: 16px; line-height: 1.5; margin-bottom: 5px;'>{original}</div>
                {error_hint}
            </li>
        """)

    footer_html = """
            </ol>
            
            <div style='margin-top: 30px; padding: 20px; background-color: #f0f8ff; border-radius: 8px; border: 1px solid #b3d9ff;'>
                <h4 style='margin: 0 0 10px 0; color: #0066cc;'>🎯 How to Complete This Exercise:</h4>
                <ol style='margin: 0; padding-left: 20px; color: #333;'>
                    <li>Read each sentence carefully</li>
                    <li>Identify grammatical errors (spelling, grammar, word choice, etc.)</li>
                    <li>Write your corrected version of each sentence</li>
                    <li>Enter all your answers in the text box below (one sentence per line)</li>
                    <li>Submit to get immediate feedback and scoring</li>
                </ol>
            </div>
        </div>
    </div>
    """

    preview_html = header_html + "".join(sentence_items) + footer_html

    # The status message is plain text (not HTML), so the raw title is used.
    return f"Exercise '{title}' loaded successfully! {len(exercise_sentences)} sentences to correct.", preview_html


def get_student_progress(student_name):
    """Get a student's writing-submission and exercise-attempt history.

    Args:
        student_name: Name to look up. It is matched exactly against the
            ``student_name`` column of ``submissions`` and
            ``exercise_attempts``.

    Returns:
        An HTML fragment listing the student's submissions and exercise
        attempts (newest first), or a plain prompt message when no name
        was supplied.

    Raises:
        sqlite3.Error: If either database query fails.
    """
    # Local import so the block does not depend on a file-level import.
    from html import escape

    if student_name is None or not str(student_name).strip():
        return "Please enter a student name."

    # The connection is closed in ``finally`` so a failing query cannot leak it.
    conn = sqlite3.connect('language_app.db')
    try:
        c = conn.cursor()

        # Get submissions
        c.execute("""SELECT s.id, s.content, s.created_at, t.title 
                     FROM submissions s JOIN tasks t ON s.task_id = t.id 
                     WHERE s.student_name = ? ORDER BY s.created_at DESC""", (student_name,))
        submissions = c.fetchall()

        # Get exercise attempts
        c.execute("""SELECT ea.score, ea.created_at, e.title 
                     FROM exercise_attempts ea JOIN exercises e ON ea.exercise_id = e.id 
                     WHERE ea.student_name = ? ORDER BY ea.created_at DESC""", (student_name,))
        attempts = c.fetchall()
    finally:
        conn.close()

    # The name is typed by the user and titles come from the database, so
    # both are escaped before being embedded in HTML.
    parts = [f"""
    <div style='font-family: Arial, sans-serif; padding: 20px;'>
        <h3>Progress for {escape(str(student_name))}</h3>
        
        <h4>Writing Submissions ({len(submissions)})</h4>
        <ul>
    """]

    for _submission_id, content, created_at, task_title in submissions:
        # NULL columns are rendered as empty/zero instead of raising.
        timestamp = escape(str(created_at or "")[:16])
        char_count = len(content or "")
        parts.append(
            f"<li><strong>{escape(str(task_title))}</strong> - {timestamp} - {char_count} characters</li>"
        )

    parts.append(f"""
        </ul>
        
        <h4>Exercise Attempts ({len(attempts)})</h4>
        <ul>
    """)

    for score, created_at, exercise_title in attempts:
        timestamp = escape(str(created_at or "")[:16])
        # A NULL score cannot be formatted with ``:.1f``; show a placeholder.
        score_text = f"{score:.1f}%" if score is not None else "N/A"
        parts.append(
            f"<li><strong>{escape(str(exercise_title))}</strong> - Score: {score_text} - {timestamp}</li>"
        )

    parts.append("</ul></div>")

    return "".join(parts)

# Create Gradio Interface
# The UI is declared at module level: components are created inside nested
# layout contexts (Blocks > Tabs > TabItem > Row > Column), so the order of
# the statements below determines the order in which widgets appear.
# Each tab wires one button to a handler function via ``.click``; the
# handlers analyze_student_writing, create_exercise_from_text and
# attempt_exercise are defined outside this section of the file.
with gr.Blocks(title="Language Learning App - Enhanced Grammar Checker", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🎓 Language Learning Application")
    gr.Markdown("### AI-Powered Grammar Checking and Exercise Generation")
    gr.Markdown("*Now featuring advanced T5-GED neural network with enhanced error detection*")
    
    with gr.Tabs():
        # Student Writing Analysis Tab
        # Left column: inputs; right column: corrected text and HTML analysis.
        with gr.TabItem("📝 Writing Analysis"):
            gr.Markdown("## Submit Your Writing for Analysis")
            
            with gr.Row():
                with gr.Column():
                    student_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
                    task_title_input = gr.Textbox(label="Assignment Title", value="General Writing Task")
                    writing_input = gr.Textbox(
                        label="Your Writing", 
                        lines=8, 
                        placeholder="Paste your writing here for grammar analysis..."
                    )
                    analyze_btn = gr.Button("Analyze Writing", variant="primary")
                
                with gr.Column():
                    corrected_output = gr.Textbox(label="Corrected Text", lines=6)
                    analysis_output = gr.HTML(label="Detailed Analysis")
            
            # Handler receives (writing, student name, task title) and fills
            # the two output components in the listed order.
            analyze_btn.click(
                analyze_student_writing,
                inputs=[writing_input, student_name_input, task_title_input],
                outputs=[corrected_output, analysis_output]
            )
        
        # Exercise Creation Tab
        # Turns a text containing errors into a stored exercise.
        with gr.TabItem("🏋️ Exercise Creation"):
            gr.Markdown("## Create Grammar Exercises")
            
            with gr.Row():
                with gr.Column():
                    exercise_title_input = gr.Textbox(label="Exercise Title", value="Grammar Exercise")
                    exercise_text_input = gr.Textbox(
                        label="Text with Errors", 
                        lines=6,
                        placeholder="Enter text containing grammatical errors to create an exercise..."
                    )
                    create_exercise_btn = gr.Button("Create Exercise", variant="primary")
                
                with gr.Column():
                    exercise_result = gr.Textbox(label="Result")
                    exercise_display = gr.HTML(label="Generated Exercise")
            
            # Note the input order: text first, then title.
            create_exercise_btn.click(
                create_exercise_from_text,
                inputs=[exercise_text_input, exercise_title_input],
                outputs=[exercise_result, exercise_display]
            )
        
        # Exercise Attempt Tab
        # The same exercise ID textbox feeds both the preview and the
        # submit handlers.
        with gr.TabItem("✏️ Exercise Practice"):
            gr.Markdown("## Practice Grammar Exercises")
            with gr.Row():
                with gr.Column():
                    exercise_id_input = gr.Textbox(label="Exercise ID", placeholder="Enter exercise ID")
                    
                    # Preview section
                    with gr.Row():
                        preview_btn = gr.Button("👀 Preview Exercise", variant="secondary")
                        
                    preview_result = gr.Textbox(label="Preview Status", lines=1)
                    preview_display = gr.HTML(label="Exercise Preview")
                    
                    # Separator
                    gr.Markdown("---")
                    
                    # Attempt section
                    gr.Markdown("### 📝 Complete the Exercise")
                    student_name_exercise = gr.Textbox(label="Your Name", placeholder="Enter your name")
                    responses_input = gr.Textbox(
                        label="Your Answers", 
                        lines=8,
                        placeholder="After previewing the exercise above, enter your corrected sentences here (one per line)..."
                    )
                    submit_exercise_btn = gr.Button("✅ Submit Answers", variant="primary")
                
                with gr.Column():
                    score_output = gr.Textbox(label="Your Score")
                    feedback_output = gr.HTML(label="Detailed Feedback")
            
            # Connect the buttons
            # preview_exercise returns (status message, preview HTML).
            preview_btn.click(
                preview_exercise,
                inputs=[exercise_id_input],
                outputs=[preview_result, preview_display]
            )
            
            # attempt_exercise receives (exercise ID, answers, student name)
            # and fills the score textbox and the feedback HTML.
            submit_exercise_btn.click(
                attempt_exercise,
                inputs=[exercise_id_input, responses_input, student_name_exercise],
                outputs=[score_output, feedback_output]
            )
        # Progress Tracking Tab
        # Narrow input column (scale=1) next to a wider output column (scale=2).
        with gr.TabItem("📊 Student Progress"):
            gr.Markdown("## View Student Progress")
            
            with gr.Row():
                with gr.Column(scale=1):
                    progress_student_name = gr.Textbox(label="Student Name", placeholder="Enter student name")
                    get_progress_btn = gr.Button("Get Progress", variant="primary")
                
                with gr.Column(scale=2):
                    progress_output = gr.HTML(label="Student Progress")
            
            # get_student_progress returns a single HTML string (or a plain
            # prompt message when the name is empty).
            get_progress_btn.click(
                get_student_progress,
                inputs=[progress_student_name],
                outputs=[progress_output]
            )
    
    # Static usage notes rendered below the tabs.
    gr.Markdown("""
    ---
    ### How to Use:
    1. **Writing Analysis**: Submit your writing to get grammar corrections and detailed error analysis
    2. **Exercise Creation**: Teachers can create exercises from text containing errors
    3. **Exercise Practice**: Students can practice with generated exercises and get scored feedback
    4. **Progress Tracking**: View student progress across submissions and exercises
    
    *Powered by advanced T5-GED neural networks for enhanced grammar error detection and correction*
    """)

# Start the web server only when the file is executed as a script.
# share=True asks Gradio to create a public share link in addition to the
# local server.
if __name__ == "__main__":
    app.launch(share=True)