Spaces:

Zlovoblachko
/

lang_learn_app

Running

App Files Files Community

lang_learn_app / app.py

Zlovoblachko

databank added

62e8dda 29 days ago

raw

history blame

45.6 kB

	import gradio as gr
	import sqlite3
	import json
	import os
	from datetime import datetime
	import torch
	import nltk
	from transformers import (
	T5Tokenizer,
	T5ForConditionalGeneration,
	ElectraTokenizer,
	ElectraForTokenClassification
	)
	import torch.nn as nn
	from tqdm import tqdm

	# Download NLTK data
	try:
	nltk.data.find('tokenizers/punkt')
	except LookupError:
	nltk.download('punkt')

	class HuggingFaceT5GEDInference:
	def __init__(self, model_name="Zlovoblachko/REAlEC_2step_model_testing",
	ged_model_name="Zlovoblachko/11tag-electra-grammar-stage2", device=None):
	"""
	Initialize the inference class for T5-GED model from HuggingFace

	Args:
	model_name: HuggingFace model name/path for the T5-GED model
	ged_model_name: HuggingFace model name/path for the GED model
	device: Device to run inference on (cuda/cpu)
	"""
	self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Load GED model and tokenizer (same as training)
	print(f"Loading GED model from HuggingFace: {ged_model_name}...")
	self.ged_model, self.ged_tokenizer = self._load_ged_model(ged_model_name)

	# Load T5 model and tokenizer from HuggingFace
	print(f"Loading T5 model from HuggingFace: {model_name}...")
	self.t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
	self.t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
	self.t5_model.to(self.device)

	# Create GED encoder (copy of T5 encoder)
	self.ged_encoder = T5ForConditionalGeneration.from_pretrained(model_name).encoder
	self.ged_encoder.to(self.device)

	# Create gating mechanism
	encoder_hidden_size = self.t5_model.config.d_model
	self.gate = nn.Linear(2 * encoder_hidden_size, 1)
	self.gate.to(self.device)

	# Try to load GED components from HuggingFace
	try:
	print("Loading GED components...")
	from huggingface_hub import hf_hub_download
	ged_components_path = hf_hub_download(
	repo_id=model_name,
	filename="ged_components.pt",
	cache_dir=None
	)
	ged_components = torch.load(ged_components_path, map_location=self.device)
	self.ged_encoder.load_state_dict(ged_components["ged_encoder"])
	self.gate.load_state_dict(ged_components["gate"])
	print("GED components loaded successfully!")
	except Exception as e:
	print(f"Warning: Could not load GED components: {e}")
	print("Using default initialization for GED encoder and gate.")

	# Set to evaluation mode
	self.t5_model.eval()
	self.ged_encoder.eval()
	self.gate.eval()

	def _load_ged_model(self, model_name):
	"""Load GED model and tokenizer from HuggingFace"""
	tokenizer = ElectraTokenizer.from_pretrained(model_name)
	model = ElectraForTokenClassification.from_pretrained(model_name)
	model.to(self.device)
	model.eval()
	return model, tokenizer

	def _get_ged_predictions(self, text):
	"""Get GED predictions for input text - exact same as training preprocessing"""
	inputs = self.ged_tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(self.device)
	with torch.no_grad():
	outputs = self.ged_model(**inputs)
	logits = outputs.logits
	predictions = torch.argmax(logits, dim=2)
	token_predictions = predictions[0].cpu().numpy().tolist()
	tokens = self.ged_tokenizer.convert_ids_to_tokens(inputs.input_ids[0])

	ged_tags = []
	for token, pred in zip(tokens, token_predictions):
	if token.startswith("##") or token in ["[CLS]", "[SEP]", "[PAD]"]:
	continue
	ged_tags.append(str(pred))

	return " ".join(ged_tags), tokens, token_predictions

	def _get_error_spans(self, text):
	"""Extract error spans with simplified categories for display"""
	ged_tags_str, tokens, predictions = self._get_ged_predictions(text)

	error_spans = []
	clean_tokens = []

	for token, pred in zip(tokens, predictions):
	if token.startswith("##") or token in ["[CLS]", "[SEP]", "[PAD]"]:
	continue
	clean_tokens.append(token)

	if pred != 0: # 0 is correct, others are various error types
	# Simplify the 11-tag system to basic categories for user display
	if pred in [1, 2, 3, 4]: # Various replacement/substitution errors
	error_type = "Grammar"
	elif pred in [5, 6]: # Missing elements
	error_type = "Missing"
	elif pred in [7, 8]: # Unnecessary elements
	error_type = "Unnecessary"
	elif pred in [9, 10]: # Other error types
	error_type = "Usage"
	else:
	error_type = "Error"

	error_spans.append({
	"token": token,
	"type": error_type,
	"position": len(clean_tokens) - 1
	})

	return error_spans

	def _get_error_spans_detailed(self, text):
	"""Extract error spans with detailed second_level_tag categories"""
	ged_tags_str, tokens, predictions = self._get_ged_predictions(text)

	error_spans = []
	error_types = []
	clean_tokens = []

	# Correct id2label mapping
	id2label = {
	0: "correct",
	1: "ORTH",
	2: "FORM",
	3: "MORPH",
	4: "DET",
	5: "POS",
	6: "VERB",
	7: "NUM",
	8: "WORD",
	9: "PUNCT",
	10: "RED",
	11: "MULTIWORD",
	12: "SPELL"
	}

	for token, pred in zip(tokens, predictions):
	if token.startswith("##") or token in ["[CLS]", "[SEP]", "[PAD]"]:
	continue
	clean_tokens.append(token)

	if pred != 0: # 0 is correct, others are various error types
	error_type = id2label.get(pred, "OTHER")
	error_types.append(error_type)

	error_spans.append({
	"token": token,
	"type": error_type,
	"position": len(clean_tokens) - 1
	})

	return error_spans, list(set(error_types))

	def _preprocess_inputs(self, text, max_length=128):
	"""Preprocess input text exactly as during training"""
	# Get GED predictions
	ged_tags, _, _ = self._get_ged_predictions(text)

	# Tokenize source text (same as training)
	src_tokens = self.t5_tokenizer(
	text,
	truncation=True,
	max_length=max_length,
	return_tensors="pt"
	)

	# Tokenize GED tags (same as training)
	ged_tokens = self.t5_tokenizer(
	ged_tags,
	truncation=True,
	max_length=max_length,
	return_tensors="pt"
	)

	return {
	"input_ids": src_tokens.input_ids.to(self.device),
	"attention_mask": src_tokens.attention_mask.to(self.device),
	"ged_input_ids": ged_tokens.input_ids.to(self.device),
	"ged_attention_mask": ged_tokens.attention_mask.to(self.device)
	}

	def _forward_with_ged(self, input_ids, attention_mask, ged_input_ids, ged_attention_mask, max_length=200):
	"""
	Forward pass with GED integration - replicates T5WithGED.forward() logic
	"""
	# Get source encoder outputs
	src_encoder_outputs = self.t5_model.encoder(
	input_ids=input_ids,
	attention_mask=attention_mask,
	return_dict=True
	)

	# Get GED encoder outputs
	ged_encoder_outputs = self.ged_encoder(
	input_ids=ged_input_ids,
	attention_mask=ged_attention_mask,
	return_dict=True
	)

	# Get hidden states
	src_hidden_states = src_encoder_outputs.last_hidden_state
	ged_hidden_states = ged_encoder_outputs.last_hidden_state

	# Combine hidden states (same as training)
	min_len = min(src_hidden_states.size(1), ged_hidden_states.size(1))
	combined = torch.cat([
	src_hidden_states[:, :min_len, :],
	ged_hidden_states[:, :min_len, :]
	], dim=2)

	# Apply gating mechanism
	gate_scores = torch.sigmoid(self.gate(combined))
	combined_hidden = (
	gate_scores * src_hidden_states[:, :min_len, :] +
	(1 - gate_scores) * ged_hidden_states[:, :min_len, :]
	)

	# Update encoder outputs
	src_encoder_outputs.last_hidden_state = combined_hidden

	# Generate using T5 decoder
	decoder_outputs = self.t5_model.generate(
	encoder_outputs=src_encoder_outputs,
	max_length=max_length,
	do_sample=False,
	num_beams=1
	)

	return decoder_outputs

	def correct_text(self, text, max_length=200):
	"""
	Correct grammatical errors in input text

	Args:
	text: Input text to correct
	max_length: Maximum length for generation

	Returns:
	Corrected text as string
	"""
	# Preprocess inputs exactly as training
	inputs = self._preprocess_inputs(text)

	# Generate correction using GED-enhanced model
	with torch.no_grad():
	generated_ids = self._forward_with_ged(
	input_ids=inputs["input_ids"],
	attention_mask=inputs["attention_mask"],
	ged_input_ids=inputs["ged_input_ids"],
	ged_attention_mask=inputs["ged_attention_mask"],
	max_length=max_length
	)

	# Decode output
	corrected_text = self.t5_tokenizer.decode(generated_ids[0], skip_special_tokens=True)
	return corrected_text

	def analyze_text(self, text):
	"""Enhanced analysis method for Gradio integration"""
	if not text.strip():
	return "Model not available or empty text", ""

	try:
	# Get corrected text
	corrected_text = self.correct_text(text)

	# Get error spans (use the original method for display)
	error_spans = self._get_error_spans(text)

	# Generate HTML output
	html_output = self.generate_html_analysis(text, corrected_text, error_spans)

	return corrected_text, html_output

	except Exception as e:
	return f"Error during analysis: {str(e)}", ""

	def generate_html_analysis(self, original, corrected, error_spans):
	"""Generate enhanced HTML analysis output"""
	# Create highlighted original text
	highlighted_original = original
	if error_spans:
	# Sort by position in reverse to avoid index shifting
	sorted_spans = sorted(error_spans, key=lambda x: x['position'], reverse=True)

	# Simple highlighting - in a more sophisticated version, you'd map token positions to character positions
	for span in sorted_spans:
	token = span['token']
	error_type = span['type']

	# Color coding for different error types
	color_map = {
	"Grammar": "#ffebee", # Light red
	"Missing": "#e8f5e8", # Light green
	"Unnecessary": "#fff3e0", # Light orange
	"Usage": "#e3f2fd" # Light blue
	}

	color = color_map.get(error_type, "#f5f5f5")

	# Simple token replacement (basic highlighting)
	if token in highlighted_original:
	highlighted_original = highlighted_original.replace(
	token,
	f"<span style='background-color: {color}; padding: 1px 3px; border-radius: 3px; margin: 0 1px;' title='{error_type}'>{token}</span>",
	1
	)

	html = f"""
	<div style='font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; border: 1px solid #ddd; border-radius: 8px; background-color: #f9f9f9;'>
	<h3 style='color: #333; margin-top: 0;'>Grammar Analysis Results</h3>

	<div style='margin: 15px 0;'>
	<h4 style='color: #555;'>Original Text with Error Highlighting:</h4>
	<div style='padding: 10px; background-color: #fff; border: 1px solid #ddd; border-radius: 4px;'>{highlighted_original}</div>
	</div>

	<div style='margin: 15px 0;'>
	<h4 style='color: #28a745;'>Corrected Text:</h4>
	<p style='padding: 10px; background-color: #d4edda; border: 1px solid #c3e6cb; border-radius: 4px;'>{corrected}</p>
	</div>

	<div style='margin: 15px 0;'>
	<h4 style='color: #333;'>Error Summary:</h4>
	<p style='color: #666;'>Found {len(error_spans)} potential issues</p>

	<div style='margin-top: 10px;'>
	<span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #ffebee; border-radius: 12px; font-size: 12px;'>Grammar</span>
	<span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #e8f5e8; border-radius: 12px; font-size: 12px;'>Missing</span>
	<span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #fff3e0; border-radius: 12px; font-size: 12px;'>Unnecessary</span>
	<span style='display: inline-block; margin: 2px 5px; padding: 2px 8px; background-color: #e3f2fd; border-radius: 12px; font-size: 12px;'>Usage</span>
	</div>
	</div>
	</div>
	"""
	return html

	def clear_and_reload_database():
	"""Clear and reload the sentence database"""
	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	# Clear existing data
	c.execute("DELETE FROM sentence_database")
	conn.commit()
	print("Cleared existing sentence database")

	conn.close()

	# Reload
	load_sentence_database()

	# Initialize SQLite database for storing submissions and exercises
	def init_database():
	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	# Users table
	c.execute('''CREATE TABLE IF NOT EXISTS users (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	username TEXT UNIQUE NOT NULL,
	email TEXT UNIQUE NOT NULL,
	role TEXT NOT NULL,
	password_hash TEXT NOT NULL,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	)''')

	# Tasks table
	c.execute('''CREATE TABLE IF NOT EXISTS tasks (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	title TEXT NOT NULL,
	description TEXT NOT NULL,
	image_url TEXT,
	creator_id INTEGER,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	)''')

	# Submissions table
	c.execute('''CREATE TABLE IF NOT EXISTS submissions (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	task_id INTEGER,
	student_name TEXT NOT NULL,
	content TEXT NOT NULL,
	analysis_result TEXT,
	analysis_html TEXT,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	)''')

	# Exercises table
	c.execute('''CREATE TABLE IF NOT EXISTS exercises (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	title TEXT NOT NULL,
	instructions TEXT NOT NULL,
	sentences TEXT NOT NULL,
	image_url TEXT,
	submission_id INTEGER,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	)''')

	# Exercise attempts table
	c.execute('''CREATE TABLE IF NOT EXISTS exercise_attempts (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	exercise_id INTEGER,
	student_name TEXT NOT NULL,
	responses TEXT NOT NULL,
	score REAL,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
	)''')

	# Sentence database table - ADD THIS
	c.execute('''CREATE TABLE IF NOT EXISTS sentence_database (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	text TEXT NOT NULL,
	tags TEXT NOT NULL,
	error_types TEXT NOT NULL
	)''')

	conn.commit()
	conn.close()


	def load_sentence_database(jsonl_file_path='sentencewise_full.jsonl'):
	"""Load sentence database from JSONL file"""
	print(f"Debug: Attempting to load from: {jsonl_file_path}")
	print(f"Debug: Current working directory: {os.getcwd()}")
	print(f"Debug: File exists: {os.path.exists(jsonl_file_path)}")

	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	# Create sentence database table
	c.execute('''CREATE TABLE IF NOT EXISTS sentence_database (
	id INTEGER PRIMARY KEY AUTOINCREMENT,
	text TEXT NOT NULL,
	tags TEXT NOT NULL,
	error_types TEXT NOT NULL
	)''')

	# Check if data already loaded
	c.execute("SELECT COUNT(*) FROM sentence_database")
	current_count = c.fetchone()[0]
	if current_count > 0:
	print(f"Sentence database already loaded with {current_count} sentences")
	conn.close()
	return

	# Load JSONL file
	try:
	print(f"Debug: Opening file {jsonl_file_path}")
	with open(jsonl_file_path, 'r', encoding='utf-8') as f:
	lines_processed = 0
	for line_num, line in enumerate(f, 1):
	try:
	line = line.strip()
	if not line: # Skip empty lines
	continue

	data = json.loads(line)
	text = data.get('text', '')
	tags = data.get('tags', [])

	if not text or not tags:
	print(f"Debug: Skipping line {line_num} - missing text or tags")
	continue

	# Extract second_level_tag error types
	error_types = []
	for tag in tags:
	second_level = tag.get('second_level_tag', '')
	if second_level:
	error_types.append(second_level)

	error_types = list(set(error_types)) # Remove duplicates

	# Debug: Print first few entries
	if line_num <= 3:
	print(f"Debug line {line_num}: text='{text[:50]}...', error_types={error_types}")
	print(f"Debug: Raw tags for line {line_num}: {tags}")

	if error_types: # Only insert if we have error types
	c.execute("""INSERT INTO sentence_database (text, tags, error_types)
	VALUES (?, ?, ?)""",
	(text, json.dumps(tags), json.dumps(error_types)))
	lines_processed += 1

	if line_num % 1000 == 0:
	print(f"Processed {line_num} lines, inserted {lines_processed} sentences...")

	except json.JSONDecodeError as e:
	print(f"JSON decode error on line {line_num}: {e}")
	print(f"Line content: {line[:100]}...")
	continue
	except Exception as e:
	print(f"Error processing line {line_num}: {e}")
	continue

	conn.commit()
	print(f"Successfully loaded sentence database with {lines_processed} sentences from {line_num} total lines")

	except FileNotFoundError:
	print(f"Error: {jsonl_file_path} not found in {os.getcwd()}")
	print("Available files:")
	try:
	files = os.listdir('.')
	for f in files:
	if f.endswith('.jsonl') or f.endswith('.json'):
	print(f" - {f}")
	except:
	print(" Could not list files")
	except Exception as e:
	print(f"Error loading sentence database: {e}")

	conn.close()

	def find_similar_sentences(error_types, limit=5):
	"""Find sentences with similar error types from database"""
	if not error_types:
	return []

	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	# Build query to find sentences with matching error types
	similar_sentences = []

	for error_type in error_types:
	c.execute("""SELECT text, tags FROM sentence_database
	WHERE error_types LIKE ?
	ORDER BY RANDOM()
	LIMIT ?""", (f'%"{error_type}"%', limit))

	results = c.fetchall()
	for text, tags_json in results:
	similar_sentences.append({
	'text': text,
	'tags': json.loads(tags_json)
	})

	conn.close()

	# Remove duplicates and limit to requested number
	seen_texts = set()
	unique_sentences = []
	for sentence in similar_sentences:
	if sentence['text'] not in seen_texts:
	seen_texts.add(sentence['text'])
	unique_sentences.append(sentence)
	if len(unique_sentences) >= limit:
	break

	return unique_sentences


	# Initialize database and components
	init_database()
	print("Clearing and loading sentence database...")
	clear_and_reload_database()
	print("Initializing enhanced grammar checker...")
	grammar_checker = HuggingFaceT5GEDInference()
	print("Grammar checker initialized successfully!")

	# Gradio Interface Functions
	def analyze_student_writing(text, student_name, task_title="General Writing Task"):
	"""Analyze student writing and store in database"""
	if not text.strip():
	return "Please enter some text to analyze.", ""

	if not student_name.strip():
	return "Please enter your name.", ""

	# Analyze text with enhanced model
	corrected_text, html_analysis = grammar_checker.analyze_text(text)

	# Store in database
	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	# Insert task if not exists
	c.execute("INSERT OR IGNORE INTO tasks (title, description) VALUES (?, ?)",
	(task_title, f"Writing task: {task_title}"))

	c.execute("SELECT id FROM tasks WHERE title = ?", (task_title,))
	task_id = c.fetchone()[0]

	# Insert submission
	analysis_data = {
	"corrected_text": corrected_text,
	"original_text": text,
	"html_output": html_analysis
	}

	c.execute("""INSERT INTO submissions (task_id, student_name, content, analysis_result, analysis_html)
	VALUES (?, ?, ?, ?, ?)""",
	(task_id, student_name, text, json.dumps(analysis_data), html_analysis))

	submission_id = c.lastrowid
	conn.commit()
	conn.close()

	return corrected_text, html_analysis


	def create_exercise_from_text(text, exercise_title="Grammar Exercise"):
	"""Create an exercise from text with errors using sentence database"""
	if not text.strip():
	return "Please enter text to create an exercise.", ""

	# Analyze text to extract error types
	sentences = nltk.sent_tokenize(text)
	exercise_sentences = []
	all_error_types = []

	for sentence in sentences:
	# Get detailed error analysis
	error_spans, error_types = grammar_checker._get_error_spans_detailed(sentence)

	if error_types: # Has errors
	corrected, _ = grammar_checker.analyze_text(sentence)
	exercise_sentences.append({
	"original": sentence.strip(),
	"corrected": corrected.strip(),
	"error_types": error_types
	})
	all_error_types.extend(error_types)

	if not exercise_sentences:
	return "No errors found in the text. Cannot create exercise.", ""

	# Find similar sentences from database
	unique_error_types = list(set(all_error_types))
	similar_sentences = find_similar_sentences(unique_error_types, limit=5)

	# Combine original sentences with similar ones from database
	all_exercise_sentences = exercise_sentences.copy()

	for similar in similar_sentences:
	# Get corrected version of similar sentence
	corrected, _ = grammar_checker.analyze_text(similar['text'])
	all_exercise_sentences.append({
	"original": similar['text'],
	"corrected": corrected,
	"error_types": [tag.get('second_level_tag', '') for tag in similar['tags']]
	})

	# Store exercise in database
	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	c.execute("""INSERT INTO exercises (title, instructions, sentences)
	VALUES (?, ?, ?)""",
	(exercise_title,
	"Correct the grammatical errors in the following sentences:",
	json.dumps(all_exercise_sentences)))

	exercise_id = c.lastrowid
	conn.commit()
	conn.close()

	# Generate exercise HTML
	exercise_html = f"""
	<div style='font-family: Arial, sans-serif; padding: 20px; border: 1px solid #ddd; border-radius: 8px;'>
	<h3>{exercise_title}</h3>
	<p><strong>Exercise ID: {exercise_id}</strong></p>
	<p><strong>Instructions:</strong> Correct the grammatical errors in the following sentences:</p>
	<p><em>Error types found: {', '.join(unique_error_types)}</em></p>
	<ol>
	"""

	for i, sentence_data in enumerate(all_exercise_sentences, 1):
	error_info = f" (Error types: {', '.join(sentence_data.get('error_types', []))})" if sentence_data.get('error_types') else ""
	exercise_html += f"<li style='margin: 10px 0; padding: 10px; background-color: #f8f9fa; border-radius: 4px;'>{sentence_data['original']}{error_info}</li>"

	exercise_html += "</ol></div>"

	return f"Exercise created with {len(all_exercise_sentences)} sentences ({len(exercise_sentences)} original + {len(similar_sentences)} from database)! Exercise ID: {exercise_id}", exercise_html


	def attempt_exercise(exercise_id, student_responses, student_name):
	"""Submit exercise attempt and get score using enhanced analysis"""
	if not student_name.strip():
	return "Please enter your name.", ""

	try:
	exercise_id = int(exercise_id)
	except:
	return "Please enter a valid exercise ID.", ""

	# Get exercise from database
	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	c.execute("SELECT sentences FROM exercises WHERE id = ?", (exercise_id,))
	result = c.fetchone()

	if not result:
	return "Exercise not found.", ""

	exercise_sentences = json.loads(result[0])

	# Parse student responses
	responses = [r.strip() for r in student_responses.split('\n') if r.strip()]

	if len(responses) != len(exercise_sentences):
	return f"Please provide exactly {len(exercise_sentences)} responses (one per line).", ""

	# Calculate score using enhanced analysis
	correct_count = 0
	detailed_results = []

	for i, (sentence_data, response) in enumerate(zip(exercise_sentences, responses), 1):
	original = sentence_data['original']
	expected = sentence_data['corrected']

	# Use the model to check if the response is correct
	response_corrected, response_analysis = grammar_checker.analyze_text(response)
	is_correct = response_corrected.strip() == response.strip() # No further corrections needed

	if is_correct:
	correct_count += 1

	detailed_results.append({
	'sentence_num': i,
	'original': original,
	'student_response': response,
	'expected': expected,
	'model_correction': response_corrected,
	'is_correct': is_correct,
	'analysis_html': response_analysis
	})

	score = (correct_count / len(exercise_sentences)) * 100

	# Store attempt
	attempt_data = {
	"responses": responses,
	"score": score,
	"detailed_results": detailed_results
	}

	c.execute("""INSERT INTO exercise_attempts (exercise_id, student_name, responses, score)
	VALUES (?, ?, ?, ?)""",
	(exercise_id, student_name, json.dumps(attempt_data), score))

	conn.commit()
	conn.close()

	# Create beautiful HTML results
	score_color = "#28a745" if score >= 70 else "#ffc107" if score >= 50 else "#dc3545"

	feedback_html = f"""
	<div style='font-family: Arial, sans-serif; max-width: 1000px; margin: 0 auto;'>
	<!-- Header Section -->
	<div style='background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 30px; border-radius: 10px 10px 0 0; text-align: center;'>
	<h2 style='margin: 0; font-size: 28px;'>📊 Exercise Results</h2>
	<div style='margin-top: 15px; font-size: 48px; font-weight: bold; color: {score_color};'>{score:.1f}%</div>
	<p style='margin: 10px 0 0 0; font-size: 18px; opacity: 0.9;'>{correct_count} out of {len(exercise_sentences)} sentences correct</p>
	</div>

	<!-- Performance Badge -->
	<div style='background-color: #f8f9fa; padding: 20px; text-align: center; border-left: 1px solid #ddd; border-right: 1px solid #ddd;'>
	"""

	if score >= 90:
	feedback_html += """<span style='background-color: #28a745; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>🏆 Excellent Work!</span>"""
	elif score >= 70:
	feedback_html += """<span style='background-color: #17a2b8; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>👍 Good Job!</span>"""
	elif score >= 50:
	feedback_html += """<span style='background-color: #ffc107; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>📚 Keep Practicing!</span>"""
	else:
	feedback_html += """<span style='background-color: #dc3545; color: white; padding: 8px 20px; border-radius: 20px; font-weight: bold;'>💪 Try Again!</span>"""

	feedback_html += """
	</div>

	<!-- Detailed Results -->
	<div style='background-color: white; border: 1px solid #ddd; border-radius: 0 0 10px 10px;'>
	"""

	for result in detailed_results:
	# Determine colors and icons
	if result['is_correct']:
	border_color = "#28a745"
	icon = "✅"
	status_bg = "#d4edda"
	status_text = "Correct!"
	else:
	border_color = "#dc3545"
	icon = "❌"
	status_bg = "#f8d7da"
	status_text = "Needs Improvement"

	feedback_html += f"""
	<div style='border-left: 4px solid {border_color}; margin: 20px; padding: 20px; background-color: #fafafa; border-radius: 8px;'>
	<div style='display: flex; align-items: center; margin-bottom: 15px;'>
	<span style='font-size: 24px; margin-right: 10px;'>{icon}</span>
	<h4 style='margin: 0; color: #333;'>Sentence {result['sentence_num']}</h4>
	<span style='margin-left: auto; background-color: {status_bg}; padding: 4px 12px; border-radius: 12px; font-size: 12px; font-weight: bold;'>{status_text}</span>
	</div>

	<div style='margin-bottom: 15px;'>
	<div style='margin-bottom: 10px;'>
	<strong style='color: #6c757d;'>📝 Original:</strong>
	<div style='background-color: #e9ecef; padding: 10px; border-radius: 6px; margin-top: 5px; font-style: italic;'>{result['original']}</div>
	</div>

	<div style='margin-bottom: 10px;'>
	<strong style='color: #007bff;'>✏️ Your Answer:</strong>
	<div style='background-color: #e7f3ff; padding: 10px; border-radius: 6px; margin-top: 5px;'>{result['student_response']}</div>
	</div>
	"""

	# Only show model analysis if there were errors in student's response
	if not result['is_correct'] and result['analysis_html']:
	feedback_html += f"""
	<div style='margin-top: 15px; padding: 15px; background-color: #fff3cd; border-radius: 6px; border-left: 3px solid #ffc107;'>
	<strong style='color: #856404;'>🔍 Grammar Analysis of Your Response:</strong>
	<div style='margin-top: 10px; font-size: 14px;'>
	{result['analysis_html']}
	</div>
	</div>
	"""

	feedback_html += """
	</div>
	</div>
	"""

	feedback_html += """
	</div>

	<!-- Footer -->
	<div style='text-align: center; margin-top: 30px; color: #6c757d; font-size: 14px;'>
	<p>💡 <strong>Tip:</strong> Review the grammar analysis above to understand common error patterns and improve your writing!</p>
	</div>
	</div>
	"""

	return f"Score: {score:.1f}%", feedback_html


	def preview_exercise(exercise_id):
	"""Preview an exercise before attempting it"""
	if not exercise_id.strip():
	return "Please enter an exercise ID.", ""

	try:
	exercise_id = int(exercise_id)
	except:
	return "Please enter a valid exercise ID.", ""

	# Get exercise from database
	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	c.execute("SELECT title, instructions, sentences FROM exercises WHERE id = ?", (exercise_id,))
	result = c.fetchone()

	if not result:
	return "Exercise not found.", ""

	title, instructions, sentences_json = result
	exercise_sentences = json.loads(sentences_json)

	conn.close()

	# Create preview HTML
	preview_html = f"""
	<div style='font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto;'>
	<!-- Header -->
	<div style='background: linear-gradient(135deg, #4CAF50 0%, #45a049 100%); color: white; padding: 25px; border-radius: 10px 10px 0 0; text-align: center;'>
	<h2 style='margin: 0; font-size: 24px;'>📋 {title}</h2>
	<p style='margin: 10px 0 0 0; font-size: 16px; opacity: 0.9;'>Exercise ID: {exercise_id}</p>
	</div>

	<!-- Instructions -->
	<div style='background-color: #e8f5e9; padding: 20px; border-left: 1px solid #ddd; border-right: 1px solid #ddd;'>
	<h3 style='margin: 0 0 10px 0; color: #2e7d32;'>📝 Instructions:</h3>
	<p style='margin: 0; font-size: 16px; line-height: 1.5;'>{instructions}</p>
	<p style='margin: 10px 0 0 0; font-size: 14px; color: #666; font-style: italic;'>
	💡 Tip: Read each sentence carefully and identify grammatical errors before writing your corrections.
	</p>
	</div>

	<!-- Sentences -->
	<div style='background-color: white; border: 1px solid #ddd; border-radius: 0 0 10px 10px; padding: 20px;'>
	<h3 style='margin: 0 0 20px 0; color: #333;'>📚 Sentences to Correct ({len(exercise_sentences)} total):</h3>
	<ol style='padding-left: 20px;'>
	"""

	for i, sentence_data in enumerate(exercise_sentences, 1):
	original = sentence_data['original']
	error_types = sentence_data.get('error_types', [])

	# Add error type hints if available
	error_hint = ""
	if error_types:
	error_hint = f"<br><small style='color: #666; font-style: italic;'>💡 Focus on: {', '.join(error_types)}</small>"

	preview_html += f"""
	<li style='margin: 15px 0; padding: 15px; background-color: #f8f9fa; border-radius: 6px; border-left: 3px solid #4CAF50;'>
	<div style='font-size: 16px; line-height: 1.5; margin-bottom: 5px;'>{original}</div>
	{error_hint}
	</li>
	"""

	preview_html += f"""
	</ol>

	<div style='margin-top: 30px; padding: 20px; background-color: #f0f8ff; border-radius: 8px; border: 1px solid #b3d9ff;'>
	<h4 style='margin: 0 0 10px 0; color: #0066cc;'>🎯 How to Complete This Exercise:</h4>
	<ol style='margin: 0; padding-left: 20px; color: #333;'>
	<li>Read each sentence carefully</li>
	<li>Identify grammatical errors (spelling, grammar, word choice, etc.)</li>
	<li>Write your corrected version of each sentence</li>
	<li>Enter all your answers in the text box below (one sentence per line)</li>
	<li>Submit to get immediate feedback and scoring</li>
	</ol>
	</div>
	</div>
	</div>
	"""

	return f"Exercise '{title}' loaded successfully! {len(exercise_sentences)} sentences to correct.", preview_html


	def get_student_progress(student_name):
	"""Get student's submission and exercise history"""
	if not student_name.strip():
	return "Please enter a student name."

	conn = sqlite3.connect('language_app.db')
	c = conn.cursor()

	# Get submissions
	c.execute("""SELECT s.id, s.content, s.created_at, t.title
	FROM submissions s JOIN tasks t ON s.task_id = t.id
	WHERE s.student_name = ? ORDER BY s.created_at DESC""", (student_name,))
	submissions = c.fetchall()

	# Get exercise attempts
	c.execute("""SELECT ea.score, ea.created_at, e.title
	FROM exercise_attempts ea JOIN exercises e ON ea.exercise_id = e.id
	WHERE ea.student_name = ? ORDER BY ea.created_at DESC""", (student_name,))
	attempts = c.fetchall()

	conn.close()

	progress_html = f"""
	<div style='font-family: Arial, sans-serif; padding: 20px;'>
	<h3>Progress for {student_name}</h3>

	<h4>Writing Submissions ({len(submissions)})</h4>
	<ul>
	"""

	for sub in submissions:
	progress_html += f"<li><strong>{sub[3]}</strong> - {sub[2][:16]} - {len(sub[1])} characters</li>"

	progress_html += f"""
	</ul>

	<h4>Exercise Attempts ({len(attempts)})</h4>
	<ul>
	"""

	for att in attempts:
	progress_html += f"<li><strong>{att[2]}</strong> - Score: {att[0]:.1f}% - {att[1][:16]}</li>"

	progress_html += "</ul></div>"

	return progress_html

	# Create Gradio Interface
	with gr.Blocks(title="Language Learning App - Enhanced Grammar Checker", theme=gr.themes.Soft()) as app:
	gr.Markdown("# 🎓 Language Learning Application")
	gr.Markdown("### AI-Powered Grammar Checking and Exercise Generation")
	gr.Markdown("Now featuring advanced T5-GED neural network with enhanced error detection")

	with gr.Tabs():
	# Student Writing Analysis Tab
	with gr.TabItem("📝 Writing Analysis"):
	gr.Markdown("## Submit Your Writing for Analysis")

	with gr.Row():
	with gr.Column():
	student_name_input = gr.Textbox(label="Your Name", placeholder="Enter your name")
	task_title_input = gr.Textbox(label="Assignment Title", value="General Writing Task")
	writing_input = gr.Textbox(
	label="Your Writing",
	lines=8,
	placeholder="Paste your writing here for grammar analysis..."
	)
	analyze_btn = gr.Button("Analyze Writing", variant="primary")

	with gr.Column():
	corrected_output = gr.Textbox(label="Corrected Text", lines=6)
	analysis_output = gr.HTML(label="Detailed Analysis")

	analyze_btn.click(
	analyze_student_writing,
	inputs=[writing_input, student_name_input, task_title_input],
	outputs=[corrected_output, analysis_output]
	)

	# Exercise Creation Tab
	with gr.TabItem("🏋️ Exercise Creation"):
	gr.Markdown("## Create Grammar Exercises")

	with gr.Row():
	with gr.Column():
	exercise_title_input = gr.Textbox(label="Exercise Title", value="Grammar Exercise")
	exercise_text_input = gr.Textbox(
	label="Text with Errors",
	lines=6,
	placeholder="Enter text containing grammatical errors to create an exercise..."
	)
	create_exercise_btn = gr.Button("Create Exercise", variant="primary")

	with gr.Column():
	exercise_result = gr.Textbox(label="Result")
	exercise_display = gr.HTML(label="Generated Exercise")

	create_exercise_btn.click(
	create_exercise_from_text,
	inputs=[exercise_text_input, exercise_title_input],
	outputs=[exercise_result, exercise_display]
	)

	# Exercise Attempt Tab
	with gr.TabItem("✏️ Exercise Practice"):
	gr.Markdown("## Practice Grammar Exercises")
	with gr.Row():
	with gr.Column():
	exercise_id_input = gr.Textbox(label="Exercise ID", placeholder="Enter exercise ID")

	# Preview section
	with gr.Row():
	preview_btn = gr.Button("👀 Preview Exercise", variant="secondary")

	preview_result = gr.Textbox(label="Preview Status", lines=1)
	preview_display = gr.HTML(label="Exercise Preview")

	# Separator
	gr.Markdown("---")

	# Attempt section
	gr.Markdown("### 📝 Complete the Exercise")
	student_name_exercise = gr.Textbox(label="Your Name", placeholder="Enter your name")
	responses_input = gr.Textbox(
	label="Your Answers",
	lines=8,
	placeholder="After previewing the exercise above, enter your corrected sentences here (one per line)..."
	)
	submit_exercise_btn = gr.Button("✅ Submit Answers", variant="primary")

	with gr.Column():
	score_output = gr.Textbox(label="Your Score")
	feedback_output = gr.HTML(label="Detailed Feedback")

	# Connect the buttons
	preview_btn.click(
	preview_exercise,
	inputs=[exercise_id_input],
	outputs=[preview_result, preview_display]
	)

	submit_exercise_btn.click(
	attempt_exercise,
	inputs=[exercise_id_input, responses_input, student_name_exercise],
	outputs=[score_output, feedback_output]
	)
	# Progress Tracking Tab
	with gr.TabItem("📊 Student Progress"):
	gr.Markdown("## View Student Progress")

	with gr.Row():
	with gr.Column(scale=1):
	progress_student_name = gr.Textbox(label="Student Name", placeholder="Enter student name")
	get_progress_btn = gr.Button("Get Progress", variant="primary")

	with gr.Column(scale=2):
	progress_output = gr.HTML(label="Student Progress")

	get_progress_btn.click(
	get_student_progress,
	inputs=[progress_student_name],
	outputs=[progress_output]
	)

	gr.Markdown("""
	---
	### How to Use:
	1. Writing Analysis: Submit your writing to get grammar corrections and detailed error analysis
	2. Exercise Creation: Teachers can create exercises from text containing errors
	3. Exercise Practice: Students can practice with generated exercises and get scored feedback
	4. Progress Tracking: View student progress across submissions and exercises

	Powered by advanced T5-GED neural networks for enhanced grammar error detection and correction
	""")

	if __name__ == "__main__":
	app.launch(share=True)