|
""" |
|
Storage Management Tools |
|
Provides tools for managing download and transcript storage |
|
""" |
|
|
|
from typing import Dict, Any, List |
|
from pathlib import Path |
|
|
|
from ..utils.storage_config import get_storage_config |
|
|
|
|
|
async def get_storage_info_tool() -> Dict[str, Any]: |
|
""" |
|
Get comprehensive storage information including directory sizes and file counts |
|
|
|
Returns: |
|
Storage information dictionary |
|
""" |
|
try: |
|
storage_config = get_storage_config() |
|
storage_info = storage_config.get_storage_info() |
|
|
|
print(f"π Storage Information:") |
|
print(f" Downloads: {storage_info['downloads_dir']}") |
|
print(f" Transcripts: {storage_info['transcripts_dir']}") |
|
print(f" Cache: {storage_info['cache_dir']}") |
|
|
|
return {"status": "success", **storage_info} |
|
|
|
except Exception as e: |
|
return {"status": "failed", "error_message": str(e)} |
|
|
|
|
|
async def list_audio_files_tool() -> Dict[str, Any]: |
|
""" |
|
List all audio files in the downloads directory |
|
|
|
Returns: |
|
List of audio files with metadata |
|
""" |
|
try: |
|
storage_config = get_storage_config() |
|
audio_files = storage_config.get_audio_files() |
|
|
|
file_list = [] |
|
total_size = 0 |
|
|
|
for audio_file in audio_files: |
|
file_size = audio_file.stat().st_size |
|
total_size += file_size |
|
|
|
|
|
transcript_files = storage_config.get_transcript_files(audio_file.name) |
|
has_transcripts = { |
|
'txt': transcript_files['txt'].exists(), |
|
'srt': transcript_files['srt'].exists(), |
|
'json': transcript_files['json'].exists() |
|
} |
|
|
|
file_info = { |
|
"filename": audio_file.name, |
|
"path": str(audio_file), |
|
"size_mb": round(file_size / (1024 * 1024), 2), |
|
"modified": audio_file.stat().st_mtime, |
|
"has_transcripts": has_transcripts, |
|
"transcript_count": sum(has_transcripts.values()) |
|
} |
|
file_list.append(file_info) |
|
|
|
print(f"π Found {len(audio_files)} audio files ({round(total_size / (1024 * 1024), 2)} MB total)") |
|
|
|
return { |
|
"status": "success", |
|
"audio_files_count": len(audio_files), |
|
"total_size_mb": round(total_size / (1024 * 1024), 2), |
|
"downloads_directory": str(storage_config.downloads_dir), |
|
"audio_files": file_list |
|
} |
|
|
|
except Exception as e: |
|
return { |
|
"status": "failed", |
|
"error_message": f"Audio files listing tool error: {str(e)}" |
|
} |
|
|
|
|
|
async def list_transcript_files_tool() -> Dict[str, Any]: |
|
""" |
|
List all transcript files in the transcripts directory |
|
|
|
Returns: |
|
List of transcript files organized by format |
|
""" |
|
try: |
|
storage_config = get_storage_config() |
|
transcript_files = storage_config.get_transcript_files() |
|
|
|
organized_files = {} |
|
total_files = 0 |
|
total_size = 0 |
|
|
|
for format_type, files in transcript_files.items(): |
|
format_info = [] |
|
format_size = 0 |
|
|
|
for transcript_file in files: |
|
file_size = transcript_file.stat().st_size |
|
format_size += file_size |
|
total_size += file_size |
|
|
|
|
|
base_name = transcript_file.stem |
|
audio_files = storage_config.get_audio_files() |
|
has_audio = any(af.stem == base_name for af in audio_files) |
|
|
|
file_info = { |
|
"filename": transcript_file.name, |
|
"path": str(transcript_file), |
|
"size_kb": round(file_size / 1024, 2), |
|
"modified": transcript_file.stat().st_mtime, |
|
"base_name": base_name, |
|
"has_audio": has_audio |
|
} |
|
format_info.append(file_info) |
|
|
|
organized_files[format_type] = { |
|
"count": len(files), |
|
"size_kb": round(format_size / 1024, 2), |
|
"files": format_info |
|
} |
|
total_files += len(files) |
|
|
|
print(f"π Found {total_files} transcript files ({round(total_size / 1024, 2)} KB total)") |
|
|
|
return { |
|
"status": "success", |
|
"total_files": total_files, |
|
"total_size_kb": round(total_size / 1024, 2), |
|
"transcripts_directory": str(storage_config.transcripts_dir), |
|
"formats": organized_files |
|
} |
|
|
|
except Exception as e: |
|
return { |
|
"status": "failed", |
|
"error_message": f"Transcript files listing tool error: {str(e)}" |
|
} |
|
|
|
|
|
async def cleanup_cache_tool(pattern: str = "temp_*") -> Dict[str, Any]: |
|
""" |
|
Clean up temporary files in cache directory |
|
|
|
Args: |
|
pattern: File pattern to match for cleanup (default: temp_*) |
|
|
|
Returns: |
|
Cleanup result |
|
""" |
|
try: |
|
storage_config = get_storage_config() |
|
|
|
|
|
cache_info_before = storage_config.get_storage_info() |
|
cache_size_before = cache_info_before['cache_size_mb'] |
|
|
|
|
|
storage_config.cleanup_temp_files(pattern) |
|
|
|
|
|
cache_info_after = storage_config.get_storage_info() |
|
cache_size_after = cache_info_after['cache_size_mb'] |
|
|
|
cleaned_mb = cache_size_before - cache_size_after |
|
|
|
print(f"ποΈ Cache cleanup completed") |
|
print(f" Pattern: {pattern}") |
|
print(f" Cleaned: {cleaned_mb:.2f} MB") |
|
print(f" Cache size: {cache_size_before:.2f} MB β {cache_size_after:.2f} MB") |
|
|
|
return { |
|
"status": "success", |
|
"cleanup_pattern": pattern, |
|
"cache_directory": str(storage_config.cache_dir), |
|
"size_before_mb": cache_size_before, |
|
"size_after_mb": cache_size_after, |
|
"cleaned_mb": cleaned_mb |
|
} |
|
|
|
except Exception as e: |
|
return { |
|
"status": "failed", |
|
"error_message": f"Cache cleanup tool error: {str(e)}" |
|
} |
|
|
|
|
|
async def check_transcript_status_tool(audio_filename: str = None) -> Dict[str, Any]: |
|
""" |
|
Check transcript status for audio files |
|
|
|
Args: |
|
audio_filename: Specific audio file to check (optional) |
|
|
|
Returns: |
|
Transcript status information |
|
""" |
|
try: |
|
storage_config = get_storage_config() |
|
|
|
if audio_filename: |
|
|
|
audio_path = storage_config.get_download_path(audio_filename) |
|
if not audio_path.exists(): |
|
return { |
|
"status": "failed", |
|
"error_message": f"Audio file not found: {audio_filename}" |
|
} |
|
|
|
transcript_files = storage_config.get_transcript_files(audio_filename) |
|
status = { |
|
"audio_file": audio_filename, |
|
"audio_exists": True, |
|
"transcripts": { |
|
format_type: { |
|
"exists": file_path.exists(), |
|
"path": str(file_path), |
|
"size_kb": round(file_path.stat().st_size / 1024, 2) if file_path.exists() else 0 |
|
} |
|
for format_type, file_path in transcript_files.items() |
|
} |
|
} |
|
|
|
return { |
|
"status": "success", |
|
"mode": "single_file", |
|
**status |
|
} |
|
else: |
|
|
|
audio_files = storage_config.get_audio_files() |
|
statuses = [] |
|
|
|
summary = { |
|
"total_audio_files": len(audio_files), |
|
"files_with_transcripts": 0, |
|
"files_without_transcripts": 0, |
|
"transcript_formats": {"txt": 0, "srt": 0, "json": 0} |
|
} |
|
|
|
for audio_file in audio_files: |
|
transcript_files = storage_config.get_transcript_files(audio_file.name) |
|
|
|
has_any_transcript = any(tf.exists() for tf in transcript_files.values()) |
|
if has_any_transcript: |
|
summary["files_with_transcripts"] += 1 |
|
else: |
|
summary["files_without_transcripts"] += 1 |
|
|
|
file_status = { |
|
"audio_file": audio_file.name, |
|
"has_transcripts": has_any_transcript, |
|
"transcript_formats": { |
|
format_type: file_path.exists() |
|
for format_type, file_path in transcript_files.items() |
|
} |
|
} |
|
|
|
|
|
for format_type, exists in file_status["transcript_formats"].items(): |
|
if exists: |
|
summary["transcript_formats"][format_type] += 1 |
|
|
|
statuses.append(file_status) |
|
|
|
print(f"π Transcript Status Summary:") |
|
print(f" Total audio files: {summary['total_audio_files']}") |
|
print(f" With transcripts: {summary['files_with_transcripts']}") |
|
print(f" Without transcripts: {summary['files_without_transcripts']}") |
|
print(f" Format counts: TXT({summary['transcript_formats']['txt']}) SRT({summary['transcript_formats']['srt']}) JSON({summary['transcript_formats']['json']})") |
|
|
|
return { |
|
"status": "success", |
|
"mode": "all_files", |
|
"summary": summary, |
|
"file_statuses": statuses |
|
} |
|
|
|
except Exception as e: |
|
return { |
|
"status": "failed", |
|
"error_message": f"Transcript status tool error: {str(e)}" |
|
} |