File size: 10,355 Bytes
b5df735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
"""
Storage Management Tools
Provides tools for managing download and transcript storage
"""

from typing import Dict, Any, List
from pathlib import Path

from ..utils.storage_config import get_storage_config


async def get_storage_info_tool() -> Dict[str, Any]:
    """
    Report where downloads, transcripts and cache files are stored

    Fetches the summary produced by ``get_storage_info()`` on the storage
    configuration, prints the three directory paths and returns the summary
    unchanged apart from an added status flag.

    Returns:
        On success, ``{"status": "success", ...}`` merged with every key of
        the storage summary (a ``status`` key in the summary, if present,
        takes precedence). On any error, ``{"status": "failed",
        "error_message": ...}``; exceptions are not propagated.
    """
    try:
        storage_info = get_storage_config().get_storage_info()

        print("📊 Storage Information:")
        print(f"   Downloads: {storage_info['downloads_dir']}")
        print(f"   Transcripts: {storage_info['transcripts_dir']}")
        print(f"   Cache: {storage_info['cache_dir']}")

        return {"status": "success", **storage_info}

    except Exception as e:
        # Same "<tool> error: <detail>" shape as the other storage tools so
        # a caller can tell which tool produced the failure.
        return {
            "status": "failed",
            "error_message": f"Storage info tool error: {str(e)}"
        }


async def list_audio_files_tool() -> Dict[str, Any]:
    """
    List all audio files in the downloads directory

    For every path returned by ``get_audio_files()`` the tool records its
    size, modification time and which of the txt/srt/json transcript files
    already exist for it.

    Returns:
        On success, a dictionary with:
            status: "success"
            audio_files_count: number of audio files found
            total_size_mb: combined size in MB, rounded to 2 decimals
            downloads_directory: downloads directory as a string
            audio_files: one entry per file with ``filename``, ``path``,
                ``size_mb``, ``modified`` (``st_mtime`` timestamp),
                ``has_transcripts`` (format -> bool) and ``transcript_count``
        On any error, ``{"status": "failed", "error_message": ...}``;
        exceptions are not propagated.
    """
    bytes_per_mb = 1024 * 1024

    try:
        storage_config = get_storage_config()
        audio_files = storage_config.get_audio_files()

        file_list = []
        total_size = 0

        for audio_file in audio_files:
            # A single stat() call supplies both size and mtime, so the two
            # values always describe the same snapshot of the file.
            file_stat = audio_file.stat()
            total_size += file_stat.st_size

            # Check for corresponding transcript files
            transcript_files = storage_config.get_transcript_files(audio_file.name)
            has_transcripts = {
                format_type: transcript_files[format_type].exists()
                for format_type in ("txt", "srt", "json")
            }

            file_list.append({
                "filename": audio_file.name,
                "path": str(audio_file),
                "size_mb": round(file_stat.st_size / bytes_per_mb, 2),
                "modified": file_stat.st_mtime,
                "has_transcripts": has_transcripts,
                "transcript_count": sum(has_transcripts.values())
            })

        total_size_mb = round(total_size / bytes_per_mb, 2)

        print(f"📁 Found {len(audio_files)} audio files ({total_size_mb} MB total)")

        return {
            "status": "success",
            "audio_files_count": len(audio_files),
            "total_size_mb": total_size_mb,
            "downloads_directory": str(storage_config.downloads_dir),
            "audio_files": file_list
        }

    except Exception as e:
        return {
            "status": "failed",
            "error_message": f"Audio files listing tool error: {str(e)}"
        }


async def list_transcript_files_tool() -> Dict[str, Any]:
    """
    List all transcript files in the transcripts directory

    ``get_transcript_files()`` (called without arguments) is expected to
    return a mapping of format name to a sized collection of paths; each
    path is reported with its size, modification time and whether an audio
    file with the same stem exists.

    Returns:
        On success, a dictionary with:
            status: "success"
            total_files: number of transcript files across all formats
            total_size_kb: combined size in KB, rounded to 2 decimals
            transcripts_directory: transcripts directory as a string
            formats: format -> ``{"count", "size_kb", "files"}`` where each
                file entry has ``filename``, ``path``, ``size_kb``,
                ``modified``, ``base_name`` and ``has_audio``
        On any error, ``{"status": "failed", "error_message": ...}``;
        exceptions are not propagated.
    """
    try:
        storage_config = get_storage_config()
        transcript_files = storage_config.get_transcript_files()

        # Collect the audio base names once. Listing the audio files again
        # for every transcript made the scan quadratic, and a set gives a
        # constant-time membership test.
        audio_stems = {
            audio_file.stem for audio_file in storage_config.get_audio_files()
        }

        organized_files = {}
        total_files = 0
        total_size = 0

        for format_type, files in transcript_files.items():
            format_info = []
            format_size = 0

            for transcript_file in files:
                # One stat() call for both size and mtime
                file_stat = transcript_file.stat()
                format_size += file_stat.st_size

                base_name = transcript_file.stem
                format_info.append({
                    "filename": transcript_file.name,
                    "path": str(transcript_file),
                    "size_kb": round(file_stat.st_size / 1024, 2),
                    "modified": file_stat.st_mtime,
                    "base_name": base_name,
                    "has_audio": base_name in audio_stems
                })

            organized_files[format_type] = {
                "count": len(files),
                "size_kb": round(format_size / 1024, 2),
                "files": format_info
            }
            total_files += len(files)
            total_size += format_size

        total_size_kb = round(total_size / 1024, 2)

        print(f"📄 Found {total_files} transcript files ({total_size_kb} KB total)")

        return {
            "status": "success",
            "total_files": total_files,
            "total_size_kb": total_size_kb,
            "transcripts_directory": str(storage_config.transcripts_dir),
            "formats": organized_files
        }

    except Exception as e:
        return {
            "status": "failed",
            "error_message": f"Transcript files listing tool error: {str(e)}"
        }


async def cleanup_cache_tool(pattern: str = "temp_*") -> Dict[str, Any]:
    """
    Clean up temporary files in cache directory

    The cache size reported by ``get_storage_info()`` is sampled before and
    after delegating the deletion to ``cleanup_temp_files(pattern)``; the
    difference is reported as the amount cleaned.

    Args:
        pattern: File pattern to match for cleanup (default: temp_*).
            Patterns that are rooted (absolute or drive-qualified) or that
            contain a ``..`` component are rejected, because they could
            address files outside the cache directory.

    Returns:
        On success, a dictionary with ``status`` ("success"),
        ``cleanup_pattern``, ``cache_directory``, ``size_before_mb``,
        ``size_after_mb`` and ``cleaned_mb`` (rounded to 2 decimals).
        On a rejected pattern or any error, ``{"status": "failed",
        "error_message": ...}``; exceptions are not propagated.
    """
    try:
        # The pattern is supplied by the tool caller and ends up driving file
        # deletion, so validate it before touching storage at all.
        pattern_path = Path(pattern)
        if pattern_path.anchor or ".." in pattern_path.parts:
            return {
                "status": "failed",
                "error_message": f"Cache cleanup tool error: invalid cleanup pattern: {pattern}"
            }

        storage_config = get_storage_config()

        # Get cache size before cleanup
        cache_size_before = storage_config.get_storage_info()['cache_size_mb']

        # Perform cleanup
        storage_config.cleanup_temp_files(pattern)

        # Get cache size after cleanup
        cache_size_after = storage_config.get_storage_info()['cache_size_mb']

        # Rounded so float subtraction noise (e.g. 0.30000000000000004) does
        # not leak into the result; matches the 2-decimal sizes used by the
        # other storage tools.
        cleaned_mb = round(cache_size_before - cache_size_after, 2)

        print("🗑️ Cache cleanup completed")
        print(f"   Pattern: {pattern}")
        print(f"   Cleaned: {cleaned_mb:.2f} MB")
        print(f"   Cache size: {cache_size_before:.2f} MB → {cache_size_after:.2f} MB")

        return {
            "status": "success",
            "cleanup_pattern": pattern,
            "cache_directory": str(storage_config.cache_dir),
            "size_before_mb": cache_size_before,
            "size_after_mb": cache_size_after,
            "cleaned_mb": cleaned_mb
        }

    except Exception as e:
        return {
            "status": "failed",
            "error_message": f"Cache cleanup tool error: {str(e)}"
        }


async def check_transcript_status_tool(audio_filename: str = None) -> Dict[str, Any]:
    """
    Check transcript status for audio files

    Runs in one of two modes. With a filename, only that audio file is
    inspected and each of its transcript files is reported with existence,
    path and size. Without one (``None`` or an empty string), every audio
    file is inspected and a summary with per-format counts is produced and
    printed.

    Args:
        audio_filename: Specific audio file to check (optional)

    Returns:
        Single-file mode: ``status``, ``mode`` ("single_file"),
        ``audio_file``, ``audio_exists`` and ``transcripts`` (format ->
        ``{"exists", "path", "size_kb"}``).
        All-files mode: ``status``, ``mode`` ("all_files"), ``summary``
        (``total_audio_files``, ``files_with_transcripts``,
        ``files_without_transcripts``, ``transcript_formats``) and
        ``file_statuses``.
        If the named audio file is missing, or on any error,
        ``{"status": "failed", "error_message": ...}``; exceptions are not
        propagated.
    """
    try:
        storage_config = get_storage_config()

        if audio_filename:
            # Check specific file
            audio_path = storage_config.get_download_path(audio_filename)
            if not audio_path.exists():
                return {
                    "status": "failed",
                    "error_message": f"Audio file not found: {audio_filename}"
                }

            transcripts = {}
            transcript_files = storage_config.get_transcript_files(audio_filename)
            for format_type, file_path in transcript_files.items():
                # Evaluate existence once so "exists" and "size_kb" agree.
                exists = file_path.exists()
                transcripts[format_type] = {
                    "exists": exists,
                    "path": str(file_path),
                    "size_kb": round(file_path.stat().st_size / 1024, 2) if exists else 0
                }

            return {
                "status": "success",
                "mode": "single_file",
                "audio_file": audio_filename,
                "audio_exists": True,
                "transcripts": transcripts
            }

        # Check all audio files
        audio_files = storage_config.get_audio_files()
        statuses = []

        summary = {
            "total_audio_files": len(audio_files),
            "files_with_transcripts": 0,
            "files_without_transcripts": 0,
            "transcript_formats": {"txt": 0, "srt": 0, "json": 0}
        }
        format_counts = summary["transcript_formats"]

        for audio_file in audio_files:
            transcript_files = storage_config.get_transcript_files(audio_file.name)

            # Probe the filesystem once per transcript and derive everything
            # else from the result.
            formats_present = {
                format_type: file_path.exists()
                for format_type, file_path in transcript_files.items()
            }
            has_any_transcript = any(formats_present.values())

            if has_any_transcript:
                summary["files_with_transcripts"] += 1
            else:
                summary["files_without_transcripts"] += 1

            # Count transcript formats; .get() keeps a format other than
            # txt/srt/json from raising KeyError and failing the whole tool.
            for format_type, exists in formats_present.items():
                if exists:
                    format_counts[format_type] = format_counts.get(format_type, 0) + 1

            statuses.append({
                "audio_file": audio_file.name,
                "has_transcripts": has_any_transcript,
                "transcript_formats": formats_present
            })

        print("📊 Transcript Status Summary:")
        print(f"   Total audio files: {summary['total_audio_files']}")
        print(f"   With transcripts: {summary['files_with_transcripts']}")
        print(f"   Without transcripts: {summary['files_without_transcripts']}")
        print(f"   Format counts: TXT({format_counts['txt']}) SRT({format_counts['srt']}) JSON({format_counts['json']})")

        return {
            "status": "success",
            "mode": "all_files",
            "summary": summary,
            "file_statuses": statuses
        }

    except Exception as e:
        return {
            "status": "failed",
            "error_message": f"Transcript status tool error: {str(e)}"
        }