File size: 1,358 Bytes
b7bbd70
 
86f4186
a7fb4a4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86f4186
b7bbd70
 
 
 
 
 
 
 
 
86f4186
b7bbd70
 
 
 
 
 
2f3bed4
a7fb4a4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import os
from magika import Magika

def validate_file_types(directory):
    """Identify the content type of every file under *directory* with Magika.

    The tree is walked recursively. Directories named exactly ``.git`` are
    pruned, so their contents are never read, while look-alike names such as
    ``.github`` are still scanned.

    Args:
        directory: Root of the directory tree to scan.

    Returns:
        A dict mapping each file path (the walked directory joined with the
        file name) to the content-type label Magika reports for it, or to an
        ``"Error: ..."`` string when the file could not be read or identified.
    """
    m = Magika()
    file_types = {}
    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk never descends into a .git directory.
        # Matching the exact directory name avoids the false positives of a
        # substring test on the whole path (".github", "repo.git-backup", or
        # a scan root whose own path happens to contain ".git").
        dirs[:] = [d for d in dirs if d != ".git"]
        for file_name in files:
            file_path = os.path.join(root, file_name)
            try:
                with open(file_path, "rb") as file:
                    file_bytes = file.read()
                result = m.identify_bytes(file_bytes)
                file_types[file_path] = result.output.ct_label
            except Exception as e:
                # Best effort: record the failure for this file and keep
                # scanning the rest of the tree.
                file_types[file_path] = f"Error: {str(e)}"
    return file_types

def get_file_summary(file_path, file_type):
    """Build a small metadata record describing one file.

    Args:
        file_path: Path of the file to describe.
        file_type: Type label stored unchanged in the record.

    Returns:
        A dict with the keys ``name`` (the path made relative by
        ``os.path.relpath``), ``type``, ``size`` (bytes), ``creation_date``
        and ``modification_date`` (both numeric timestamps).

    Raises:
        OSError: If the file cannot be stat'ed (for example, it is missing).
    """
    # One stat call supplies the size and both timestamps.
    stat_result = os.stat(file_path)

    summary = {}
    summary["name"] = os.path.relpath(file_path)
    summary["type"] = file_type
    summary["size"] = stat_result.st_size
    # NOTE: per the Python docs, ctime is the creation time on some systems
    # (e.g. Windows) but the last metadata change on others (e.g. Unix).
    summary["creation_date"] = stat_result.st_ctime
    summary["modification_date"] = stat_result.st_mtime
    return summary

def read_file_content(file_path, max_size=32*1024):
    """Read a file as UTF-8 text, truncating it to *max_size* characters.

    Bytes that are not valid UTF-8 are silently dropped
    (``errors="ignore"``).

    Args:
        file_path: Path of the file to read.
        max_size: Maximum number of characters to return before the
            truncation marker is appended.

    Returns:
        The whole text when it holds at most ``max_size`` characters;
        otherwise the first ``max_size`` characters followed by
        ``"\\n... [Content Truncated] ..."``.
    """
    with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
        # Text-mode read() counts characters, not bytes, so the decision to
        # truncate must also be made on characters. Comparing the on-disk
        # byte size against max_size wrongly flagged multibyte files as
        # truncated even when every character had been returned. Reading one
        # extra character reveals whether anything lies beyond the limit.
        content = file.read(max_size + 1)
    if len(content) > max_size:
        return content[:max_size] + "\n... [Content Truncated] ..."
    return content

def summarize_content(content):
    """Placeholder for content summarization; not implemented yet.

    Args:
        content: The text that a future implementation would summarize.

    Returns:
        Always ``None`` for now.
    """
    # TODO: implement the summarization logic here, potentially using
    # transformers.
    return None