pmkhanh7890's picture
1st
22e1b62
raw
history blame
1.47 kB
from texts.models import TextDetector
def extract_text_and_images(path: str) -> tuple:
    """
    Extract the text and the image paths contained in a document.

    NOTE: this is currently a placeholder. The document at ``path`` is not
    read yet and empty results are always returned.

    Args:
        path: Path to the document.

    Returns:
        A ``(text_content, image_paths)`` tuple: the extracted text as a
        string and the extracted image paths as a list.
    """
    text_content = ""
    # Use a list (not a string) so that callers iterating over the result
    # get one entry per image path rather than one entry per character.
    image_paths = []
    return text_content, image_paths
def process_document(document_path) -> list:
    """
    Run AI-content detection over the text and images of a document.

    Args:
        document_path: Path to the document.

    Returns:
        A two-item list: the result of the text analysis, followed by a
        list holding one AI content percentage per extracted image.
    """
    # Split the document into its text and its image paths.
    extracted_text, extracted_images = extract_text_and_images(document_path)

    # Score the text.
    detector = TextDetector()
    text_score = detector.analyze_text(extracted_text)

    # Score the images.
    # TODO: add image_detector class
    # image_ai_content = image_detector.analyze_image(image_path)
    # Until a detector exists, every image is reported as a fixed 100.
    image_scores = [100 for _ in extracted_images]

    return [text_score, image_scores]
def main():
    """
    Analyze a sample document and print the results.

    Runs ``process_document`` on a hard-coded path and prints the text
    result and the per-image results to standard output.
    """
    document_path = "../data.pdf"  # Replace with your document path
    text_ai_content_percentage, image_ai_content_percentages = (
        process_document(document_path)
    )
    print("Text AI Content Percentage:", text_ai_content_percentage)
    # The second value is the list of per-image results, not a combined
    # score, so label it accordingly.
    print("Image AI Content Percentages:", image_ai_content_percentages)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()