oceansweep committed on
Commit
965a1e8
·
verified ·
1 Parent(s): d7a2489

Delete App_Function_Libraries/Book_Ingestion_Lib.py

Browse files
App_Function_Libraries/Book_Ingestion_Lib.py DELETED
@@ -1,95 +0,0 @@
1
- # Book_Ingestion_Lib.py
2
- #########################################
3
- # Library to hold functions for ingesting book files.#
4
- #
5
- ####################
6
- # Function List
7
- #
8
- # 1. ingest_text_file(file_path, title=None, author=None, keywords=None):
9
- # 2.
10
- #
11
- #
12
- ####################
13
-
14
-
15
- # Import necessary libraries
16
- import os
17
- import re
18
- from datetime import datetime
19
- import logging
20
-
21
-
22
- # Import Local
23
- from App_Function_Libraries.DB.SQLite_DB import add_media_with_keywords
24
-
25
- #######################################################################################################################
26
- # Function Definitions
27
- #
28
-
29
- # Ingest a text file into the database with Title/Author/Keywords
30
-
31
def extract_epub_metadata(content):
    """Extract the title and author from ``Title:`` / ``Author:`` fields in text.

    The first occurrence of each label is used. The value is the remainder of
    the line that the label appears on, with surrounding whitespace removed.

    Args:
        content: Text to search.

    Returns:
        A ``(title, author)`` tuple. An element is ``None`` when its label is
        absent or when nothing but whitespace follows the label on that line.
    """
    def _field(label):
        # Only horizontal whitespace may follow the label: a generic ``\s*``
        # would skip over the newline of an empty field and capture the next
        # line instead. ``[^\n]*`` also matches a value on the final line of
        # the text when that line has no trailing newline.
        match = re.search(rf'{label}:[ \t]*([^\n]*)', content)
        if match is None:
            return None
        # strip() also drops a stray '\r' left over from CRLF line endings.
        value = match.group(1).strip()
        return value or None

    return _field('Title'), _field('Author')
39
-
40
-
41
def _merge_keywords(keywords):
    """Build the comma-separated keyword string stored with a text file."""
    base_tags = ['text_file', 'epub_converted']
    if not keywords:
        return ','.join(base_tags)
    if not isinstance(keywords, str):
        # A list/tuple of keywords would otherwise be interpolated as its repr.
        keywords = ','.join(str(keyword) for keyword in keywords)
    supplied = {tag.strip() for tag in keywords.split(',')}
    # Prepend only the base tags the caller did not already supply, so that
    # e.g. 'epub_converted' is not stored twice.
    missing = [tag for tag in base_tags if tag not in supplied]
    return ','.join(missing + [keywords])


def ingest_text_file(file_path, title=None, author=None, keywords=None):
    """Read a UTF-8 text file and store it via ``add_media_with_keywords``.

    Args:
        file_path: Path of the text file to read; also passed on as ``url``.
        title: Title to store. When empty, falls back to metadata extracted
            from the content (only if ``keywords`` contains
            ``'epub_converted'``), then to the file name without extension.
        author: Author to store. Same fallback to extracted metadata, then
            to ``'Unknown'``.
        keywords: Optional comma-separated keywords. ``'text_file'`` and
            ``'epub_converted'`` are always included in the stored value.

    Returns:
        A human-readable status string: a success message, or an error
        message when any exception was raised during ingestion.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()

        # Check if it's a converted epub and extract metadata if so
        if 'epub_converted' in (keywords or ''):
            extracted_title, extracted_author = extract_epub_metadata(content)
            title = title or extracted_title
            author = author or extracted_author

        # If title is still not provided, use the filename without extension
        if not title:
            title = os.path.splitext(os.path.basename(file_path))[0]

        # If author is still not provided, set it to 'Unknown'
        if not author:
            author = 'Unknown'

        # Add the text file to the database
        add_media_with_keywords(
            url=file_path,
            title=title,
            media_type='document',
            content=content,
            keywords=_merge_keywords(keywords),
            prompt='No prompt for text files',
            summary='No summary for text files',
            transcription_model='None',
            author=author,
            ingestion_date=datetime.now().strftime('%Y-%m-%d')
        )

        return f"Text file '{title}' by {author} ingested successfully."
    except Exception as e:
        # Deliberately broad: this function reports failures to the caller as
        # a status string instead of raising.
        logging.error(f"Error ingesting text file: {str(e)}")
        return f"Error ingesting text file: {str(e)}"
84
-
85
-
86
def ingest_folder(folder_path, keywords=None):
    """Ingest every ``.txt`` file found directly inside a folder.

    Args:
        folder_path: Directory to scan. Sub-directories are not descended
            into.
        keywords: Optional keywords forwarded unchanged to
            ``ingest_text_file`` for every file.

    Returns:
        A list with one status string (as returned by ``ingest_text_file``)
        per ``.txt`` file, in file-name order. Empty when the folder holds no
        ``.txt`` files.
    """
    results = []
    # os.listdir returns entries in arbitrary order; sort so files are
    # ingested and reported in a stable order.
    for filename in sorted(os.listdir(folder_path)):
        if filename.lower().endswith('.txt'):
            file_path = os.path.join(folder_path, filename)
            results.append(ingest_text_file(file_path, keywords=keywords))
    # The collected statuses were previously built but never handed back.
    return results
93
-
94
-
95
-