MatteoFasulo commited on
Commit
0191eab
·
1 Parent(s): ea01d17

Added utils py script and other files

Browse files
.gitignore ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ # media folder
163
+ media/*
164
+
165
+ # background folder
166
+ background/*
167
+
168
+ # output folder
169
+ output/*
170
+
171
+ # Cookies
172
+ cookies.txt
173
+
174
+ # reddit2json
175
+ .env
176
+ Pipfile
177
+ Pipfile.lock
.streamlit/config.toml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor="#BD93F9"
3
+ backgroundColor="#282A36"
4
+ secondaryBackgroundColor="#44475A"
5
+ textColor="#F8F8F2"
6
+ font="sans serif"
7
+
8
+ [client]
9
+ showSidebarNavigation = false
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code of Conduct
2
+
3
+ We expect all contributors and participants in the ChatGPT-TikTok project to adhere to the following code of conduct. We want to create a welcoming and inclusive community where everyone feels respected and valued, regardless of their background, experience, or personal beliefs.
4
+
5
+ ## Our Standards
6
+
7
+ Examples of behavior that contribute to creating a positive environment include:
8
+
9
+ - Being respectful and considerate towards others, their opinions, and their work.
10
+ - Using welcoming and inclusive language.
11
+ - Showing empathy towards other contributors and participants.
12
+ - Being open to constructive feedback and criticism.
13
+ - Focusing on what is best for the community and the project.
14
+
15
+ Examples of unacceptable behavior include:
16
+
17
+ - Using derogatory, offensive, or discriminatory language or behavior.
18
+ - Trolling, insulting, or derogatory comments or personal attacks.
19
+ - Engaging in any form of harassment or intimidation, including but not limited to sexual harassment, racism, and hate speech.
20
+ - Public or private harassment, insults, or threats against anyone.
21
+ - Publishing or communicating private information about others without their consent.
22
+ - Other conduct that would be considered inappropriate in a professional setting.
23
+
24
+ ## Enforcement
25
+
26
+ We take this code of conduct seriously and will enforce it to the best of our ability. We expect all participants to follow these guidelines at all times, and any behavior that violates these standards may result in a warning or, in extreme cases, expulsion from the project.
27
+
28
+ If you witness or experience behavior that violates this code of conduct, please contact the project maintainers or send an email to the project email address with the subject line "Code of Conduct Violation." All complaints will be reviewed and investigated, and appropriate action will be taken.
29
+
30
+ ## Attribution
31
+
32
+ This code of conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
CONTRIBUTING.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to ChatGPT-TikTok 🚀
2
+
3
+ Thank you for your interest in contributing to our project! We appreciate your support and look forward to working with you.
4
+
5
+ ## Getting Started
6
+
7
+ Before contributing to our project, we recommend that you familiarize yourself with our project's [code of conduct](CODE_OF_CONDUCT.md). We also encourage you to review the [existing issues](https://github.com/MatteoFasulo/ChatGPT-TikTok/issues) and [pull requests](https://github.com/MatteoFasulo/ChatGPT-TikTok/pulls) to get an idea of what needs to be done and to avoid duplicating efforts.
8
+
9
+ ## Ways to Contribute
10
+
11
+ ### Reporting Issues
12
+
13
+ If you encounter any bugs or issues, please let us know by creating an issue in our project's [issue tracker](https://github.com/MatteoFasulo/ChatGPT-TikTok/issues). When reporting an issue, please include as much detail as possible, such as a clear and descriptive title, a step-by-step description of the problem, and any relevant screenshots or error messages.
14
+
15
+ ### Suggesting Enhancements
16
+
17
+ We welcome suggestions for new features or enhancements to our project! Please create an issue in our project's [issue tracker](https://github.com/MatteoFasulo/ChatGPT-TikTok/issues) and describe the new feature or enhancement you'd like to see. Be sure to provide as much detail as possible, such as why you think the feature would be useful, any relevant use cases, and any potential challenges or limitations.
18
+
19
+ ### Contributing Code
20
+
21
+ We appreciate contributions of all kinds, including code contributions! Before contributing code, please make sure to do the following:
22
+
23
+ 1. Review the [existing issues](https://github.com/MatteoFasulo/ChatGPT-TikTok/issues) and [pull requests](https://github.com/MatteoFasulo/ChatGPT-TikTok/pulls) to make sure your proposed changes haven't already been addressed.
24
+ 2. Familiarize yourself with our project's code structure and development practices.
25
+ 3. Create a fork of our project and make your changes in a new branch.
26
+ 4. Submit a pull request with a clear and descriptive title, a detailed description of the changes you made, and any relevant screenshots or code snippets.
27
+
28
+ Please note that all code contributions are subject to review and may require changes before they can be merged into the main project.
29
+
30
+ ### Improving Documentation
31
+
32
+ Improving project documentation is also a valuable contribution! If you notice any errors or areas where the documentation could be improved, please create an issue in our project's [issue tracker](https://github.com/MatteoFasulo/ChatGPT-TikTok/issues) or submit a pull request with your proposed changes.
33
+
34
+ ## Code of Conduct
35
+
36
+ Our project has a code of conduct to ensure that all contributors feel welcome and valued. Please review the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file before contributing to our project.
37
+
38
+ ## Conclusion
39
+
40
+ We appreciate your interest in contributing to our project and look forward to your contributions. If you have any questions or need any help, please don't hesitate to reach out to us through the issue tracker or by email.
SECURITY.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ | ------- | ------------------ |
7
+ | 2.0.x | :white_check_mark: |
8
+ | < 1.0 | :x: |
9
+
10
+ ## Reporting a Vulnerability
11
+
12
+ If you discover a security vulnerability in this project, please follow these steps to report it:
13
+
14
+ 1. **Ensure Your Findings Are Valid**: Before reporting a vulnerability, please ensure it is a genuine security issue. Avoid making it public until it has been resolved.
15
+
16
+ 2. **Contact Us Privately**: Send an email to [[email protected]](mailto:[email protected]) with details about the vulnerability. Include the following information:
17
+ - A brief description of the vulnerability.
18
+ - Steps to reproduce or a proof-of-concept.
19
+ - Affected versions (if known).
20
+ - Any additional information that may be relevant.
21
+
22
+ 3. **Expect a Response**: You should receive an acknowledgment of your report within 24 hours. We will work with you to verify and understand the issue.
23
+
24
+ 4. **Resolution Timeline**: The time to resolve the issue may vary depending on its complexity and severity. We will keep you informed of our progress and let you know when we expect to release a fix.
25
+
26
+ 5. **Public Disclosure**: Once the issue is resolved, we will coordinate with you on when and how the vulnerability will be publicly disclosed. We typically aim to do this responsibly and after providing a fix to affected versions.
27
+
28
+ 6. **Credit**: If you report a valid security vulnerability that leads to a fix, you may be eligible for public acknowledgment and credit in our release notes or on our website. Please let us know if you wish to be credited.
29
+
30
+ Thank you for helping us keep our project secure.
example-reddit-post.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ https://www.reddit.com/r/stories/comments/1afvvvu/my_fiance_had_a_special_name_for_me_when_he_first/
2
+ https://www.reddit.com/r/stories/comments/1afzk7v/i_think_my_mom_cheated/
example-video.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"series": "Geständnis eines pensionierten FBI-Agenten", "part": "", "outro": "", "text": "Ich habe früher in einer medizinischen Abteilung eines Krankenhauses im Nordwesten gearbeitet. Die Patienten wurden endoskopischen Untersuchungen unterzogen, für die sie sediert wurden. Bei diesem Fall hatten wir einen älteren Herrn als Patienten, während wir auf den Arzt warteten, unterhielten wir uns freundlich, nichts Ungewöhnliches. Als der Arzt herein kam, fragte er uns sofort, ob der Patient uns von seiner Vergangenheit erzählt hätte. Ich begann damit, die medizinischen Gründe aufzuzählen, warum er das Verfahren hatte, aber der Arzt unterbrach mich und sagte: \"Nein, seine bisherige Arbeit als verdeckter FBI-Mob-Agent.\" Natürlich war meine Reaktion: \"Ohhhhh nein... das muss interessant gewesen sein, mit diesen Mafiagangstern umzugehen.\" Die Antwort des Patienten hat mich bis heute verfolgt. Seine genauen Worte waren:.\"Diese Jungs waren nicht so schlimm wie diese verdammten Politiker.\".Dann erzählte er eine Geschichte davon, wie er Leiter der Sicherheit bei einer \"Kongressveranstaltung\" in den 80er Jahren war. Er sagte, ein anderer Agent habe ihm das Telefon gereicht und gesagt, \"ein Kongressabgeordneter\" wolle mit ihm sprechen..Das Gespräch verlief folgendermaßen:.Kongressabgeordneter: Gibt es dort Frauen?.Agent: Ich weiß nicht, was Sie meinen... aber ja, es sind Frauen hier..Kongressabgeordneter: Wenn ich ankomme, möchte ich eine in mein Zimmer geschickt bekommen... \"NICHT ÄLTER ALS 13\"..Direkt nachdem er das gesagt hatte, gab der Anästhesist ihm die Medikamente, und er verlor das Bewusstsein. Wir standen alle einfach da in Stille, während er das Verfahren durchlief... was zum Teufel hat er uns gerade erzählt?.EDIT: Offenbar bleiben Leute an der Verwendung des Begriffs \"Informant\" hängen... also habe ich ihn entfernt. Für Klarheit, die ich für selbstverständlich hielt.. dieser alte Mann war kein \"Mafia-Informant\" im Sinne eines Mob-Spitzels. 
Er war ein pensionierter FBI-Agent, der undercover mit der Mafia gearbeitet hat. Zu einer völlig anderen Zeit behauptete er, von einem Kongressabgeordneten nach eine-jährigen Mädchen gefragt worden zu sein. Dieser Mann hatte keinen Grund zu lügen... er hat es nicht einmal erwähnt, der Chirurg hat es..EDIT 2:.Die Geschichte ist zu 100% wahr, so wie sie passiert ist..Könnte der alte Mann lügen, ja natürlich. Glaube ich, dass er gelogen hat, NEIN!.Die Untersuchung war eine EGD \"obere Endoskopie\"..Der Eingriff wurde von einem Chirurgen zur postoperativen Überwachung durchgeführt, nicht von einem Gastroenterologen..Ihr Nörgler müsst verstehen, dass es elitäre Pädophilenringe gibt und sie schon seit langer Zeit existieren..Diejenigen, die dies als eine Unterstützung für eine politische Weltanschauung betrachten, liegen falsch. Ich habe keine politische Zugehörigkeit..Für all diejenigen, die behaupten \"Das FBI ist nicht für Sicherheit verantwortlich und arbeitet nicht mit dem Kongress zusammen.\" Ihr nehmt an, dass ihr wisst, in welcher Funktion dieser Mann während seiner gesamten Karriere gearbeitet hat??.https://de.wikipedia.org/wiki/FBI_Police.\"Aufgaben und Verantwortlichkeiten\".\"Die FBI-Polizei kann gelegentlich bei bedeutenden nationalen Sicherheitsveranstaltungen eingesetzt werden, wie Präsidenteneinführungen, dem Super Bowl, Konferenzen von weltweiten Führern sowie großen politischen Parteikonferenzen.\""}]
example.env ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ REDDIT_USER_AGENT=
2
+ REDDIT_CLIENT_ID=
3
+ REDDIT_CLIENT_SECRET=
4
+
5
+ DEEPL_AUTH_KEY=
6
+
7
+ OPENAI_API_KEY=
main.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+ import asyncio
3
+ import json
4
+ import platform
5
+ from dotenv import find_dotenv, load_dotenv
6
+ from utils import *
7
+
8
+ # msg.py
9
+ import msg
10
+
11
+ # logger.py
12
+ from src.logger import setup_logger
13
+
14
+ # arg_parser.py
15
+ from src.arg_parser import parse_args
16
+
17
+ # video_creator.py
18
+ from src.video_creator import VideoCreator
19
+
20
# Directory the program is launched from; project files are resolved against it
HOME = Path.cwd()

# Logger returned by the project's setup_logger()
logger = setup_logger()

# Video definitions: main() iterates over the parsed content of video.json
video_json_path = HOME.joinpath('video.json')
jsonData = json.loads(video_json_path.read_text(encoding='utf-8'))
29
+
30
+
31
+ #######################
32
+ # CODE #
33
+ #######################
34
+
35
+
36
async def main() -> int:
    """
    Create one video for every entry loaded from ``video.json``.

    For each entry the environment variables are loaded, a ``VideoCreator``
    is built from the entry and the parsed command-line arguments, and its
    pipeline methods are called in a fixed order. The upload step only runs
    when ``args.upload_tiktok`` is truthy.

    Returns:
        int: Always ``0`` once every entry has been processed.
    """
    console.clear()  # Clear terminal

    args = await parse_args()

    for video in jsonData:
        logger.debug('Creating video')
        # The status spinner is only displayed; its handle is never needed.
        with console.status(msg.STATUS):
            load_dotenv(find_dotenv())  # Optional

            console.log(
                f"{msg.OK}Finish loading environment variables")
            logger.info('Finish loading environment variables')

            video_creator = VideoCreator(video, args)
            video_creator.download_video()
            video_creator.load_model()
            video_creator.create_text()
            await video_creator.text_to_speech()
            video_creator.generate_transcription()
            video_creator.select_background()
            video_creator.integrate_subtitles()
            if args.upload_tiktok:
                video_creator.upload_to_tiktok()

        console.log(f'{msg.DONE} {str(video_creator.mp4_final_video)}')

    # Returned after the loop so that every entry of video.json is handled.
    return 0
64
+
65
+
66
if __name__ == "__main__":

    if platform.system() == 'Windows':
        # Force the selector-based event loop on Windows.
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    # asyncio.run() creates the event loop, runs main() to completion and
    # closes the loop again, replacing the manual get_event_loop() /
    # run_until_complete() / close() sequence.
    asyncio.run(main())

    # Equivalent to sys.exit(0), but does not depend on `sys` reaching this
    # module only through `from utils import *`.
    raise SystemExit(0)
reddit-post.txt ADDED
File without changes
reddit2json.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import praw
2
+ import requests
3
+ import json
4
+ import os
5
+ import re
6
+ from tqdm import tqdm
7
+
8
+ from openai import OpenAI
9
+ from dotenv import load_dotenv
10
+ import argparse
11
+
12
+
13
# Load variables from a local .env file into the environment before any key is read.
load_dotenv()


# Command-line interface
parser = argparse.ArgumentParser(description='Process Reddit posts.')
parser.add_argument(
    '--method',
    default='chat',
    choices=['translate', 'chat'],
    type=str,
    help='Method to use for processing text. "translate" uses Deepl, "chat" uses GPT-3.5 Turbo.',
)
# NOTE: --lang is also interpolated into the prompts of the "chat" method.
parser.add_argument(
    '--lang',
    default='EN',
    type=str,
    help='Target language for translation. Only used if method is "translate".',
)
args = parser.parse_args()


# OpenAI client shared by chat_with_gpt3(), keyed by OPENAI_API_KEY
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
26
+
27
+
28
def chat_with_gpt3(prompt):
    """
    Send ``prompt`` to the gpt-3.5-turbo chat model and return its reply.

    A system message instructs the model to translate into ``args.lang`` and
    to optimize the result for text-to-speech.

    Args:
        prompt (str): The user message sent to the model.

    Returns:
        The ``content`` of the first choice in the completion.
    """
    system_message = {
        "role": "system",
        "content": f"You are an assistant that meaningfully translates English Reddit post texts into Language:{args.lang} and optimizes them for text-to-speech. The following is a Reddit post that you should translate and optimize for text-to-speech",
    }
    user_message = {"role": "user", "content": prompt}

    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
    )

    # Only the first choice is used
    first_choice = response.choices[0]
    return first_choice.message.content
38
+
39
+
40
def get_reddit_post(url):
    """
    Look up a Reddit submission by URL.

    The API credentials are read from the REDDIT_CLIENT_ID,
    REDDIT_CLIENT_SECRET and REDDIT_USER_AGENT environment variables, and a
    new praw client is created on every call.

    Args:
        url (str): URL of the Reddit submission.

    Returns:
        tuple: ``(title, selftext)`` of the submission.
    """
    credentials = {
        "client_id": os.getenv("REDDIT_CLIENT_ID"),
        "client_secret": os.getenv("REDDIT_CLIENT_SECRET"),
        "user_agent": os.getenv("REDDIT_USER_AGENT"),
    }
    submission = praw.Reddit(**credentials).submission(url=url)
    return submission.title, submission.selftext
48
+
49
+
50
def translate_to_german(text):
    """
    Translate ``text`` with the DeepL free API.

    Despite the name, the target language is whatever ``--lang`` was set to
    (``args.lang``). The API key is read from the DEEPL_AUTH_KEY environment
    variable and sent in the ``Authorization`` header.

    Args:
        text (str): The text to translate.

    Returns:
        str: The text of the first translation in the response.

    Raises:
        requests.HTTPError: If DeepL answers with an error status.
        requests.Timeout: If DeepL does not answer within the timeout.
    """
    # Free-plan endpoint; the original code kept https://api.deepl.com/v2/translate
    # as a commented-out alternative.
    url = "https://api-free.deepl.com/v2/translate"
    headers = {
        "Authorization": f"DeepL-Auth-Key {os.getenv('DEEPL_AUTH_KEY')}",
    }
    data = {
        "text": text,
        "target_lang": args.lang,
    }
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.post(url, headers=headers, data=data, timeout=30)
    # Fail with the HTTP error instead of a KeyError on 'translations' below.
    response.raise_for_status()
    response_json = response.json()
    return response_json['translations'][0]['text']
61
+
62
+
63
def process_text(title, text):
    """
    Translate or rewrite a post's title and body with the selected method.

    With ``--method translate`` both strings go through
    ``translate_to_german``; with ``--method chat`` each is embedded in a
    prompt and sent to ``chat_with_gpt3``.

    Args:
        title (str): Title of the Reddit post.
        text (str): Body of the Reddit post.

    Returns:
        tuple: ``(title, text)`` after processing.
    """
    if args.method == 'translate':
        title = translate_to_german(title)
        text = translate_to_german(text)
    elif args.method == 'chat':
        # The actual title/text must be embedded in the prompt; the previous
        # code concatenated the literal words "title" and "text" instead.
        title = chat_with_gpt3(
            f"Translate the following title into Language: {args.lang} and adjust it so that it is optimized for a lecture by a text-to-speech program. Also remove all parentheses such as (29m) or (M23) or (M25) etc. Also remove all edits from the Reddit post so only the pure text remains:" + "\n\n" + title + "\n\n" + "Revised title:")
        text = chat_with_gpt3(
            f"Translate the following text into Language: {args.lang} and adjust it so that it is optimized for a lecture by a text-to-speech program. Also remove all parentheses such as (29m) or (M23) or (M25) or (19) etc. Also remove all edits from the Reddit post so only the pure text remains. Break off the text at the most exciting point to keep the readers very curious:" + "\n\n" + text + "\n\n" + "Revised text:")
    return title, text
73
+
74
+
75
def modify_json(title_text, part_text, outro_text, main_text, output_path='./video.json'):
    """
    Write the video definitions to a JSON file.

    One object is produced per entry of ``title_text``; the other three lists
    are read at the same index.

    Args:
        title_text (list): Values for the "series" key.
        part_text (list): Values for the "part" key.
        outro_text (list): Values for the "outro" key.
        main_text (list): Values for the "text" key.
        output_path (str): File to write. Defaults to ``./video.json``.

    Raises:
        IndexError: If one of the other lists is shorter than ``title_text``.
    """
    data = [
        {
            "series": series,
            "part": part_text[i],
            "outro": outro_text[i],
            "text": main_text[i],
        }
        for i, series in enumerate(title_text)
    ]

    # ensure_ascii=False keeps non-ASCII characters readable in the file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False)
87
+
88
+
89
def read_file_line_by_line(file_path):
    """
    Lazily yield the lines of a text file.

    Lines are yielded as read, including their trailing newline.

    Args:
        file_path (str): Path of the file to read.

    Yields:
        str: The next line of the file.
    """
    # Explicit encoding so the result does not depend on the platform default
    with open(file_path, 'r', encoding='utf-8') as file:
        yield from file
93
+
94
+
95
# Age/gender tags such as "(29m)", "25 W", "(M23)" or "w 31". The word
# boundaries keep ordinary text like "5 minutes" or "room 12" intact, which
# the unanchored patterns used to damage.
_AGE_THEN_GENDER = re.compile(r'\(?\b\d+\s*[mwMW]\b\)?')
_GENDER_THEN_AGE = re.compile(r'\(?\s*\b[mwMW]\s*\d+\b\)?')

title_text = []
main_text = []

# One post URL per line. Surrounding whitespace (including the trailing
# newline) is dropped and blank lines are skipped, so an empty line is never
# sent to Reddit. A list is needed so tqdm knows the total.
lines = [
    line.strip()
    for line in read_file_line_by_line('./reddit-post.txt')
    if line.strip()
]

for line in tqdm(lines, desc="Processing Reddit posts", unit="post"):
    title, text = get_reddit_post(line)
    title, text = process_text(title, text)

    # Turn paragraph breaks into sentence separators
    title = title.replace('\n\n', '.')
    text = text.replace('\n\n', '.')

    # Drop the zero-width-space HTML entity; the form with ';' goes first so
    # that no stray ';' is left behind.
    for entity in ('&#x200B;', '&#x200B'):
        title = title.replace(entity, '')
        text = text.replace(entity, '')

    # Remove gender and age indications (number first, then letter first)
    title = _AGE_THEN_GENDER.sub('', title)
    text = _AGE_THEN_GENDER.sub('', text)
    title = _GENDER_THEN_AGE.sub('', title)
    text = _GENDER_THEN_AGE.sub('', text)

    # Remove commas and characters not allowed in a Windows filename from title
    title = re.sub(r'[<>:"/\\|?*,]', '', title)

    text = text.replace('Edit:', '')
    text = text.replace('edit:', '')

    title_text.append(title)
    main_text.append(text)

# "part" and "outro" are left empty for every generated entry
part_text = [""] * len(title_text)
outro_text = [""] * len(title_text)


modify_json(title_text, part_text, outro_text, main_text)
utils.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import os
3
+ from pathlib import Path
4
+ import random
5
+ import sys
6
+ import ffmpeg
7
+
8
+ from rich.console import Console
9
+
10
+ import msg
11
+ from src.logger import setup_logger
12
+
13
+
14
# Shared rich console used by the helpers below; main.py also picks it up
# through `from utils import *`.
console = Console()
# Logger returned by the project's setup_logger()
logger = setup_logger()
16
+
17
+
18
class KeepDir:
    """
    Context manager that returns to the starting working directory on exit.

    The directory is captured when the object is constructed. Inside the
    ``with`` block, ``chdir`` may be called freely; leaving the block changes
    back to the captured directory.
    """

    def __init__(self) -> None:
        # Remember where we were at construction time
        self.original_dir = os.getcwd()

    def chdir(self, path) -> None:
        """Change the process working directory to ``path``."""
        os.chdir(path)

    def __enter__(self) -> "KeepDir":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        # Returns None, so an exception raised inside the block propagates
        os.chdir(self.original_dir)
30
+
31
+
32
def rich_print(text, style: str = ""):
    """
    Print ``text`` through the shared console.

    Args:
        text: The object to print.
        style (str): Style passed on to ``console.print``. Defaults to "".
    """
    console.print(text, style=style)
34
+
35
+
36
def random_background(folder: str = "background") -> Path:
    """
    Returns the absolute path of a random file in the specified folder.

    The folder (including missing parents) is created when it does not
    exist. Sub-directories are never selected, and the working directory of
    the process is left untouched.

    Args:
        folder(str): The folder containing the files.

    Returns:
        Path: Absolute path of a randomly selected file in the folder.

    Raises:
        IndexError: If the folder contains no files.
    """
    directory = Path(folder).resolve()
    directory.mkdir(parents=True, exist_ok=True)

    candidates = [entry for entry in directory.iterdir() if entry.is_file()]
    if not candidates:
        # Same exception type random.choice() raises on an empty sequence,
        # but with a message that names the folder.
        raise IndexError(f"No background files found in {directory}")

    return random.choice(candidates)
55
+
56
+
57
def get_info(filename: str, kind: str):
    """
    Probe a media file with ffmpeg and return basic stream information.

    Args:
        filename (str): Path of the media file to probe.
        kind (str): 'video' or 'audio'; selects the first stream whose
            ``codec_type`` matches.

    Returns:
        dict | None: ``{'width', 'height', 'duration'}`` for 'video',
        ``{'duration'}`` for 'audio', and ``None`` for any other ``kind``.

    Raises:
        ValueError: If the file has no stream of the requested kind.
        SystemExit: With status 1 if ffmpeg fails to probe the file.
    """
    try:
        probe = ffmpeg.probe(filename)
    except ffmpeg.Error as e:
        console.log(f"{msg.ERROR}{e.stderr}")
        logger.exception(e.stderr)
        sys.exit(1)

    if kind not in ('video', 'audio'):
        return None

    # Local lookup instead of module-level globals: a file without a matching
    # stream can no longer reuse the stream found by an earlier call.
    stream = next(
        (s for s in probe['streams'] if s['codec_type'] == kind), None)
    if stream is None:
        raise ValueError(f"No {kind} stream found in {filename}")

    duration = float(stream['duration'])

    if kind == 'video':
        width = int(stream['width'])
        height = int(stream['height'])
        return {'width': width, 'height': height, 'duration': duration}

    return {'duration': duration}
94
+
95
+
96
def convert_time(time_in_seconds):
    """
    Converts time in seconds to a string in the format "hh:mm:ss.mmm".

    The value is rounded to the nearest millisecond first and then split
    with integer arithmetic, so float representation error cannot drop a
    millisecond (1.001 yields ".001", not ".000").

    Args:
        time_in_seconds (float): The time in seconds to be converted.

    Returns:
        str: The time in the format "hh:mm:ss.mmm".
    """
    total_ms = int(round(time_in_seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{milliseconds:03d}"
111
+
112
+
113
def rgb_to_bgr(rgb: str) -> str:
    """
    Converts a color from RGB to BGR.

    The three two-character components of the hex string are emitted in
    reverse order. A leading "#" is ignored.

    Args:
        rgb (str): The color in RGB format, e.g. "FFF000" or "#FFF000".

    Returns:
        str: The color in BGR format, without a leading "#".

    Example:
        >>> rgb_to_bgr("FFF000")
        '00F0FF'
    """
    rgb = rgb.lstrip("#")
    return rgb[4:6] + rgb[2:4] + rgb[0:2]
video.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "series": "Crazy facts that you did not know",
4
+ "part": "4",
5
+ "outro": "Follow us for more",
6
+ "text": "Sku",
7
+ "tags": [
8
+ "chess",
9
+ "facts",
10
+ "crazy"
11
+ ]
12
+ },
13
+ {
14
+ "series": "Crazy facts that you did not know",
15
+ "part": "5",
16
+ "outro": "Follow us for more",
17
+ "text": "Test"
18
+ }
19
+ ]