gabriel chua committed
Commit 9c20b4e
1 Parent(s): 3cf1f43

add opensource version

Files changed (11):
  1. .dockerignore +0 -164
  2. Dockerfile +0 -11
  3. description.md +0 -5
  4. docker-compose.yml +0 -8
  5. head.html +0 -49
  6. main.py +46 -113
  7. prompts.py +43 -0
  8. requirements.txt +8 -8
  9. static/icon.png +0 -0
  10. static/logo.png +0 -0
  11. utils.py +71 -0
.dockerignore DELETED
@@ -1,164 +0,0 @@
-# Byte-compiled / optimized / DLL files
-__pycache__/
-*.py[cod]
-*$py.class
-
-# C extensions
-*.so
-
-# Distribution / packaging
-.Python
-build/
-develop-eggs/
-dist/
-downloads/
-eggs/
-.eggs/
-lib/
-lib64/
-parts/
-sdist/
-var/
-wheels/
-share/python-wheels/
-*.egg-info/
-.installed.cfg
-*.egg
-MANIFEST
-
-# PyInstaller
-# Usually these files are written by a python script from a template
-# before PyInstaller builds the exe, so as to inject date/other infos into it.
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-cover/
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-.pybuilder/
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-# For a library or package, you might want to ignore these files since the code is
-# intended to run in multiple environments; otherwise, check them in:
-# .python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# poetry
-# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
-# This is especially recommended for binary packages to ensure reproducibility, and is more
-# commonly ignored for libraries.
-# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
-#poetry.lock
-
-# pdm
-# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
-#pdm.lock
-# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
-# in version control.
-# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
-.pdm.toml
-.pdm-python
-.pdm-build/
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# pytype static type analyzer
-.pytype/
-
-# Cython debug symbols
-cython_debug/
-
-# PyCharm
-# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-# and can be added to the global gitignore or merged into this file. For a more nuclear
-# option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
-
-gradio_cached_examples/

Dockerfile DELETED
@@ -1,11 +0,0 @@
-FROM python:3.12-slim
-
-RUN pip install uv
-RUN uv venv
-
-COPY requirements.txt .
-RUN uv pip install -r requirements.txt
-
-COPY . .
-
-CMD .venv/bin/granian --interface asgi --port 8080 --host 0.0.0.0 main:app

description.md DELETED
@@ -1,5 +0,0 @@
-<p style="text-align:center">
-  <strong>Convert any PDF into a podcast episode! Experience research papers, websites, and more in a whole new way.</strong>
-  <br>
-  <a href="https://github.com/knowsuchagency/pdf-to-podcast">knowsuchagency/pdf-to-podcast</a>
-</p>

docker-compose.yml DELETED
@@ -1,8 +0,0 @@
-services:
-  web:
-    build: .
-    ports:
-      - "8080:8080"
-    environment:
-      GEMINI_API_KEY: ${GEMINI_API_KEY}
-

head.html DELETED
@@ -1,49 +0,0 @@
-<!-- Primary Meta Tags -->
-<title>PDF to Podcast - Convert Your Documents to Audio</title>
-<meta name="title" content="PDF to Podcast - Convert Your Documents to Audio" />
-<meta
-  name="description"
-  content="Easily convert your PDF documents into audio podcasts. Perfect for listening on the go and making content more accessible."
-/>
-
-<!-- Open Graph / Facebook -->
-<meta property="og:type" content="website" />
-<meta property="og:url" content="https://pdf-to-podcast.com/" />
-<meta
-  property="og:title"
-  content="PDF to Podcast - Convert Your Documents to Audio"
-/>
-<meta
-  property="og:description"
-  content="Easily convert your PDF documents into audio podcasts. Perfect for listening on the go and making content more accessible."
-/>
-<meta
-  property="og:image"
-  content="https://pdf-to-podcast.com/static/logo.png"
-/>
-
-<!-- Twitter -->
-<meta property="twitter:card" content="summary_large_image" />
-<meta property="twitter:url" content="https://pdf-to-podcast.com/" />
-<meta
-  property="twitter:title"
-  content="PDF to Podcast - Convert Your Documents to Audio"
-/>
-<meta
-  property="twitter:description"
-  content="Easily convert your PDF documents into audio podcasts. Perfect for listening on the go and making content more accessible."
-/>
-<meta
-  property="twitter:image"
-  content="https://pdf-to-podcast.com/static/logo.png"
-/>
-
-<!-- Additional Meta Tags -->
-<meta name="viewport" content="width=device-width, initial-scale=1" />
-<meta charset="UTF-8" />
-<meta name="author" content="Stephan Fitzpatrick" />
-<meta
-  name="keywords"
-  content="PDF to Podcast, PDF to audio, document to podcast, audio conversion, podcast creation, accessible content"
-/>
-<link rel="icon" href="/static/icon.png" type="image/png" />

main.py CHANGED
@@ -1,25 +1,27 @@
-import concurrent.futures as cf
+"""
+main.py
+"""
+
+# Standard library imports
 import glob
-import io
 import os
 import time
 from pathlib import Path
 from tempfile import NamedTemporaryFile
-from typing import List, Literal
+from typing import List, Literal, Tuple

+# Third-party imports
 import gradio as gr
-import sentry_sdk
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from loguru import logger
-from openai import OpenAI
-from promptic import llm
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel
 from pypdf import PdfReader
-from tenacity import retry, retry_if_exception_type
-
+from pydub import AudioSegment

-sentry_sdk.init(os.getenv("SENTRY_DSN"))
+# Local imports
+from prompts import SYSTEM_PROMPT
+from utils import generate_script, generate_audio

 app = FastAPI()

@@ -27,158 +29,89 @@ app.mount("/static", StaticFiles(directory="static"), name="static")


 class DialogueItem(BaseModel):
-    text: str
-    speaker: Literal["female-1", "male-1", "female-2"]
+    """A single dialogue item."""

-    @property
-    def voice(self):
-        return {
-            "female-1": "alloy",
-            "male-1": "onyx",
-            "female-2": "shimmer",
-        }[self.speaker]
+    speaker: Literal["Host (Jane)", "Guest"]
+    text: str


 class Dialogue(BaseModel):
+    """The dialogue between the host and guest."""
+
     scratchpad: str
+    participants: List[str]
     dialogue: List[DialogueItem]


-def get_mp3(text: str, voice: str, api_key: str = None) -> bytes:
-    client = OpenAI(
-        api_key=api_key or os.getenv("OPENAI_API_KEY"),
-    )
-
-    with client.audio.speech.with_streaming_response.create(
-        model="tts-1",
-        voice=voice,
-        input=text,
-    ) as response:
-        with io.BytesIO() as file:
-            for chunk in response.iter_bytes():
-                file.write(chunk)
-            return file.getvalue()
-
-
-def generate_audio(file: str, openai_api_key: str = None) -> bytes:
-
-    if not os.getenv("OPENAI_API_KEY", openai_api_key):
-        raise gr.Error("OpenAI API key is required")
-
+def generate_podcast(file: str) -> Tuple[str, str]:
+    """Generate the audio and transcript from the PDF."""
+    # Read the PDF file and extract text
     with Path(file).open("rb") as f:
         reader = PdfReader(f)
         text = "\n\n".join([page.extract_text() for page in reader.pages])

-    @retry(retry=retry_if_exception_type(ValidationError))
-    @llm(
-        model="gpt-4o-mini",
-    )
-    def generate_dialogue(text: str) -> Dialogue:
-        """
-        Your task is to take the input text provided and turn it into an engaging, informative podcast dialogue. The input text may be messy or unstructured, as it could come from a variety of sources like PDFs or web pages. Don't worry about the formatting issues or any irrelevant information; your goal is to extract the key points and interesting facts that could be discussed in a podcast.
-
-        Here is the input text you will be working with:
-
-        <input_text>
-        {text}
-        </input_text>
-
-        First, carefully read through the input text and identify the main topics, key points, and any interesting facts or anecdotes. Think about how you could present this information in a fun, engaging way that would be suitable for an audio podcast.
+    # Call the LLM
+    llm_output = generate_script(SYSTEM_PROMPT, text, Dialogue)
+    logger.info(f"Generated dialogue: {llm_output}")

-        <scratchpad>
-        Brainstorm creative ways to discuss the main topics and key points you identified in the input text. Consider using analogies, storytelling techniques, or hypothetical scenarios to make the content more relatable and engaging for listeners.
-
-        Keep in mind that your podcast should be accessible to a general audience, so avoid using too much jargon or assuming prior knowledge of the topic. If necessary, think of ways to briefly explain any complex concepts in simple terms.
-
-        Use your imagination to fill in any gaps in the input text or to come up with thought-provoking questions that could be explored in the podcast. The goal is to create an informative and entertaining dialogue, so feel free to be creative in your approach.
-
-        Write your brainstorming ideas and a rough outline for the podcast dialogue here. Be sure to note the key insights and takeaways you want to reiterate at the end.
-        </scratchpad>
-
-        Now that you have brainstormed ideas and created a rough outline, it's time to write the actual podcast dialogue. Aim for a natural, conversational flow between the host and any guest speakers. Incorporate the best ideas from your brainstorming session and make sure to explain any complex topics in an easy-to-understand way.
-
-        <podcast_dialogue>
-        Write your engaging, informative podcast dialogue here, based on the key points and creative ideas you came up with during the brainstorming session. Use a conversational tone and include any necessary context or explanations to make the content accessible to a general audience. Use made-up names for the hosts and guests to create a more engaging and immersive experience for listeners. Do not include any bracketed placeholders like [Host] or [Guest]. Design your output to be read aloud -- it will be directly converted into audio.
-
-        Make the dialogue as long and detailed as possible, while still staying on topic and maintaining an engaging flow. Aim to use your full output capacity to create the longest podcast episode you can, while still communicating the key information from the input text in an entertaining way.
-
-        At the end of the dialogue, have the host and guest speakers naturally summarize the main insights and takeaways from their discussion. This should flow organically from the conversation, reiterating the key points in a casual, conversational manner. Avoid making it sound like an obvious recap - the goal is to reinforce the central ideas one last time before signing off.
-        </podcast_dialogue>
-        """
-
-    llm_output = generate_dialogue(text)
-
-    audio = b""
+    # Process the dialogue
+    audio_segments = []
     transcript = ""
+    total_characters = 0

-    characters = 0
-
-    with cf.ThreadPoolExecutor() as executor:
-        futures = []
-        for line in llm_output.dialogue:
-            transcript_line = f"{line.speaker}: {line.text}"
-            future = executor.submit(get_mp3, line.text, line.voice, openai_api_key)
-            futures.append((future, transcript_line))
-            characters += len(line.text)
+    for line in llm_output.dialogue:
+        logger.info(f"Generating audio for {line.speaker}: {line.text}")
+        transcript_line = f"{line.speaker}: {line.text}"
+        transcript += transcript_line + "\n\n"
+        total_characters += len(line.text)

-        for future, transcript_line in futures:
-            audio_chunk = future.result()
-            audio += audio_chunk
-            transcript += transcript_line + "\n\n"
+        # Get audio file path
+        audio_file_path = generate_audio(line.text, line.speaker)
+        # Read the audio file into an AudioSegment
+        audio_segment = AudioSegment.from_file(audio_file_path)
+        audio_segments.append(audio_segment)

-    logger.info(f"Generated {characters} characters of audio")
+    # Concatenate all audio segments
+    combined_audio = sum(audio_segments)

+    # Export the combined audio to a temporary file
     temporary_directory = "./gradio_cached_examples/tmp/"
     os.makedirs(temporary_directory, exist_ok=True)

-    # we use a temporary file because Gradio's audio component doesn't work with raw bytes in Safari
     temporary_file = NamedTemporaryFile(
         dir=temporary_directory,
         delete=False,
         suffix=".mp3",
     )
-    temporary_file.write(audio)
-    temporary_file.close()
+    combined_audio.export(temporary_file.name, format="mp3")

     # Delete any files in the temp directory that end with .mp3 and are over a day old
     for file in glob.glob(f"{temporary_directory}*.mp3"):
         if os.path.isfile(file) and time.time() - os.path.getmtime(file) > 24 * 60 * 60:
             os.remove(file)

+    logger.info(f"Generated {total_characters} characters of audio")
+
     return temporary_file.name, transcript


 demo = gr.Interface(
-    title="PDF to Podcast",
-    description=Path("description.md").read_text(),
-    fn=generate_audio,
-    examples=[[str(p)] for p in Path("examples").glob("*.pdf")],
+    title="OpenPodcast",
+    description="Convert your PDFs into podcasts with open-source AI models.",
+    fn=generate_podcast,
     inputs=[
         gr.File(
             label="PDF",
         ),
-        gr.Textbox(
-            label="OpenAI API Key",
-            visible=not os.getenv("OPENAI_API_KEY"),
-        ),
     ],
     outputs=[
         gr.Audio(label="Audio", format="mp3"),
         gr.Textbox(label="Transcript"),
     ],
     allow_flagging="never",
-    clear_btn=None,
-    head=os.getenv("HEAD", "") + Path("head.html").read_text(),
-    cache_examples="lazy",
     api_name=False,
 )

-
-demo = demo.queue(
-    max_size=20,
-    default_concurrency_limit=20,
-)
-
 app = gr.mount_gradio_app(app, demo, path="/")

 if __name__ == "__main__":

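Taken together, the rewritten main.py drops the OpenAI TTS and thread-pool path and builds the episode in one pass: generate_script returns a validated Dialogue, each line is synthesized through the MeloTTS Space, and pydub concatenates the clips before exporting a single MP3. A minimal smoke-test sketch, not part of this commit (it assumes FIREWORKS_API_KEY is exported and that a local sample.pdf exists):

# Illustrative sketch only -- exercises the new generate_podcast() pipeline
# directly, bypassing the Gradio UI. "sample.pdf" is a hypothetical input file.
from main import generate_podcast

audio_path, transcript = generate_podcast("sample.pdf")
print(audio_path)         # combined MP3 written under ./gradio_cached_examples/tmp/
print(transcript[:300])   # "Host (Jane): ..." / "Guest: ..." lines
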
prompts.py ADDED
@@ -0,0 +1,43 @@
+"""
+prompts.py
+"""
+
+SYSTEM_PROMPT = """
+You are a world-class podcast producer.
+Your task is to transform the provided input text into an engaging and informative podcast script.
+You will receive as input a text that may be unstructured or messy, sourced from places like PDFs or web pages. Ignore irrelevant information or formatting issues.
+Your focus is on extracting the most interesting and insightful content for a podcast discussion.
+
+# Steps to Follow:
+
+1. **Analyze the Input:**
+Carefully read the input text. Identify the key topics, points, and any interesting facts or anecdotes that could drive a compelling podcast conversation.
+
+2. **Brainstorm Ideas:**
+In the `<scratchpad>`, brainstorm creative ways to present the key points in an engaging manner. Think of analogies, storytelling techniques, or hypothetical scenarios to make the content relatable and entertaining for listeners.
+
+- Keep the discussion accessible to a general audience. Avoid jargon and briefly explain complex concepts in simple terms.
+- Use imagination to fill in any gaps or create thought-provoking questions to explore during the podcast.
+- Your aim is to create an entertaining and informative podcast, so feel free to be creative with your approach.
+
+3. **Write the Dialogue:**
+Now, develop the podcast dialogue. Aim for a natural, conversational flow between the host (named Jane) and the guest speaker (the author of the input text, if mentioned).
+
+- Use the best ideas from your brainstorming session.
+- Ensure complex topics are explained clearly and simply.
+- Focus on maintaining an engaging and lively tone that would captivate listeners.
+- Rules:
+> The host should go first.
+> The host should ask the guest questions.
+> The host should summarize the key insights at the end.
+> Include common verbal fillers like "uhms" and "errs" in the host's and guest's responses. This is so the script is realistic.
+> The host and guest can interrupt each other.
+> The guest must NOT include marketing or self-promotional content.
+> The guest must NOT include any material NOT substantiated within the input text.
+> This is to be a PG conversation.
+
+4. **Wrap it Up:**
+At the end of the dialogue, the host and guest should naturally summarize the key insights. This should feel like a casual conversation, rather than a formal recap, reinforcing the main points one last time before signing off.
+
+ALWAYS REPLY IN VALID JSON, AND NO CODE BLOCKS. BEGIN DIRECTLY WITH THE JSON OUTPUT.
+"""

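The closing instruction ("ALWAYS REPLY IN VALID JSON ...") works hand in hand with the Dialogue schema defined in main.py: utils.generate_script only accepts a reply that validates against it. An illustrative example of a conforming reply (the field names come from main.py's Dialogue/DialogueItem models; the content itself is made up):

# Illustrative only -- a reply shaped the way the prompt and schema expect.
from main import Dialogue  # pydantic model: scratchpad, participants, dialogue

example_reply = '''{
  "scratchpad": "Key points to cover: ...",
  "participants": ["Host (Jane)", "Guest"],
  "dialogue": [
    {"speaker": "Host (Jane)", "text": "Welcome to the show! So, uhm, what is this paper about?"},
    {"speaker": "Guest", "text": "Errm, thanks Jane, it looks at turning PDFs into podcasts."}
  ]
}'''

Dialogue.model_validate_json(example_reply)  # parses without a ValidationError
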
requirements.txt CHANGED
@@ -1,9 +1,9 @@
-gradio~=4.36
+gradio==4.44.0
+granian==1.4
+loguru==0.7
+openai==1.50.2
 promptic==0.7.5
-pydantic~=2.7
-google-generativeai~=0.6
-loguru~=0.7
-pypdf~=4.1
-tenacity~=8.3
-sentry-sdk~=2.5
-granian~=1.4
+pydantic==2.7
+pypdf==4.1
+sentry-sdk==2.5
+tenacity==8.3
 
 
static/icon.png DELETED
Binary file (1.34 kB)
 
static/logo.png DELETED
Binary file (134 kB)
 
utils.py ADDED
@@ -0,0 +1,71 @@
+"""
+utils.py
+
+Functions:
+- generate_script: Get the dialogue from the LLM.
+- call_llm: Call the LLM with the given prompt and dialogue format.
+- generate_audio: Get the audio from the TTS model from HF Spaces.
+"""
+
+import os
+
+from gradio_client import Client
+from openai import OpenAI
+from pydantic import ValidationError
+
+client = OpenAI(
+    base_url="https://api.fireworks.ai/inference/v1",
+    api_key=os.getenv("FIREWORKS_API_KEY"),
+)
+
+hf_client = Client("mrfakename/MeloTTS")
+
+
+def generate_script(system_prompt: str, text: str, dialogue_format):
+    """Get the dialogue from the LLM."""
+    # Load as python object
+    try:
+        response = call_llm(system_prompt, text, dialogue_format)
+        dialogue = dialogue_format.model_validate_json(
+            response.choices[0].message.content
+        )
+    except ValidationError as e:
+        error_message = f"Failed to parse dialogue JSON: {e}"
+        system_prompt_with_error = f"{system_prompt}\n\n Please return a VALID JSON object. This was the earlier error: {error_message}"
+        response = call_llm(system_prompt_with_error, text, dialogue_format)
+        dialogue = dialogue_format.model_validate_json(
+            response.choices[0].message.content
+        )
+    return dialogue
+
+
+def call_llm(system_prompt: str, text: str, dialogue_format):
+    """Call the LLM with the given prompt and dialogue format."""
+    response = client.chat.completions.create(
+        messages=[
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": text},
+        ],
+        model="accounts/fireworks/models/llama-v3p1-405b-instruct",
+        max_tokens=16_384,
+        temperature=0.1,
+        response_format={
+            "type": "json_object",
+            "schema": dialogue_format.model_json_schema(),
+        },
+    )
+    return response
+
+
+def generate_audio(text: str, speaker: str) -> str:
+    """Get the audio from the TTS model from HF Spaces."""
+    if speaker == "Guest":
+        accent = "EN-US"
+        speed = 0.9
+    else:  # host
+        accent = "EN-Default"
+        speed = 1
+    result = hf_client.predict(
+        text=text, language="EN", speaker=accent, speed=speed, api_name="/synthesize"
+    )
+    return result
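A minimal usage sketch of the two helpers together, not part of this commit (assumptions: FIREWORKS_API_KEY is set, the mrfakename/MeloTTS Space is reachable, and Dialogue is the Pydantic model from main.py):

# Illustrative sketch only.
from main import Dialogue
from prompts import SYSTEM_PROMPT
from utils import generate_script, generate_audio

script = generate_script(SYSTEM_PROMPT, "Text extracted from a PDF...", Dialogue)
first = script.dialogue[0]
clip_path = generate_audio(first.text, first.speaker)  # local audio file path returned by the Space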