Deepakkori45 committed on
Commit
cf7b164
·
verified ·
1 Parent(s): 97a1e81

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -0
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import openai
4
+ import os
5
+ from pydub import AudioSegment
6
+ from dotenv import load_dotenv
7
+ from tempfile import NamedTemporaryFile
8
+ import math
9
+ from docx import Document
10
+
11
# Populate the process environment from a local .env file, if one exists.
load_dotenv()

# Give the OpenAI client library its API key (None when the variable is unset).
openai.api_key = os.environ.get("OPENAI_API_KEY")
16
+
17
def get_chunk_length_ms(file_path, target_size_mb):
    """
    Estimate the chunk duration that corresponds to roughly ``target_size_mb``.

    The estimate divides the audio duration by the file's size on disk to get
    an average "milliseconds per byte", then scales that by the target size.
    It therefore reflects the size of the audio *as encoded in this file*; a
    chunk that is later re-encoded in another format may be a different size.

    Args:
        file_path (str): Path to the audio file.
        target_size_mb (int): Target size of each chunk in megabytes.

    Returns:
        int: Chunk length in milliseconds, never less than 1 so the result is
        always usable as a slicing step.

    Raises:
        ValueError: If ``target_size_mb`` is not positive or the file is empty.
    """
    if target_size_mb <= 0:
        raise ValueError("target_size_mb must be a positive number")

    # Check the size first: an empty file cannot be decoded and would
    # otherwise lead to a division by zero below.
    file_size_bytes = os.path.getsize(file_path)
    if file_size_bytes == 0:
        raise ValueError(f"Audio file is empty: {file_path}")

    audio = AudioSegment.from_file(file_path)
    duration_ms = len(audio)

    # Average playback time represented by one byte of the file.
    duration_per_byte = duration_ms / file_size_bytes

    # Scale up to the requested number of bytes.
    chunk_length_ms = target_size_mb * 1024 * 1024 * duration_per_byte

    # Clamp to 1 ms: a floor of 0 would be an invalid step for range().
    return max(1, math.floor(chunk_length_ms))
38
+
39
def split_audio(audio_file_path, chunk_length_ms):
    """
    Cut an audio file into consecutive pieces of a fixed duration.

    The final piece holds whatever audio remains and may be shorter than
    ``chunk_length_ms``.

    Args:
        audio_file_path (str): Path to the audio file.
        chunk_length_ms (int): Length of each chunk in milliseconds.

    Returns:
        list: List of AudioSegment chunks, in playback order.
    """
    full_track = AudioSegment.from_file(audio_file_path)

    segments = []
    for start_ms in range(0, len(full_track), chunk_length_ms):
        end_ms = start_ms + chunk_length_ms
        segments.append(full_track[start_ms:end_ms])

    return segments
55
+
56
def transcribe(audio_file):
    """
    Send one audio file to the OpenAI ``whisper-1`` transcription endpoint.

    Args:
        audio_file (str): Path to the audio file.

    Returns:
        str: Transcribed text, as returned for ``response_format="text"``.
    """
    request_options = {
        "model": "whisper-1",
        "response_format": "text",
        "language": "en",  # Ensures transcription is in English
    }

    # The file only needs to stay open for the duration of the request.
    with open(audio_file, "rb") as audio_stream:
        return openai.audio.transcriptions.create(
            file=audio_stream,
            **request_options,
        )
74
+
75
def process_audio_chunks(audio_chunks):
    """
    Transcribe each audio chunk and join the results into one string.

    Every chunk is exported to its own temporary WAV file, transcribed via
    ``transcribe`` and the temporary file is removed again, even when the
    export or the transcription raises.

    Args:
        audio_chunks (list): List of AudioSegment chunks.

    Returns:
        str: Combined transcription from all chunks, separated by spaces.
        Chunks that are too short or yield no text are left out.
    """
    transcriptions = []
    min_length_ms = 100  # Minimum length required by OpenAI API (0.1 seconds)

    for i, chunk in enumerate(audio_chunks):
        if len(chunk) < min_length_ms:
            st.warning(f"Chunk {i} is too short to be processed.")
            continue

        # Only reserve a unique path here and close the handle right away:
        # on Windows a file that is still open cannot be reopened by name,
        # so the export has to happen after the ``with`` block.
        with NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio_file:
            temp_audio_file_path = temp_audio_file.name

        try:
            chunk.export(temp_audio_file_path, format="wav")
            transcription = transcribe(temp_audio_file_path)
        finally:
            # Always clean up, otherwise a failed request leaks the WAV file.
            if os.path.exists(temp_audio_file_path):
                os.remove(temp_audio_file_path)

        if transcription:
            transcriptions.append(transcription)
            st.write(f"Transcription for chunk {i}: {transcription}")

    return " ".join(transcriptions)
104
+
105
def save_transcription_to_docx(transcription, audio_file_path):
    """
    Write the transcription into a Word document named after the audio file.

    The document is saved under a relative file name built from the audio
    file's base name (directory and extension stripped) plus the suffix
    ``_full_transcription.docx``.

    Args:
        transcription (str): Transcribed text.
        audio_file_path (str): Path to the original audio file for naming purposes.

    Returns:
        str: Path to the saved .docx file.
    """
    # "dir/talk.mp3" -> "talk"
    stem, _extension = os.path.splitext(os.path.basename(audio_file_path))
    docx_path = f"{stem}_full_transcription.docx"

    # A single paragraph holds the whole transcription.
    document = Document()
    document.add_paragraph(transcription)
    document.save(docx_path)

    return docx_path
132
+
133
# ---------------------------------------------------------------------------
# Streamlit page: upload a file, transcribe it once, offer the result as .docx
# ---------------------------------------------------------------------------
st.title("Audio Transcription with OpenAI's Whisper")

uploaded_file = st.file_uploader(
    "Upload an audio or video file",
    type=["wav", "mp3", "ogg", "m4a", "mp4", "mov"],
)

# Streamlit reruns this script on every interaction, so results are kept in
# session state instead of being recomputed.
if 'transcription' not in st.session_state:
    st.session_state.transcription = None
if 'output_docx_file' not in st.session_state:
    st.session_state.output_docx_file = None
if 'transcribed_upload' not in st.session_state:
    # (name, size) of the upload the stored transcription belongs to.
    st.session_state.transcribed_upload = None

if uploaded_file is not None:
    upload_id = (uploaded_file.name, uploaded_file.size)

    # A different file was uploaded: discard the previous result so the new
    # file actually gets transcribed instead of showing stale text.
    if st.session_state.transcribed_upload != upload_id:
        st.session_state.transcription = None
        st.session_state.output_docx_file = None
        if "transcription_area_final" in st.session_state:
            del st.session_state["transcription_area_final"]

    if st.session_state.transcription is None:
        st.audio(uploaded_file)

        # Save the upload to a uniquely named temporary file so concurrent
        # sessions cannot overwrite each other. The original extension is
        # kept as the suffix ("" when the name has no extension).
        file_extension = os.path.splitext(uploaded_file.name)[1]
        with NamedTemporaryFile(delete=False, suffix=file_extension) as temp_upload:
            temp_upload.write(uploaded_file.getbuffer())
            temp_audio_file = temp_upload.name

        try:
            # Split and process audio
            with st.spinner('Transcribing...'):
                chunk_length_ms = get_chunk_length_ms(temp_audio_file, target_size_mb=1)
                audio_chunks = split_audio(temp_audio_file, chunk_length_ms)
                transcription = process_audio_chunks(audio_chunks)
        finally:
            # Clean up temporary file, also when transcription failed.
            if os.path.exists(temp_audio_file):
                os.remove(temp_audio_file)

        if transcription:
            st.session_state.transcription = transcription
            st.session_state.transcribed_upload = upload_id
            st.success('Transcription complete!')

            # Save transcription to a Word (.docx) file
            st.session_state.output_docx_file = save_transcription_to_docx(
                transcription, uploaded_file.name
            )
        else:
            st.warning("No text could be transcribed from this file.")

if st.session_state.transcription:
    st.text_area("Transcription", st.session_state.transcription, key="transcription_area_final")

    # Download the transcription as a .docx file
    with open(st.session_state.output_docx_file, "rb") as docx_file:
        st.download_button(
            label="Download Transcription (.docx)",
            data=docx_file,
            file_name=st.session_state.output_docx_file,
            mime='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        )
180
+