"""Process call recordings submitted through a Google Form.

For every unprocessed row of the configured Google Sheet this script
downloads the linked audio file from Google Drive, transcribes it with
AssemblyAI, summarizes the transcript with OpenAI, emails both documents to
the submitter, and finally marks the row as processed. A small Gradio
interface exposes a button that triggers one processing pass.

Required environment variables: ASSEMBLYAI_API_KEY, OPENAI_API_KEY and
EMAIL_PASSWORD.
"""
import os
import re
import time
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.application import MIMEApplication

import openai
import assemblyai as aai
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import gradio as gr
import gdown

# Constants
# Secrets are read from the environment instead of being committed to the
# repository. NOTE(review): the API keys and the mail app password that were
# previously hard-coded here must be treated as compromised and revoked.
ASSEMBLYAI_API_KEY = os.environ.get('ASSEMBLYAI_API_KEY', '')
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')
SPREADSHEET_ID = '1zEXS2rkTLCm9MukYvlKH44xcYe3OZ4_7ySpPcp3yQ1k'
SHEET_NAME = 'Form Responses 2'
OUTPUT_DIR = 'downloads'
EMAIL_ADDRESS = 'mohammad.agwan@somaiya.edu'
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD', '')
GOOGLE_CREDS_PATH = './optical-mode-424508-f5-ab5944b9ea6a.json'
# Gmail accepts STARTTLS submissions on port 587; 1025 is only the
# conventional port of a local debugging SMTP server.
SMTP_PORT = 587
SMTP_SERVER = 'smtp.gmail.com'

# Column updated by update_sheet() when the 'Processed' header is not found.
_DEFAULT_PROCESSED_COLUMN = 3

# Supported link shapes: ".../open?id=<id>", ".../uc?id=<id>" and
# ".../file/d/<id>/view".
_DRIVE_ID_PATTERNS = (
    re.compile(r'[?&]id=([\w-]+)'),
    re.compile(r'/d/([\w-]+)'),
)


def _extract_drive_file_id(drive_link):
    """Return the Google Drive file id embedded in *drive_link*."""
    for pattern in _DRIVE_ID_PATTERNS:
        match = pattern.search(drive_link)
        if match:
            return match.group(1)
    # Fall back to the historical behaviour for unrecognised link shapes.
    return drive_link.split('id=')[-1]


# Function to download MP3 file from Google Drive
def download_mp3_from_drive(drive_link, output_file):
    """Download the Drive file behind *drive_link* to *output_file*.

    Returns a status string; it contains "Error" or "failed" when the
    download did not produce a file.
    """
    try:
        print(f"Downloading file from: {drive_link}")
        file_id = _extract_drive_file_id(drive_link)
        url = f"https://drive.google.com/uc?export=download&id={file_id}"
        # A leftover file from an earlier run would make the existence check
        # below report success even though this download failed.
        if os.path.exists(output_file):
            os.remove(output_file)
        gdown.download(url, output_file, quiet=False)
        if os.path.exists(output_file):
            print(f"File downloaded successfully: {output_file}")
            return "File downloaded successfully"
        print("File download failed.")
        return "File download failed"
    except Exception as e:
        print(f"Error downloading file: {e}")
        return f"Error downloading file: {e}"


def _format_timestamp(milliseconds):
    """Format an offset in milliseconds as MM:SS."""
    minutes, seconds = divmod(int(milliseconds) // 1000, 60)
    return f"{minutes:02d}:{seconds:02d}"


# Function to transcribe audio using AssemblyAI
def transcribe_audio(audio_file, assemblyai_api_key):
    """Transcribe *audio_file* with speaker labels.

    Returns ``(transcript_text, status)``; ``transcript_text`` is ``None``
    when transcription failed. Each utterance is rendered as
    ``[MM:SS] Speaker X: text`` and a blank line separates speaker changes.
    """
    try:
        print(f"Transcribing audio file: {audio_file}")
        aai.settings.api_key = assemblyai_api_key
        transcriber = aai.Transcriber()
        config = aai.TranscriptionConfig(speaker_labels=True)
        transcript = transcriber.transcribe(audio_file, config)

        if transcript.status != 'completed':
            print(f"Transcription failed with status: {transcript.status}")
            return None, f"Transcription failed with status: {transcript.status}"

        utterances = transcript.utterances or []
        if not utterances:
            print("Error transcribing file: no utterances were returned.")
            return None, "Error transcribing file: no utterances were returned"

        parts = []
        current_speaker = None
        for utterance in utterances:
            if utterance.speaker != current_speaker:
                parts.append("\n")
                current_speaker = utterance.speaker
            # Use the real start offset of the utterance: summing utterance
            # durations ignores the silence between them and drifts.
            timestamp = _format_timestamp(utterance.start)
            parts.append(f"[{timestamp}] Speaker {utterance.speaker}: {utterance.text}\n")

        print("Transcription completed successfully.")
        return "".join(parts), "Transcription completed successfully"
    except Exception as e:
        print(f"Error transcribing file: {e}")
        return None, f"Error transcribing file: {e}"


# Function to summarize transcription using OpenAI
def summarize_transcription(transcription, api_key):
    """Summarize *transcription* with gpt-3.5-turbo.

    Returns ``(summary, status)``; ``summary`` is ``None`` on failure.
    Works with both the openai>=1.0 client and the legacy module-level API.
    """
    try:
        print("Summarizing transcription...")
        messages = [
            {"role": "system", "content": "Summarize the following transcription."},
            {"role": "user", "content": transcription},
        ]
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: the module-level ChatCompletion API was removed.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=150,
            )
            content = response.choices[0].message.content
        else:
            openai.api_key = api_key
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=messages,
                max_tokens=150,
            )
            content = response.choices[0].message['content']

        summary = (content or "").strip()
        if not summary:
            # Callers treat a falsy summary as failure, so the status must
            # say so instead of reporting success.
            print("Error summarizing transcription: empty summary returned.")
            return None, "Error summarizing transcription: empty summary returned"
        print("Summary generated successfully.")
        return summary, "Summary generated successfully"
    except Exception as e:
        print(f"Error summarizing transcription: {e}")
        return None, f"Error summarizing transcription: {e}"


# Function to check for new links in Google Sheets
def check_for_new_links(sheet):
    """Return ``(records, status)`` for rows whose 'Processed' value is FALSE."""
    try:
        records = sheet.get_all_records()
        # Only counts are logged; the rows contain submitter email addresses.
        print(f"Sheet records read: {len(records)}")
        file_urls = [
            record for record in records
            if str(record.get('Processed', '')).strip().upper() == 'FALSE'
        ]
        print(f"New file URLs found: {len(file_urls)}")
        if not file_urls:
            return [], "No new file URLs found"
        return file_urls, "New file URLs found"
    except Exception as e:
        print(f"Error reading sheet: {e}")
        return [], f"Error reading sheet: {e}"


def _text_attachment(content, filename):
    """Build an attachment part named *filename* from the text *content*."""
    part = MIMEApplication(content.encode('utf-8'), _subtype='txt')
    part.add_header('Content-Disposition', 'attachment', filename=filename)
    return part


# Function to send email with transcription and summary
def send_email(to_email, transcription, summary):
    """Email *transcription* and *summary* as attachments to *to_email*.

    Returns a status string that contains "Error" on failure.
    """
    try:
        if not to_email:
            print("Error sending email: recipient address is empty.")
            return "Error sending email: recipient address is empty"

        msg = MIMEMultipart()
        msg['From'] = EMAIL_ADDRESS
        msg['To'] = to_email
        msg['Subject'] = 'Your Audio Transcription and Summary'
        msg.attach(MIMEText('Please find the attached transcription and summary of your audio file.', 'plain'))
        # Attachments are built in memory: no temporary files are left
        # behind on failure and the encoding is always UTF-8.
        msg.attach(_text_attachment(transcription, 'transcription.txt'))
        msg.attach(_text_attachment(summary, 'summary.txt'))

        print("Connecting to SMTP server...")
        # SMTP debug output is deliberately not enabled: it prints the
        # authentication exchange to stderr.
        with smtplib.SMTP(SMTP_SERVER, SMTP_PORT, timeout=60) as server:
            server.starttls()
            print("Logging in to SMTP server...")
            server.login(EMAIL_ADDRESS, EMAIL_PASSWORD)
            print("Sending email...")
            server.sendmail(EMAIL_ADDRESS, to_email, msg.as_string())
        print(f"Email sent successfully to {to_email}")
        return "Email sent successfully"
    except smtplib.SMTPAuthenticationError:
        print("SMTP Authentication Error: Check your email address and password.")
        return "SMTP Authentication Error: Check your email address and password."
    except smtplib.SMTPConnectError:
        print("SMTP Connection Error: Unable to connect to the SMTP server.")
        return "SMTP Connection Error: Unable to connect to the SMTP server."
    except smtplib.SMTPException as e:
        print(f"SMTP Error: {e}")
        return f"SMTP Error: {e}"
    except Exception as e:
        print(f"Error sending email: {e}")
        return f"Error sending email: {e}"


def _processed_column(sheet):
    """Return the 1-based column of the 'Processed' header (default 3)."""
    try:
        return sheet.row_values(1).index('Processed') + 1
    except ValueError:
        return _DEFAULT_PROCESSED_COLUMN


# Function to update Google Sheet after processing
def update_sheet(sheet, file_url):
    """Mark the row containing *file_url* as processed.

    Returns a status string that contains "Error" on failure.
    """
    try:
        print(f"Updating sheet for file: {file_url}")
        cell = sheet.find(file_url)
        if not cell:
            print(f"Error updating sheet: link not found: {file_url}")
            return f"Error updating sheet: link not found: {file_url}"
        sheet.update_cell(cell.row, _processed_column(sheet), 'TRUE')
        print(f"Sheet updated successfully for file: {file_url}")
        return "Sheet updated successfully"
    except Exception as e:
        print(f"Error updating sheet: {e}")
        return f"Error updating sheet: {e}"


# Main processing function
def process_link(record, sheet):
    """Run the full pipeline for one sheet *record* and return a status string.

    The record must provide the 'Call Recording' (Drive link) and 'Email'
    (recipient) columns.
    """
    link = record.get('Call Recording')
    email = record.get('Email')
    if not link or not email:
        return "Error processing record: missing 'Call Recording' or 'Email' value"

    print(f"Processing link: {link}")
    output_file = os.path.join(OUTPUT_DIR, 'downloaded_audio.mp3')
    try:
        download_status = download_mp3_from_drive(link, output_file)
        if "Error" in download_status or "failed" in download_status:
            return download_status

        transcription_result, transcription_status = transcribe_audio(output_file, ASSEMBLYAI_API_KEY)
        if not transcription_result:
            return transcription_status
        print(f"Transcription result: {transcription_result}")

        summary_result, summary_status = summarize_transcription(transcription_result, OPENAI_API_KEY)
        if not summary_result:
            return summary_status
        print(f"Summary result: {summary_result}")

        email_status = send_email(email, transcription_result, summary_result)
        if "Error" in email_status:
            return email_status

        sheet_update_status = update_sheet(sheet, link)
        return f"Processing completed: {email_status}, {sheet_update_status}"
    finally:
        # Remove the downloaded audio on every exit path, not only on success.
        if os.path.exists(output_file):
            os.remove(output_file)


# Single processing pass: check the sheet once and process every new link
def main():
    """Process all unprocessed sheet rows once and return a status report."""
    required = (
        ('ASSEMBLYAI_API_KEY', ASSEMBLYAI_API_KEY),
        ('OPENAI_API_KEY', OPENAI_API_KEY),
        ('EMAIL_PASSWORD', EMAIL_PASSWORD),
    )
    missing = [name for name, value in required if not value]
    if missing:
        message = f"Error: missing required environment variables: {', '.join(missing)}"
        print(message)
        return message

    try:
        print("Initializing Google Sheets API...")
        scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
        creds = ServiceAccountCredentials.from_json_keyfile_name(GOOGLE_CREDS_PATH, scope)
        client = gspread.authorize(creds)
        sheet = client.open_by_key(SPREADSHEET_ID).worksheet(SHEET_NAME)
        print("Google Sheets API initialized successfully.")
    except Exception as e:
        print(f"Error initializing Google Sheets API: {e}")
        return f"Error initializing Google Sheets API: {e}"

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    new_links, check_status = check_for_new_links(sheet)
    if not new_links:
        return check_status

    statuses = []
    for record in new_links:
        processing_status = process_link(record, sheet)
        print(processing_status)
        statuses.append(processing_status)
    # Report each record's outcome so failures are visible in the UI too.
    return "\n".join(["Processing completed.", *statuses])


# Gradio Interface
iface = gr.Interface(
    fn=main,
    inputs=[],
    outputs="text",
    live=False,
    title="Audio Processing Script"
)

if __name__ == "__main__":
    iface.launch()