|
|
import gradio as gr |
|
|
import assemblyai as aai |
|
|
import re |
|
|
import os |
|
|
import tempfile |
|
|
|
|
|
|
|
|
# Configure the AssemblyAI client from the environment; the value is None when
# ASSEMBLYAI_API_KEY is not set.
aai.settings.api_key = os.environ.get('ASSEMBLYAI_API_KEY')
|
|
|
|
|
def create_assembly_transcript(audio_file):
    """Submit ``audio_file`` to AssemblyAI with speaker labels enabled.

    Args:
        audio_file: Audio source passed unchanged to
            ``aai.Transcriber.transcribe``.

    Returns:
        Whatever ``aai.Transcriber.transcribe`` returns for this request.
    """
    client = aai.Transcriber()
    diarization_config = aai.TranscriptionConfig(speaker_labels=True)
    return client.transcribe(audio_file, config=diarization_config)
|
|
|
|
|
def transcript_to_string(transcript):
    """Render a transcript's utterances as plain text.

    Each utterance becomes a block of the form::

        SPEAKER <speaker> <HH:MM:SS>
        <utterance text>

    followed by one blank line.

    Args:
        transcript: Object exposing ``utterances``. Each utterance must
            provide ``speaker``, ``start`` and ``text``; ``start`` is handed
            to ``format_time``.

    Returns:
        The concatenated blocks, or ``""`` when ``utterances`` is empty or
        ``None``.
    """
    # NOTE(review): ``utterances`` appears to be optional in the SDK; a None
    # value is treated as "no utterances" instead of raising TypeError.
    utterances = transcript.utterances or []

    # Join once instead of growing a string with ``+=`` on every iteration.
    return "".join(
        f"SPEAKER {utterance.speaker} {format_time(utterance.start)}\n"
        f"{utterance.text}\n\n"
        for utterance in utterances
    )
|
|
|
|
|
def format_time(milliseconds):
    """Convert a millisecond offset into a zero-padded ``HH:MM:SS`` string.

    Sub-second precision is discarded by floor division. The hours field is
    not capped, so it grows past two digits for very long offsets.

    Args:
        milliseconds: Offset in milliseconds.

    Returns:
        The offset formatted as ``HH:MM:SS``.
    """
    whole_seconds = milliseconds // 1000
    # Peel off seconds first, then split the remaining minutes into hours.
    total_minutes, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return "{:02}:{:02}:{:02}".format(int(hrs), int(mins), int(secs))
|
|
|
|
|
def format_transcript_markdown(transcript_string):
    """Add markdown emphasis to speaker labels and timestamps.

    Two passes are applied in order:

    1. On each line, the shortest leading text that is followed by whitespace
       and an ``NN:NN:NN`` timestamp is wrapped in ``**``.
    2. Every ``NN:NN:NN`` timestamp anywhere in the text is wrapped in ``_``.

    Args:
        transcript_string: Plain-text transcript.

    Returns:
        The transcript with the markdown markers inserted.
    """
    speaker_label_re = re.compile(
        r"^(.+?)(?=\s\d{2}:\d{2}:\d{2})", re.MULTILINE
    )
    timestamp_re = re.compile(r"(\d{2}:\d{2}:\d{2})", re.MULTILINE)

    with_bold_speakers = speaker_label_re.sub(r"**\1**", transcript_string)
    return timestamp_re.sub(r"_\1_", with_bold_speakers)
|
|
|
|
|
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file and produce both UI outputs.

    Args:
        audio_file: The uploaded audio, or ``None`` when nothing was uploaded.

    Returns:
        A ``(preview_text, markdown_path)`` tuple. On success ``preview_text``
        is the plain-text transcript and ``markdown_path`` is the path of a
        temporary ``.md`` file holding the markdown-formatted transcript. When
        no file was given, the transcript reports an error, or any exception
        is raised, ``preview_text`` is a message and ``markdown_path`` is
        ``None``.
    """
    if audio_file is None:
        return "Please upload an audio file.", None

    try:
        transcript = create_assembly_transcript(audio_file)
        if transcript.error:
            return f"An error occurred: {transcript.error}", None

        transcript_string = transcript_to_string(transcript)
        md_transcript = format_transcript_markdown(transcript_string)

        # delete=False keeps the file on disk after the ``with`` block so its
        # path can be returned. The encoding is pinned to UTF-8 so non-ASCII
        # transcript text does not depend on the platform default encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", delete=False, suffix=".md", encoding="utf-8"
        ) as temp_file:
            temp_file.write(md_transcript)
            temp_file_path = temp_file.name

        return transcript_string, temp_file_path
    except Exception as e:
        # Top-level UI boundary: report the failure as the preview text
        # instead of letting the exception propagate.
        return f"An error occurred: {e}", None
|
|
|
|
|
def launch_app():
    """Build the Gradio interface around ``transcribe_audio`` and launch it.

    The interface takes one audio input (delivered to the callback as a file
    path) and shows two outputs: a text preview and a downloadable file.
    """
    audio_input = gr.Audio(type="filepath", label="Upload Audio File")
    preview_box = gr.Textbox(label="Transcript Preview", lines=10)
    download_file = gr.File(label="Download Formatted Transcript")

    app = gr.Interface(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=[preview_box, download_file],
        title="Audio Transcription App",
        description="Upload an audio file to get a transcription with speaker labels. The preview shows plain text, while the download includes markdown formatting.",
    )
    app.launch()
|
|
|
|
|
# Start the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    launch_app()