WIP
app.py
CHANGED
@@ -10,9 +10,6 @@ import os
 import json
 from pathlib import Path

-# Configure loguru
-logger.add("app.log", rotation="500 MB", level="DEBUG")
-
 MODEL_NAME = "muhtasham/whisper-tg"

 def format_time(seconds):
@@ -129,53 +126,9 @@ def transcribe(inputs, return_timestamps, generate_subs, batch_size, chunk_lengt
         logger.exception(f"Error during transcription: {str(e)}")
         raise gr.Error(f"Failed to transcribe audio: {str(e)}")

-# Create a custom flagging callback
-class TranscriptionFlaggingCallback(gr.FlaggingCallback):
-    def __init__(self, flagging_dir):
-        self.flagging_dir = Path(flagging_dir)
-        self.flagging_dir.mkdir(exist_ok=True)
-        self.log_file = self.flagging_dir / "flagged_data.jsonl"
-
-    def setup(self, components, flagging_dir):
-        pass
-
-    def flag(self, components, flag_data, flag_option, username):
-        try:
-            # Create a unique filename for the audio file
-            audio_file = components[0]  # First component is the audio input
-            if audio_file:
-                audio_filename = os.path.basename(audio_file)
-                # Copy audio file to flagged directory
-                audio_dir = self.flagging_dir / "audio"
-                audio_dir.mkdir(exist_ok=True)
-                import shutil
-                shutil.copy2(audio_file, audio_dir / audio_filename)
-            else:
-                audio_filename = None
-
-            # Prepare the data to save
-            data = {
-                "timestamp": datetime.datetime.now().isoformat(),
-                "audio_file": audio_filename,
-                "transcription": components[1],  # JSON output
-                "correction": components[2] if len(components) > 2 else None,  # Correction text if provided
-                "username": username
-            }
-
-            # Append to JSONL file
-            with open(self.log_file, "a", encoding="utf-8") as f:
-                f.write(json.dumps(data) + "\n")
-
-            logger.info(f"Saved flagged data: {data}")
-        except Exception as e:
-            logger.error(f"Error while flagging: {str(e)}")
-            raise gr.Error(f"Failed to save feedback: {str(e)}")

 demo = gr.Blocks(theme=gr.themes.Ocean())

-# Create flagging callback
-flagging_callback = TranscriptionFlaggingCallback("flagged_data")
-
 # Define interfaces first
 mf_transcribe = gr.Interface(
     fn=transcribe,
@@ -195,9 +148,7 @@ mf_transcribe = gr.Interface(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
         " of arbitrary length."
-    ),
-    flagging_mode="manual",
-    flagging_dir="flagged_data"
+    )
 )

 file_transcribe = gr.Interface(
@@ -218,63 +169,12 @@ file_transcribe = gr.Interface(
         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
         f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
         " of arbitrary length."
-    ),
-    flagging_mode="manual",
-    flagging_dir="flagged_data"
+    )
 )

 # Then set up the demo with the interfaces
 with demo:
-
-    with gr.Tab("Audio file"):
-        file_transcribe.render()
-    with gr.Tab("Microphone"):
-        mf_transcribe.render()
-    with gr.Tab("Feedback"):
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### Provide Feedback")
-                gr.Markdown("If you notice any issues with the transcription, please provide the correct text below.")
-                feedback_text = gr.Textbox(
-                    label="Correct transcription",
-                    placeholder="Enter the correct transcription here...",
-                    lines=5
-                )
-                submit_btn = gr.Button("Submit Feedback")
-            with gr.Column():
-                gr.Markdown("### Instructions")
-                gr.Markdown("""
-                1. Transcribe your audio in the Audio file or Microphone tab
-                2. If you notice any issues, copy the transcription here
-                3. Edit the text to provide the correct version
-                4. Click Submit Feedback
-                """)
-
-    def submit_feedback(text):
-        if not text.strip():
-            raise gr.Error("Please provide the correct transcription.")
-        try:
-            # Get the current outputs from either interface
-            file_outputs = file_transcribe.output_components
-            mic_outputs = mf_transcribe.output_components
-
-            # Save the feedback
-            flagging_callback.flag(
-                components=[None, file_outputs[0], text],  # No audio file, just transcription and correction
-                flag_data=None,
-                flag_option=None,
-                username=None
-            )
-            return "Thank you for your feedback!"
-        except Exception as e:
-            logger.error(f"Error submitting feedback: {str(e)}")
-            raise gr.Error(f"Failed to save feedback: {str(e)}")
-
-    submit_btn.click(
-        submit_feedback,
-        inputs=[feedback_text],
-        outputs=[gr.Textbox(label="Status")]
-    )
+    gr.TabbedInterface([file_transcribe, mf_transcribe], ["Audio file", "Microphone"])

 logger.info("Starting Gradio interface")
 demo.queue().launch(ssr_mode=False)