| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| use crate::error::{Error, Result}; |
|
|
| |
| |
| |
| |
| |
| |
/// Eight-component prosody summary of an audio buffer.
///
/// The declaration order of the fields is also the element order used by
/// [`MarineProsodyVector::to_array`] and [`MarineProsodyVector::from_array`].
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MarineProsodyVector {
    /// Mean period jitter: average relative deviation of the inter-peak
    /// period from its running average.
    pub jp_mean: f32,

    /// Standard deviation of the period jitter values.
    pub jp_std: f32,

    /// Mean amplitude jitter: average relative deviation of the peak
    /// amplitude from its running average.
    pub ja_mean: f32,

    /// Standard deviation of the amplitude jitter values.
    pub ja_std: f32,

    /// Harmonic term. `MarineProsodyConditioner::from_samples` currently
    /// always reports `1.0` here.
    pub h_mean: f32,

    /// Salience term. The conditioner derives it as
    /// `1 / (1 + jp_mean + ja_mean)`, so lower jitter gives a value closer to 1.
    pub s_mean: f32,

    /// Detected peaks per second of audio.
    pub peak_density: f32,

    /// Mean squared peak amplitude.
    pub energy_mean: f32,
}

impl MarineProsodyVector {
    /// Returns the baseline vector: every jitter, density and energy term is
    /// `0.0`, while `h_mean` and `s_mean` are `1.0`.
    pub fn zeros() -> Self {
        Self {
            jp_mean: 0.0,
            jp_std: 0.0,
            ja_mean: 0.0,
            ja_std: 0.0,
            h_mean: 1.0,
            s_mean: 1.0,
            peak_density: 0.0,
            energy_mean: 0.0,
        }
    }

    /// Flattens the vector into an array, in field declaration order.
    pub fn to_array(&self) -> [f32; 8] {
        [
            self.jp_mean,
            self.jp_std,
            self.ja_mean,
            self.ja_std,
            self.h_mean,
            self.s_mean,
            self.peak_density,
            self.energy_mean,
        ]
    }

    /// Rebuilds a vector from an array laid out as produced by
    /// [`MarineProsodyVector::to_array`].
    pub fn from_array(arr: [f32; 8]) -> Self {
        let [jp_mean, jp_std, ja_mean, ja_std, h_mean, s_mean, peak_density, energy_mean] = arr;
        Self {
            jp_mean,
            jp_std,
            ja_mean,
            ja_std,
            h_mean,
            s_mean,
            peak_density,
            energy_mean,
        }
    }

    /// Average of the period and amplitude jitter means.
    pub fn combined_jitter(&self) -> f32 {
        (self.jp_mean + self.ja_mean) / 2.0
    }

    /// Heuristic valence estimate in `[-1.0, 1.0]`.
    ///
    /// Low combined jitter together with high energy pushes the estimate
    /// towards `1.0`. Negative, NaN or infinite inputs are sanitised first so
    /// the result is never NaN; for finite, non-negative fields the value is
    /// exactly what the plain formula yields.
    pub fn estimate_valence(&self) -> f32 {
        let jitter_factor = 1.0 / (1.0 + Self::non_negative(self.combined_jitter()));
        let energy_factor = Self::non_negative(self.energy_mean).sqrt();

        (jitter_factor * energy_factor * 2.0 - 1.0).clamp(-1.0, 1.0)
    }

    /// Heuristic arousal estimate in `[0.0, 1.0]`.
    ///
    /// Averages three contributions: peak density (saturating at 100 peaks
    /// per second), the square root of the energy, and the summed jitter
    /// standard deviations (saturating at 1). Invalid inputs (negative or
    /// NaN) contribute `0.0` instead of turning the whole estimate into NaN.
    pub fn estimate_arousal(&self) -> f32 {
        let density_factor = Self::unit_interval(self.peak_density / 100.0);
        let energy_factor = Self::non_negative(self.energy_mean).sqrt();
        let variance_factor = Self::unit_interval(self.jp_std + self.ja_std);

        ((density_factor + energy_factor + variance_factor) / 3.0).clamp(0.0, 1.0)
    }

    /// Maps NaN and negative values to `0.0` and caps `+inf` at `f32::MAX`.
    ///
    /// `f32::max`/`f32::min` return the non-NaN operand, unlike `clamp`,
    /// which propagates NaN.
    fn non_negative(value: f32) -> f32 {
        value.max(0.0).min(f32::MAX)
    }

    /// Restricts `value` to `[0.0, 1.0]`, mapping NaN to `0.0`.
    fn unit_interval(value: f32) -> f32 {
        value.max(0.0).min(1.0)
    }
}

impl Default for MarineProsodyVector {
    /// Same as [`MarineProsodyVector::zeros`].
    fn default() -> Self {
        Self::zeros()
    }
}
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| pub struct MarineProsodyConditioner { |
| sample_rate: u32, |
| jitter_low: f32, |
| jitter_high: f32, |
| min_period: u32, |
| max_period: u32, |
| ema_alpha: f32, |
| } |
|
|
| impl MarineProsodyConditioner { |
| |
| pub fn new(sample_rate: u32) -> Self { |
| |
| let min_period = sample_rate / 4000; |
| let max_period = sample_rate / 60; |
|
|
| Self { |
| sample_rate, |
| jitter_low: 0.02, |
| jitter_high: 0.60, |
| min_period, |
| max_period, |
| ema_alpha: 0.01, |
| } |
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| pub fn from_samples(&self, samples: &[f32]) -> Result<MarineProsodyVector> { |
| if samples.is_empty() { |
| return Err(Error::Audio("Empty audio buffer".into())); |
| } |
|
|
| |
| let mut peaks: Vec<PeakInfo> = Vec::new(); |
| let clip_threshold = 1e-3; |
|
|
| |
| for i in 1..samples.len().saturating_sub(1) { |
| let prev = samples[i - 1].abs(); |
| let curr = samples[i].abs(); |
| let next = samples[i + 1].abs(); |
|
|
| if curr > prev && curr > next && curr > clip_threshold { |
| peaks.push(PeakInfo { |
| index: i, |
| amplitude: curr, |
| }); |
| } |
| } |
|
|
| if peaks.len() < 3 { |
| |
| return Ok(MarineProsodyVector::zeros()); |
| } |
|
|
| |
| let mut periods: Vec<f32> = Vec::new(); |
| let mut amplitudes: Vec<f32> = Vec::new(); |
| let mut jp_values: Vec<f32> = Vec::new(); |
| let mut ja_values: Vec<f32> = Vec::new(); |
|
|
| |
| let mut ema_period = 0.0f32; |
| let mut ema_amp = 0.0f32; |
| let mut ema_initialized = false; |
|
|
| for i in 1..peaks.len() { |
| let period = (peaks[i].index - peaks[i - 1].index) as f32; |
| let amp = peaks[i].amplitude; |
|
|
| |
| if period > self.min_period as f32 && period < self.max_period as f32 { |
| periods.push(period); |
| amplitudes.push(amp); |
|
|
| if !ema_initialized { |
| ema_period = period; |
| ema_amp = amp; |
| ema_initialized = true; |
| } else { |
| |
| let jp = (period - ema_period).abs() / ema_period; |
| let ja = (amp - ema_amp).abs() / ema_amp; |
| jp_values.push(jp); |
| ja_values.push(ja); |
|
|
| |
| ema_period = self.ema_alpha * period + (1.0 - self.ema_alpha) * ema_period; |
| ema_amp = self.ema_alpha * amp + (1.0 - self.ema_alpha) * ema_amp; |
| } |
| } |
| } |
|
|
| if jp_values.is_empty() { |
| return Ok(MarineProsodyVector::zeros()); |
| } |
|
|
| |
| let n = jp_values.len() as f32; |
| let duration_sec = samples.len() as f32 / self.sample_rate as f32; |
|
|
| |
| let jp_mean = jp_values.iter().sum::<f32>() / n; |
| let ja_mean = ja_values.iter().sum::<f32>() / n; |
| let energy_mean = amplitudes.iter().map(|a| a * a).sum::<f32>() / amplitudes.len() as f32; |
|
|
| |
| let jp_var = jp_values.iter().map(|x| (x - jp_mean).powi(2)).sum::<f32>() / n; |
| let ja_var = ja_values.iter().map(|x| (x - ja_mean).powi(2)).sum::<f32>() / n; |
| let jp_std = jp_var.sqrt(); |
| let ja_std = ja_var.sqrt(); |
|
|
| |
| let h_mean = 1.0; |
|
|
| |
| let s_mean = 1.0 / (1.0 + jp_mean + ja_mean); |
|
|
| |
| let peak_density = peaks.len() as f32 / duration_sec; |
|
|
| Ok(MarineProsodyVector { |
| jp_mean, |
| jp_std, |
| ja_mean, |
| ja_std, |
| h_mean, |
| s_mean, |
| peak_density, |
| energy_mean, |
| }) |
| } |
|
|
| |
| |
| |
| pub fn validate_tts_output(&self, samples: &[f32]) -> Result<TTSQualityReport> { |
| let prosody = self.from_samples(samples)?; |
|
|
| let mut issues = Vec::new(); |
|
|
| |
| if prosody.jp_mean < 0.005 { |
| issues.push("Too perfect - sounds robotic (add natural variation)"); |
| } |
|
|
| if prosody.jp_mean > 0.3 { |
| issues.push("High period jitter - possible artifacts"); |
| } |
|
|
| if prosody.ja_mean > 0.4 { |
| issues.push("High amplitude jitter - volume inconsistency"); |
| } |
|
|
| if prosody.s_mean < 0.4 { |
| issues.push("Low salience - audio quality issues"); |
| } |
|
|
| if prosody.peak_density < 10.0 { |
| issues.push("Low peak density - missing speech energy"); |
| } |
|
|
| let quality_score = prosody.s_mean * 100.0; |
|
|
| Ok(TTSQualityReport { |
| prosody, |
| quality_score, |
| issues, |
| }) |
| } |
|
|
| |
| pub fn sample_rate(&self) -> u32 { |
| self.sample_rate |
| } |
| } |
|
|
| |
/// A local maximum of the rectified signal found during peak detection.
#[derive(Debug, Clone, Copy, PartialEq)]
struct PeakInfo {
    /// Position of the peak within the analysed sample buffer.
    index: usize,
    /// Absolute sample value at `index`.
    amplitude: f32,
}
|
|
| |
| #[derive(Debug, Clone)] |
| pub struct TTSQualityReport { |
| |
| pub prosody: MarineProsodyVector, |
| |
| pub quality_score: f32, |
| |
| pub issues: Vec<&'static str>, |
| } |
|
|
| impl TTSQualityReport { |
| |
| pub fn passes(&self, threshold: f32) -> bool { |
| self.quality_score >= threshold && self.issues.is_empty() |
| } |
| } |
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// `to_array` followed by `from_array` must reproduce every field.
    #[test]
    fn test_prosody_vector_array_conversion() {
        let original = MarineProsodyVector {
            jp_mean: 0.1,
            jp_std: 0.05,
            ja_mean: 0.2,
            ja_std: 0.1,
            h_mean: 0.9,
            s_mean: 0.8,
            peak_density: 50.0,
            energy_mean: 0.3,
        };

        let reconstructed = MarineProsodyVector::from_array(original.to_array());

        // The derived `PartialEq` compares all eight fields, so a swapped
        // index in either conversion is caught.
        assert_eq!(original, reconstructed);
    }

    /// An empty buffer is rejected rather than analysed.
    #[test]
    fn test_conditioner_empty_buffer() {
        let conditioner = MarineProsodyConditioner::new(22050);
        let result = conditioner.from_samples(&[]);
        assert!(result.is_err());
    }

    /// Silence contains no peaks, so the baseline vector is returned.
    #[test]
    fn test_conditioner_silence() {
        let conditioner = MarineProsodyConditioner::new(22050);
        let silence = vec![0.0; 1000];
        let prosody = conditioner.from_samples(&silence).unwrap();

        assert_eq!(prosody.peak_density, 0.0);
        assert_eq!(prosody, MarineProsodyVector::zeros());
    }

    /// Stable, energetic audio must score a higher valence than jittery,
    /// quiet audio.
    #[test]
    fn test_estimate_valence() {
        let positive = MarineProsodyVector {
            jp_mean: 0.01,
            jp_std: 0.01,
            ja_mean: 0.01,
            ja_std: 0.01,
            h_mean: 1.0,
            s_mean: 0.95,
            peak_density: 100.0,
            energy_mean: 0.8,
        };

        let negative = MarineProsodyVector {
            jp_mean: 0.5,
            jp_std: 0.3,
            ja_mean: 0.4,
            ja_std: 0.2,
            h_mean: 0.7,
            s_mean: 0.4,
            peak_density: 30.0,
            energy_mean: 0.1,
        };

        assert!(positive.estimate_valence() > negative.estimate_valence());
    }
}
|
|