From 15d914f2c017ff341b2979c792ebaa0c0ec348f9 Mon Sep 17 00:00:00 2001
From: ckaznable
Date: Thu, 25 May 2023 16:54:31 +0800
Subject: [PATCH 1/3] perf: improve output segment format

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

* process() now hands f() the output of process_segment() instead of the raw
  segment text. The `last_segment != segment` check still compares the raw
  text.
* process_segment() runs replace_effect_segment_to_space(), trims the result,
  then runs merge_duplicate_segment().
* replace_effect_segment_to_space() drops every character between '(' and
  ')' and pushes one space for each ')'. Only those two ASCII characters are
  matched, a ')' with no opener still adds a space, and an unclosed '('
  drops the rest of the input.
* merge_duplicate_segment() grows a prefix one char at a time and acts once
  the prefix is longer than 4 bytes: while the text starts with that prefix
  written twice, every occurrence of the doubled prefix is replaced with "".
  It returns right after that first check, so longer prefixes are never
  tried. The loop also stops once the char index passes half of the byte
  length of the input.
* NOTE(review): `i` counts chars while `result.len()` and `prev_str.len()`
  count bytes, so both thresholds shift for multi-byte text. Confirm that
  this is intended.
* NOTE(review): String::replace removes the doubled prefix everywhere, not
  only at the start, and leaves no single copy behind ("hellohello world"
  becomes " world"). Confirm that this matches the "merge" in the name.
* NOTE(review): the leading whitespace of the context lines in the process()
  hunk was reconstructed. Check it against src/speech.rs before applying.

 src/speech.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/src/speech.rs b/src/speech.rs
index 8e714e9..a98dc10 100644
--- a/src/speech.rs
+++ b/src/speech.rs
@@ -57,7 +57,7 @@ pub fn process(
                 (state.full_get_segment_text(i), state.full_get_segment_t0(i))
             {
                 if last_segment != segment {
-                    f(segment.as_ref(), start_timestamp);
+                    f(process_segment(segment.as_ref()).as_ref(), start_timestamp);
                 }
 
                 last_segment = segment;
@@ -82,3 +82,51 @@ fn get_params<'a, 'b>(config: &SpeechConfig<'a>) -> FullParams<'a, 'b> {
 
     params
 }
+
+fn process_segment(segment: &str) -> String {
+    let segment = replace_effect_segment_to_space(segment);
+    merge_duplicate_segment(segment.trim())
+}
+
+fn replace_effect_segment_to_space(input: &str) -> String {
+    let mut result = String::new();
+    let mut in_parentheses = false;
+
+    for c in input.chars() {
+        if c == '(' {
+            in_parentheses = true;
+        } else if c == ')' {
+            in_parentheses = false;
+            result.push(' ');
+        } else if !in_parentheses {
+            result.push(c);
+        }
+    }
+
+    result
+}
+
+fn merge_duplicate_segment(input: &str) -> String {
+    let mut result = input.to_string();
+    let mut prev_str = String::new();
+
+    let half_len = result.len() / 2;
+
+    for (i, c) in input.chars().enumerate() {
+        if i > half_len {
+            return result;
+        }
+
+        prev_str.push(c);
+        if prev_str.len() > 4 {
+            let detect_str = prev_str.to_owned() + prev_str.to_owned().as_ref();
+            while result.starts_with(&detect_str) {
+                result = result.replace(&detect_str, "");
+            }
+
+            return result;
+        }
+    }
+
+    result
+}

From 9b9ab815198630f95c65196ad2c781466237852e Mon Sep 17 00:00:00 2001
From: ckaznable
Date: Thu, 25 May 2023 17:11:03 +0800
Subject: [PATCH 2/3] fix: resolve edge case of segment

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

* merge_duplicate_segment() now splits the input on single spaces first and
  returns the first piece when every other piece equals it ("la la la"
  becomes "la"). Input with no space has no other pieces, so it is returned
  as is.
* Two spaces in a row produce an empty piece, so "la  la" does not take this
  shortcut.
* `prev_str` is only moved below `half_len`; its use is unchanged.

 src/speech.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/speech.rs b/src/speech.rs
index a98dc10..b915de0 100644
--- a/src/speech.rs
+++ b/src/speech.rs
@@ -108,9 +108,16 @@ fn replace_effect_segment_to_space(input: &str) -> String {
 
 fn merge_duplicate_segment(input: &str) -> String {
     let mut result = input.to_string();
-    let mut prev_str = String::new();
+
+    let mut s = input.split(' ');
+    if let Some(first_sp) = s.next() {
+        if s.all(|sp| sp == first_sp) {
+            return first_sp.to_string();
+        }
+    }
 
     let half_len = result.len() / 2;
+    let mut prev_str = String::new();
 
     for (i, c) in input.chars().enumerate() {
         if i > half_len {

From 3abcb8af54d9c698fdfe54242a35848b6b993032 Mon Sep 17 00:00:00 2001
From: ckaznable
Date: Thu, 25 May 2023 17:22:44 +0800
Subject: [PATCH 3/3] fix: filter empty segment chunk

---
Review notes (placed after the "---" marker, so they are not part of the
commit message):

* process() now binds the processed text to a new `segment` inside the `if`
  block and calls f() only when that text is not empty.
* The inner `segment` shadows the outer one only inside that block, so
  `last_segment = segment;` after it still stores the raw text.
* NOTE(review): the leading whitespace of the context lines in this hunk was
  reconstructed. Check it against src/speech.rs before applying.

 src/speech.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/speech.rs b/src/speech.rs
index b915de0..f0fef87 100644
--- a/src/speech.rs
+++ b/src/speech.rs
@@ -57,7 +57,10 @@ pub fn process(
                 (state.full_get_segment_text(i), state.full_get_segment_t0(i))
             {
                 if last_segment != segment {
-                    f(process_segment(segment.as_ref()).as_ref(), start_timestamp);
+                    let segment = process_segment(segment.as_ref());
+                    if !segment.is_empty() {
+                        f(&segment, start_timestamp);
+                    }
                 }
 
                 last_segment = segment;