From e3c70bc1273fb9ac668650ba80e34f27a6cfc9a2 Mon Sep 17 00:00:00 2001 From: yuyun2000 <15515722313yxw@gmail.com> Date: Tue, 6 May 2025 16:48:11 +0800 Subject: [PATCH 1/2] Implement Sola algorithm for smoother audio transitions Apply the Synchronized Overlap-Add (SOLA) algorithm to smooth the connection between audio segments output by the decoder, resulting in more natural-sounding transitions between segments. --- .../llm_framework/main_melotts/src/main.cpp | 67 +++-- .../main_melotts/src/runner/Lexicon.hpp | 1 + .../main_melotts/src/runner/SolaProcessor.h | 269 ++++++++++++++++++ 3 files changed, 315 insertions(+), 22 deletions(-) create mode 100644 projects/llm_framework/main_melotts/src/runner/SolaProcessor.h diff --git a/projects/llm_framework/main_melotts/src/main.cpp b/projects/llm_framework/main_melotts/src/main.cpp index b5a27cb..610362a 100644 --- a/projects/llm_framework/main_melotts/src/main.cpp +++ b/projects/llm_framework/main_melotts/src/main.cpp @@ -9,6 +9,7 @@ #include "Lexicon.hpp" #include #include "AudioFile.h" +#include "SolaProcessor.h" #include "Lexicon.hpp" #include @@ -263,49 +264,71 @@ class llm_task { auto encoder_output = encoder_->Run(phones, tones, langids, g_matrix, mode_config_.noise_scale, mode_config_.noise_scale_w, mode_config_.get_length_scale(), mode_config_.sdp_ratio); - float *zp_data = encoder_output.at(0).GetTensorMutableData(); - int audio_len = encoder_output.at(2).GetTensorMutableData()[0]; - auto zp_info = encoder_output.at(0).GetTensorTypeAndShapeInfo(); - auto zp_shape = zp_info.GetShape(); - int zp_size = decoder_->GetInputSize(0) / sizeof(float); - int dec_len = zp_size / zp_shape[1]; - int audio_slice_len = decoder_->GetOutputSize(0) / sizeof(float); - std::vector decoder_output(audio_slice_len); - int dec_slice_num = int(std::ceil(zp_shape[2] * 1.0 / dec_len)); + float *zp_data = encoder_output.at(0).GetTensorMutableData(); + int audio_len = encoder_output.at(2).GetTensorMutableData()[0]; + auto zp_info = encoder_output.at(0).GetTensorTypeAndShapeInfo(); + auto zp_shape = zp_info.GetShape(); + + // Decoder parameters setup + int zp_size = decoder_->GetInputSize(0) / sizeof(float); + int dec_len = zp_size / zp_shape[1]; + int audio_slice_len = decoder_->GetOutputSize(0) / sizeof(float); + const int pad_frames = 16; + const int samples_per_frame = 512; + const int effective_frames = dec_len - 2 * pad_frames; + int dec_slice_num = + static_cast(std::ceil(static_cast(zp_shape[2]) / static_cast(effective_frames))); + SolaProcessor sola(pad_frames, samples_per_frame); std::vector pcmlist; + for (int i = 0; i < dec_slice_num; i++) { + int input_start = i * effective_frames; + if (i > 0) { + input_start -= pad_frames; + } + input_start = std::max(0, input_start); + int actual_len = std::min(dec_len, static_cast(zp_shape[2] - input_start)); std::vector zp(zp_size, 0); - int actual_size = (i + 1) * dec_len < zp_shape[2] ? dec_len : zp_shape[2] - i * dec_len; + for (int n = 0; n < zp_shape[1]; n++) { - memcpy(zp.data() + n * dec_len, zp_data + n * zp_shape[2] + i * dec_len, - sizeof(float) * actual_size); + int copy_size = std::min(actual_len, static_cast(zp_shape[2] - input_start)); + if (copy_size > 0) { + memcpy(zp.data() + n * dec_len, zp_data + n * zp_shape[2] + input_start, + sizeof(float) * copy_size); + } } + // Run decoder + std::vector decoder_output(audio_slice_len); decoder_->SetInput(zp.data(), 0); decoder_->SetInput(g_matrix.data(), 1); if (0 != decoder_->Run()) { - printf("Run decoder model failed!\n"); throw std::string("decoder_ RunSync error"); } decoder_->GetOutput(decoder_output.data(), 0); - actual_size = (i + 1) * audio_slice_len < audio_len ? audio_slice_len : audio_len - i * audio_slice_len; - if (decoder_output.size() > actual_size) { - pcmlist.reserve(pcmlist.size() + actual_size); - std::copy(decoder_output.begin(), decoder_output.begin() + actual_size, - std::back_inserter(pcmlist)); - } else { - pcmlist.reserve(pcmlist.size() + decoder_output.size()); - std::copy(decoder_output.begin(), decoder_output.end(), std::back_inserter(pcmlist)); - } + std::vector processed_output = sola.ProcessFrame(decoder_output, i, dec_slice_num, actual_len); + + pcmlist.insert(pcmlist.end(), processed_output.begin(), processed_output.end()); } + double src_ratio = (mode_config_.audio_rate * 1.0f) / (mode_config_.mode_rate * 1.0f); std::vector tmp_pcm((pcmlist.size() * src_ratio + 1)); int len; resample_audio(pcmlist.data(), pcmlist.size(), tmp_pcm.data(), &len, src_ratio); + + // Convert to 16-bit PCM + wav_pcm_data.reserve(len); std::transform(tmp_pcm.begin(), tmp_pcm.begin() + len, std::back_inserter(wav_pcm_data), [](const auto val) { return (int16_t)(val * INT16_MAX); }); + + // Call callback function with output if (out_callback_) out_callback_(std::string((char *)wav_pcm_data.data(), wav_pcm_data.size() * sizeof(int16_t)), finish); + + } catch (const std::exception &e) { + SLOGI("TTS processing exception: %s", e.what()); + return true; } catch (...) { + SLOGI("TTS processing encountered unknown exception"); return true; } return false; diff --git a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp index 242fb15..242e9e0 100644 --- a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp +++ b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp @@ -32,6 +32,7 @@ class Lexicon { public: Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename) : max_phrase_length(0) { + SLOGI("词典加载: %zu 发音表加载: %zu", tokens_filename, lexicon_filename); std::unordered_map tokens; std::ifstream ifs(tokens_filename); assert(ifs.is_open()); diff --git a/projects/llm_framework/main_melotts/src/runner/SolaProcessor.h b/projects/llm_framework/main_melotts/src/runner/SolaProcessor.h new file mode 100644 index 0000000..a2286bb --- /dev/null +++ b/projects/llm_framework/main_melotts/src/runner/SolaProcessor.h @@ -0,0 +1,269 @@ +#ifndef SOLA_PROCESSOR_H +#define SOLA_PROCESSOR_H + +#include +#include +#include +#include +#include + +/** + * SolaProcessor - Synchronous Overlap-Add method for audio frame processing + * + * This class provides functionality for smoothly concatenating audio frames + * using the SOLA algorithm, which finds optimal alignment points between + * consecutive frames and applies crossfading for smooth transitions. + */ +class SolaProcessor { +public: + /** + * Constructor + * + * @param padFrames Number of padding frames at the beginning and end + * @param samplesPerFrame Number of audio samples in each frame + */ + SolaProcessor(int padFrames, int samplesPerFrame) + : pad_frames_(padFrames), samples_per_frame_(samplesPerFrame), first_frame_(true) + { + Initialize(); + } + + /** + * Reset the processor to its initial state + */ + void Reset() + { + first_frame_ = true; + std::fill(sola_buffer_.begin(), sola_buffer_.end(), 0.0f); + } + + /** + * Process a single audio frame + * + * @param decoder_output Raw audio data from decoder + * @param frameIndex Current frame index + * @param totalFrames Total number of frames + * @param actualFrameLen Actual length of the frame + * @return Processed audio samples + */ + std::vector ProcessFrame(const std::vector& decoder_output, int frameIndex, int totalFrames, + int actualFrameLen) + { + std::vector processed_output; + + if (first_frame_) { + // Special handling for the first frame + ProcessFirstFrame(decoder_output, processed_output, actualFrameLen); + first_frame_ = false; + } else { + // Process subsequent frames with SOLA algorithm + ProcessSubsequentFrame(decoder_output, processed_output, frameIndex, totalFrames, actualFrameLen); + } + + return processed_output; + } + +private: + /** + * Initialize the SOLA processor parameters and buffers + */ + void Initialize() + { + // Calculate SOLA parameters + sola_buffer_frame_ = pad_frames_ * samples_per_frame_; + sola_search_frame_ = pad_frames_ * samples_per_frame_; + effective_frames_ = 0; // Will be set during frame processing + + // Create fade-in and fade-out windows + fade_in_window_.resize(sola_buffer_frame_); + fade_out_window_.resize(sola_buffer_frame_); + + for (int i = 0; i < sola_buffer_frame_; i++) { + fade_in_window_[i] = static_cast(i) / sola_buffer_frame_; + fade_out_window_[i] = 1.0f - fade_in_window_[i]; + } + + // Initialize SOLA buffer + sola_buffer_.resize(sola_buffer_frame_, 0.0f); + } + + /** + * Process the first audio frame + * + * @param decoder_output Raw audio data from decoder + * @param processed_output Output buffer for processed audio + * @param actualFrameLen Actual length of the frame + */ + void ProcessFirstFrame(const std::vector& decoder_output, std::vector& processed_output, + int actualFrameLen) + { + int audio_start = pad_frames_ * samples_per_frame_; + int audio_len = (actualFrameLen - 2 * pad_frames_) * samples_per_frame_; + + // Boundary check + audio_len = std::min(audio_len, static_cast(decoder_output.size() - audio_start)); + + // Add first frame data to output + processed_output.insert(processed_output.end(), decoder_output.begin() + audio_start, + decoder_output.begin() + audio_start + audio_len); + + // Save the end part to SOLA buffer for next frame alignment + int buffer_start = audio_start + audio_len; + if (buffer_start + sola_buffer_frame_ <= decoder_output.size()) { + std::copy(decoder_output.begin() + buffer_start, decoder_output.begin() + buffer_start + sola_buffer_frame_, + sola_buffer_.begin()); + } + } + + /** + * Process subsequent audio frames using SOLA algorithm + * + * @param decoder_output Raw audio data from decoder + * @param processed_output Output buffer for processed audio + * @param frameIndex Current frame index + * @param totalFrames Total number of frames + * @param actualFrameLen Actual length of the frame + */ + void ProcessSubsequentFrame(const std::vector& decoder_output, std::vector& processed_output, + int frameIndex, int totalFrames, int actualFrameLen) + { + int audio_start = pad_frames_ * samples_per_frame_; + + // 1. Prepare search window + std::vector search_window(sola_buffer_frame_ + sola_search_frame_); + std::copy(decoder_output.begin() + audio_start, decoder_output.begin() + audio_start + search_window.size(), + search_window.begin()); + + // 2. Find best alignment point (compute cross-correlation) + int best_offset = FindBestOffset(search_window); + + // 3. Apply alignment offset + int aligned_start = audio_start + best_offset; + + // 4. Create smooth transition + std::vector crossfade_region = CreateCrossfade(decoder_output, aligned_start); + + // 5. Add crossfade region to output + processed_output.insert(processed_output.end(), crossfade_region.begin(), crossfade_region.end()); + + // 6. Add remaining valid audio data + AddRemainingAudio(decoder_output, processed_output, aligned_start, frameIndex, totalFrames, actualFrameLen); + } + + /** + * Find the best alignment offset using normalized cross-correlation + * + * @param search_window Window of audio samples to search in + * @return Optimal offset for alignment + */ + int FindBestOffset(const std::vector& search_window) + { + int best_offset = 0; + float best_correlation = -1.0f; + + for (int offset = 0; offset <= sola_search_frame_; offset++) { + float correlation = 0.0f; + float energy = 0.0f; + + for (int j = 0; j < sola_buffer_frame_; j++) { + correlation += sola_buffer_[j] * search_window[j + offset]; + energy += search_window[j + offset] * search_window[j + offset]; + } + + // Normalize correlation + float normalized_correlation = (energy > 1e-8) ? correlation / std::sqrt(energy) : 0.0f; + + if (normalized_correlation > best_correlation) { + best_correlation = normalized_correlation; + best_offset = offset; + } + } + + return best_offset; + } + + /** + * Create crossfade transition region + * + * @param decoder_output Raw audio data from decoder + * @param aligned_start Starting point after alignment + * @return Crossfaded audio samples + */ + std::vector CreateCrossfade(const std::vector& decoder_output, int aligned_start) + { + std::vector crossfade_region(sola_buffer_frame_); + + for (int j = 0; j < sola_buffer_frame_; j++) { + // Apply fade-in and fade-out window functions + crossfade_region[j] = + decoder_output[aligned_start + j] * fade_in_window_[j] + sola_buffer_[j] * fade_out_window_[j]; + } + + return crossfade_region; + } + + /** + * Add remaining audio data and update buffer + * + * @param decoder_output Raw audio data from decoder + * @param processed_output Output buffer for processed audio + * @param aligned_start Starting point after alignment + * @param frameIndex Current frame index + * @param totalFrames Total number of frames + * @param actualFrameLen Actual length of the frame + */ + void AddRemainingAudio(const std::vector& decoder_output, std::vector& processed_output, + int aligned_start, int frameIndex, int totalFrames, int actualFrameLen) + { + int remaining_start = aligned_start + sola_buffer_frame_; + int remaining_len = (actualFrameLen - 2 * pad_frames_) * samples_per_frame_ - sola_buffer_frame_; + + // Boundary check + remaining_len = std::min(remaining_len, static_cast(decoder_output.size() - remaining_start)); + + if (remaining_len > 0) { + processed_output.insert(processed_output.end(), decoder_output.begin() + remaining_start, + decoder_output.begin() + remaining_start + remaining_len); + } + + // Update SOLA buffer + UpdateSolaBuffer(decoder_output, remaining_start + remaining_len); + } + + /** + * Update SOLA buffer with new audio data + * + * @param decoder_output Raw audio data from decoder + * @param buffer_start Starting point for the new buffer data + */ + void UpdateSolaBuffer(const std::vector& decoder_output, int buffer_start) + { + // Check if there's enough data for the next buffer + if (buffer_start + sola_buffer_frame_ <= decoder_output.size()) { + std::copy(decoder_output.begin() + buffer_start, decoder_output.begin() + buffer_start + sola_buffer_frame_, + sola_buffer_.begin()); + } else { + // Fill with zeros if not enough data + int avail = static_cast(decoder_output.size() - buffer_start); + if (avail > 0) { + std::copy(decoder_output.begin() + buffer_start, decoder_output.end(), sola_buffer_.begin()); + } + std::fill(sola_buffer_.begin() + avail, sola_buffer_.end(), 0.0f); + } + } + +private: + int pad_frames_; // Number of padding frames + int samples_per_frame_; // Number of samples per frame + int effective_frames_; // Number of effective frames + int sola_buffer_frame_; // SOLA buffer length + int sola_search_frame_; // SOLA search window length + + std::vector fade_in_window_; // Fade-in window + std::vector fade_out_window_; // Fade-out window + std::vector sola_buffer_; // SOLA buffer + + bool first_frame_; // Flag for first frame processing +}; + +#endif // SOLA_PROCESSOR_H From a151affa1d2891da0e8f65ce4f176a249df97b3f Mon Sep 17 00:00:00 2001 From: yuyun2000 <15515722313yxw@gmail.com> Date: Tue, 6 May 2025 16:50:35 +0800 Subject: [PATCH 2/2] Translate logs in Lexicon.hpp to English and add debug switch - Convert all Chinese log messages in Lexicon.hpp to English for better international compatibility\n- Add a debug flag to control whether to display g2p process logs\n- Improve code readability and debugging experience --- .../main_melotts/src/runner/Lexicon.hpp | 104 +++++++++--------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp index 242e9e0..d1bcbe9 100644 --- a/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp +++ b/projects/llm_framework/main_melotts/src/runner/Lexicon.hpp @@ -1,5 +1,4 @@ #pragma once - #include #include #include @@ -9,7 +8,15 @@ #include #include #include "../../../../../SDK/components/utilities/include/sample_log.h" - +// Debug logging switch - set to true to enable debug logs +static bool DEBUG_LOGGING = false; +// Macro for debug logging +#define DEBUG_LOG(fmt, ...) \ + do { \ + if (DEBUG_LOGGING) { \ + SLOGI(fmt, ##__VA_ARGS__); \ + } \ + } while (0) std::vector split(const std::string& s, char delim) { std::vector result; @@ -30,9 +37,16 @@ class Lexicon { std::unordered_map reverse_tokens; public: + // Setter for debug logging + static void setDebugLogging(bool enable) + { + DEBUG_LOGGING = enable; + } Lexicon(const std::string& lexicon_filename, const std::string& tokens_filename) : max_phrase_length(0) { - SLOGI("词典加载: %zu 发音表加载: %zu", tokens_filename, lexicon_filename); + DEBUG_LOG("Dictionary loading: %s Pronunciation table loading: %s", tokens_filename.c_str(), + lexicon_filename.c_str()); + std::unordered_map tokens; std::ifstream ifs(tokens_filename); assert(ifs.is_open()); @@ -83,8 +97,10 @@ class Lexicon { lexicon["。"] = lexicon["."]; lexicon["!"] = lexicon["!"]; lexicon["?"] = lexicon["?"]; - SLOGI("词典加载完成,包含 %zu 个条目,最长词组长度: %zu", lexicon.size(), max_phrase_length); + DEBUG_LOG("Dictionary loading complete, containing %zu entries, longest phrase length: %zu", lexicon.size(), + max_phrase_length); } + std::vector splitEachChar(const std::string& text) { std::vector words; @@ -95,93 +111,77 @@ class Lexicon { if ((text[i] & 0x80) == 0x00) { // ASCII } else if ((text[i] & 0xE0) == 0xC0) { - next = 2; // 2字节UTF-8 + next = 2; // 2-byte UTF-8 } else if ((text[i] & 0xF0) == 0xE0) { - next = 3; // 3字节UTF-8 + next = 3; // 3-byte UTF-8 } else if ((text[i] & 0xF8) == 0xF0) { - next = 4; // 4字节UTF-8 + next = 4; // 4-byte UTF-8 } words.push_back(text.substr(i, next)); i += next; } return words; } + bool is_english(const std::string& s) { return s.size() == 1 && ((s[0] >= 'A' && s[0] <= 'Z') || (s[0] >= 'a' && s[0] <= 'z')); } - bool is_english_token_char(const std::string& s) { if (s.size() != 1) return false; char c = s[0]; return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-' || c == '_'; } - void process_unknown_english(const std::string& word, std::vector& phones, std::vector& tones) { - SLOGI("Processing unknown term: %s", word.c_str()); - + DEBUG_LOG("Processing unknown term: %s", word.c_str()); std::string orig_word = word; std::vector parts; std::vector phonetic_parts; - size_t start = 0; while (start < word.size()) { bool matched = false; - for (size_t len = std::min(word.size() - start, (size_t)10); len > 0 && !matched; --len) { std::string sub_word = word.substr(start, len); std::string lower_sub_word = sub_word; std::transform(lower_sub_word.begin(), lower_sub_word.end(), lower_sub_word.begin(), [](unsigned char c) { return std::tolower(c); }); - if (lexicon.find(lower_sub_word) != lexicon.end()) { // Substring found in lexicon auto& [sub_phones, sub_tones] = lexicon[lower_sub_word]; phones.insert(phones.end(), sub_phones.begin(), sub_phones.end()); tones.insert(tones.end(), sub_tones.begin(), sub_tones.end()); - parts.push_back(sub_word); phonetic_parts.push_back(phonesToString(sub_phones)); - - SLOGI(" Matched: '%s' -> %s", sub_word.c_str(), phonesToString(sub_phones).c_str()); - + DEBUG_LOG(" Matched: '%s' -> %s", sub_word.c_str(), phonesToString(sub_phones).c_str()); start += len; matched = true; break; } } - if (!matched) { std::string single_char = word.substr(start, 1); std::string lower_char = single_char; std::transform(lower_char.begin(), lower_char.end(), lower_char.begin(), [](unsigned char c) { return std::tolower(c); }); - if (lexicon.find(lower_char) != lexicon.end()) { auto& [char_phones, char_tones] = lexicon[lower_char]; phones.insert(phones.end(), char_phones.begin(), char_phones.end()); tones.insert(tones.end(), char_tones.begin(), char_tones.end()); - parts.push_back(single_char); phonetic_parts.push_back(phonesToString(char_phones)); - - SLOGI(" Single char: '%s' -> %s", single_char.c_str(), phonesToString(char_phones).c_str()); + DEBUG_LOG(" Single char: '%s' -> %s", single_char.c_str(), phonesToString(char_phones).c_str()); } else { phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); - parts.push_back(single_char); phonetic_parts.push_back("_unknown_"); - - SLOGI(" Unknown: '%s'", single_char.c_str()); + DEBUG_LOG(" Unknown: '%s'", single_char.c_str()); } - start++; } } - std::string parts_str, phonetic_str; for (size_t i = 0; i < parts.size(); i++) { if (i > 0) { @@ -191,20 +191,20 @@ class Lexicon { parts_str += parts[i]; phonetic_str += phonetic_parts[i]; } - - SLOGI("%s\t|\tDecomposed: %s\t|\tPhonetics: %s", orig_word.c_str(), parts_str.c_str(), phonetic_str.c_str()); + DEBUG_LOG("%s\t|\tDecomposed: %s\t|\tPhonetics: %s", orig_word.c_str(), parts_str.c_str(), + phonetic_str.c_str()); } + void convert(const std::string& text, std::vector& phones, std::vector& tones) { - SLOGI("\n开始处理文本: \"%s\"", text.c_str()); - SLOGI("=======匹配结果======="); - SLOGI("单元\t|\t音素\t|\t声调"); - SLOGI("-----------------------------"); + DEBUG_LOG("\nStarting text processing: \"%s\"", text.c_str()); + DEBUG_LOG("=======Matching Results======="); + DEBUG_LOG("Unit\t|\tPhonemes\t|\tTones"); + DEBUG_LOG("-----------------------------"); phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); - - SLOGI("\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(), - tonesToString(unknown_token.second).c_str()); + DEBUG_LOG("\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(), + tonesToString(unknown_token.second).c_str()); auto chars = splitEachChar(text); int i = 0; while (i < chars.size()) { @@ -221,8 +221,8 @@ class Lexicon { auto& [eng_phones, eng_tones] = lexicon[eng_word]; phones.insert(phones.end(), eng_phones.begin(), eng_phones.end()); tones.insert(tones.end(), eng_tones.begin(), eng_tones.end()); - SLOGI("%s\t|\t%s\t|\t%s", orig_word.c_str(), phonesToString(eng_phones).c_str(), - tonesToString(eng_tones).c_str()); + DEBUG_LOG("%s\t|\t%s\t|\t%s", orig_word.c_str(), phonesToString(eng_phones).c_str(), + tonesToString(eng_tones).c_str()); } else { process_unknown_english(orig_word, phones, tones); } @@ -241,8 +241,8 @@ class Lexicon { auto& [phrase_phones, phrase_tones] = lexicon[phrase]; phones.insert(phones.end(), phrase_phones.begin(), phrase_phones.end()); tones.insert(tones.end(), phrase_tones.begin(), phrase_tones.end()); - SLOGI("%s\t|\t%s\t|\t%s", phrase.c_str(), phonesToString(phrase_phones).c_str(), - tonesToString(phrase_tones).c_str()); + DEBUG_LOG("%s\t|\t%s\t|\t%s", phrase.c_str(), phonesToString(phrase_phones).c_str(), + tonesToString(phrase_tones).c_str()); i += len; matched = true; break; @@ -264,25 +264,25 @@ class Lexicon { auto& [char_phones, char_tones] = lexicon[s]; phones.insert(phones.end(), char_phones.begin(), char_phones.end()); tones.insert(tones.end(), char_tones.begin(), char_tones.end()); - SLOGI("%s\t|\t%s\t|\t%s", orig_char.c_str(), phonesToString(char_phones).c_str(), - tonesToString(char_tones).c_str()); + DEBUG_LOG("%s\t|\t%s\t|\t%s", orig_char.c_str(), phonesToString(char_phones).c_str(), + tonesToString(char_tones).c_str()); } else { phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); - SLOGI("%s\t|\t%s (未匹配)\t|\t%s", orig_char.c_str(), phonesToString(unknown_token.first).c_str(), - tonesToString(unknown_token.second).c_str()); + DEBUG_LOG("%s\t|\t%s (Not matched)\t|\t%s", orig_char.c_str(), + phonesToString(unknown_token.first).c_str(), tonesToString(unknown_token.second).c_str()); } } } phones.insert(phones.end(), unknown_token.first.begin(), unknown_token.first.end()); tones.insert(tones.end(), unknown_token.second.begin(), unknown_token.second.end()); - SLOGI("\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(), - tonesToString(unknown_token.second).c_str()); - SLOGI("\n处理结果汇总:"); - SLOGI("原文: %s", text.c_str()); - SLOGI("音素: %s", phonesToString(phones).c_str()); - SLOGI("声调: %s", tonesToString(tones).c_str()); - SLOGI("===================="); + DEBUG_LOG("\t|\t%s\t|\t%s", phonesToString(unknown_token.first).c_str(), + tonesToString(unknown_token.second).c_str()); + DEBUG_LOG("\nProcessing Summary:"); + DEBUG_LOG("Original text: %s", text.c_str()); + DEBUG_LOG("Phonemes: %s", phonesToString(phones).c_str()); + DEBUG_LOG("Tones: %s", tonesToString(tones).c_str()); + DEBUG_LOG("===================="); } private: