Files
keyboard/keyBoard/Class/AiTalk/VM/ConversationOrchestrator.m

528 lines
14 KiB
Objective-C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// ConversationOrchestrator.m
// keyBoard
//
// Created by Mac on 2026/1/15.
//
#import "ConversationOrchestrator.h"
#import "ASRStreamClient.h"
#import "AudioCaptureManager.h"
#import "AudioSessionManager.h"
#import "LLMStreamClient.h"
#import "Segmenter.h"
#import "SubtitleSync.h"
#import "TTSPlaybackPipeline.h"
#import "TTSServiceClient.h"
// Private class extension: declares the delegate protocols this class adopts
// and the internal state that is not exposed through the public header.
@interface ConversationOrchestrator () <
AudioSessionManagerDelegate, AudioCaptureManagerDelegate,
ASRStreamClientDelegate, LLMStreamClientDelegate, TTSServiceClientDelegate,
TTSPlaybackPipelineDelegate>
// Modules (all created and wired up in -setupModules)
@property(nonatomic, strong) AudioSessionManager *audioSession;
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
@property(nonatomic, strong) ASRStreamClient *asrClient;
@property(nonatomic, strong) LLMStreamClient *llmClient;
@property(nonatomic, strong) Segmenter *segmenter;
@property(nonatomic, strong) TTSServiceClient *ttsClient;
@property(nonatomic, strong) TTSPlaybackPipeline *playbackPipeline;
@property(nonatomic, strong) SubtitleSync *subtitleSync;
// State
@property(nonatomic, assign) ConversationState state;
// Generated once in -init and passed along with every LLM request.
@property(nonatomic, copy) NSString *conversationId;
// Regenerated each time recording starts and handed to the ASR client.
@property(nonatomic, copy) NSString *currentSessionId;
// Text tracking
// Accumulates every LLM token received for the current reply.
@property(nonatomic, strong) NSMutableString *fullAssistantText;
// Maps a segment id ("seg_<n>") to the text submitted to TTS for that segment.
@property(nonatomic, strong)
NSMutableDictionary<NSString *, NSString *> *segmentTextMap;
// Index used to build the next segment id.
@property(nonatomic, assign) NSInteger segmentCounter;
// Queue
// Serial queue on which state changes and module coordination are performed.
@property(nonatomic, strong) dispatch_queue_t orchestratorQueue;
@end
@implementation ConversationOrchestrator
#pragma mark - Initialization
// Creates an idle orchestrator with a fresh conversation id, empty text
// tracking state, a private serial queue, and all sub-modules wired up.
- (instancetype)init {
  self = [super init];
  if (!self) {
    return nil;
  }

  // Conversation-level state.
  _state = ConversationStateIdle;
  _conversationId = [[NSUUID UUID] UUIDString];

  // Per-reply text tracking.
  _segmentCounter = 0;
  _segmentTextMap = [[NSMutableDictionary alloc] init];
  _fullAssistantText = [[NSMutableString alloc] init];

  // Serial queue that serializes orchestration work.
  _orchestratorQueue = dispatch_queue_create(
      "com.keyboard.aitalk.orchestrator", DISPATCH_QUEUE_SERIAL);

  [self setupModules];
  return self;
}
// Instantiates every pipeline module and registers this object as the
// delegate of those that report events. The audio session is the shared
// manager; all other modules are private instances.
- (void)setupModules {
  // Audio session (shared instance)
  AudioSessionManager *session = [AudioSessionManager sharedManager];
  session.delegate = self;
  self.audioSession = session;

  // Microphone capture
  AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
  capture.delegate = self;
  self.audioCapture = capture;

  // Streaming speech recognition
  ASRStreamClient *asr = [[ASRStreamClient alloc] init];
  asr.delegate = self;
  self.asrClient = asr;

  // Streaming LLM
  LLMStreamClient *llm = [[LLMStreamClient alloc] init];
  llm.delegate = self;
  self.llmClient = llm;

  // Segmenter (no delegate)
  self.segmenter = [[Segmenter alloc] init];

  // TTS service
  TTSServiceClient *tts = [[TTSServiceClient alloc] init];
  tts.delegate = self;
  self.ttsClient = tts;

  // Playback pipeline
  TTSPlaybackPipeline *pipeline = [[TTSPlaybackPipeline alloc] init];
  pipeline.delegate = self;
  self.playbackPipeline = pipeline;

  // Subtitle synchronisation (no delegate)
  self.subtitleSync = [[SubtitleSync alloc] init];
}
#pragma mark - Configuration Setters
// Stores a copy of the ASR endpoint and forwards the value to the ASR client.
- (void)setAsrServerURL:(NSString *)asrServerURL {
  NSString *storedURL = [asrServerURL copy];
  _asrServerURL = storedURL;
  [self.asrClient setServerURL:asrServerURL];
}
// Stores a copy of the LLM endpoint and forwards the value to the LLM client.
- (void)setLlmServerURL:(NSString *)llmServerURL {
  NSString *storedURL = [llmServerURL copy];
  _llmServerURL = storedURL;
  [self.llmClient setServerURL:llmServerURL];
}
// Stores a copy of the TTS endpoint and forwards the value to the TTS client.
- (void)setTtsServerURL:(NSString *)ttsServerURL {
  NSString *storedURL = [ttsServerURL copy];
  _ttsServerURL = storedURL;
  [self.ttsClient setServerURL:ttsServerURL];
}
#pragma mark - User Actions
// Handles the record button being pressed. Work runs on the orchestrator
// queue: an in-flight reply (Speaking/Thinking) is interrupted first, then
// recording starts, after asking for microphone permission when it has not
// been granted yet.
- (void)userDidPressRecord {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    NSLog(@"[Orchestrator] userDidPressRecord, current state: %ld",
          (long)self.state);

    // Barge in when the assistant is speaking or thinking.
    BOOL replyInProgress = (self.state == ConversationStateSpeaking ||
                            self.state == ConversationStateThinking);
    if (replyInProgress) {
      [self performBargein];
    }

    // Permission already granted: start right away.
    if ([self.audioSession hasMicrophonePermission]) {
      [self startRecording];
      return;
    }

    // Otherwise ask first; recording starts only if the user grants access.
    [self.audioSession requestMicrophonePermission:^(BOOL granted) {
      if (!granted) {
        return;
      }
      dispatch_async(self.orchestratorQueue, ^{
        [self startRecording];
      });
    }];
  });
}
// Handles the record button being released. Only acts while Listening:
// stops capture, asks the ASR client for its final result, and moves to
// Recognizing.
- (void)userDidReleaseRecord {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    NSLog(@"[Orchestrator] userDidReleaseRecord, current state: %ld",
          (long)self.state);

    if (self.state == ConversationStateListening) {
      // Stop capturing audio.
      [self.audioCapture stopCapture];
      // Request the final ASR result.
      [self.asrClient finalize];
      // Advance the state machine.
      [self updateState:ConversationStateRecognizing];
    }
  });
}
// Aborts the whole conversation pipeline and returns to Idle. The work is
// performed asynchronously on the orchestrator queue.
- (void)stop {
  dispatch_block_t teardown = ^{
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  };
  dispatch_async(self.orchestratorQueue, teardown);
}
#pragma mark - Private: Recording
// Reports a failure to start recording, then performs the same teardown as
// -stop so nothing is left half-started: capture, ASR/LLM/TTS requests and
// playback are cancelled, the audio session is deactivated, and the state
// returns to Idle. Without this, a failed start after a barge-in would leave
// the state stuck in Speaking/Thinking with everything already cancelled.
- (void)abortRecordingStartWithError:(NSError *)error {
  [self reportError:error];
  [self cancelAll];
  [self updateState:ConversationStateIdle];
}

// Starts a new recording turn: configures and activates the audio session,
// opens an ASR stream under a fresh session id, starts microphone capture,
// and moves to Listening. Any failure is reported through -reportError: and
// leaves the orchestrator Idle with all resources released.
// Callers in this file invoke it from blocks on the orchestrator queue.
- (void)startRecording {
  NSError *error = nil;

  // Configure the audio session.
  if (![self.audioSession configureForConversation:&error]) {
    [self abortRecordingStartWithError:error];
    return;
  }
  if (![self.audioSession activateSession:&error]) {
    [self abortRecordingStartWithError:error];
    return;
  }

  // Generate a new session id for this recording.
  self.currentSessionId = [[NSUUID UUID] UUIDString];

  // Start ASR before capture so the first frames have somewhere to go.
  [self.asrClient startWithSessionId:self.currentSessionId];

  // Start audio capture; on failure the teardown also cancels the ASR stream
  // opened above and deactivates the session activated above.
  if (![self.audioCapture startCapture:&error]) {
    [self abortRecordingStartWithError:error];
    return;
  }

  // Update state.
  [self updateState:ConversationStateListening];
}
#pragma mark - Private: Barge-in (打断)
// Interrupts the assistant's in-flight reply: cancels outstanding TTS, LLM
// and ASR requests, stops playback, and clears all per-reply text tracking.
// Does not change the conversation state itself.
- (void)performBargein {
  NSLog(@"[Orchestrator] Performing barge-in");

  // Cancel every in-flight request.
  [self.ttsClient cancel];
  [self.llmClient cancel];
  [self.asrClient cancel];

  // Stop audio playback.
  [self.playbackPipeline stop];

  // Clear per-reply tracking state.
  [self.segmenter reset];
  self.segmentCounter = 0;
  [self.fullAssistantText setString:@""];
  [self.segmentTextMap removeAllObjects];
}
// Full teardown of the pipeline: stops capture, cancels ASR/LLM/TTS, stops
// playback, resets the segmenter, and deactivates the audio session.
// Does not change the conversation state; callers do that themselves.
- (void)cancelAll {
  // Input side.
  [self.audioCapture stopCapture];

  // Network clients.
  [self.asrClient cancel];
  [self.llmClient cancel];
  [self.ttsClient cancel];

  // Output side.
  [self.playbackPipeline stop];
  [self.segmenter reset];

  // Release the audio session last.
  [self.audioSession deactivateSession];
}
#pragma mark - Private: State Management
// Moves the state machine to newState. A transition to the current state is
// ignored. Otherwise the change is logged and, on the main queue,
// onStateChange is invoked followed by onSpeakingStart / onSpeakingEnd when
// the Speaking state is entered or left.
- (void)updateState:(ConversationState)newState {
  ConversationState previousState = self.state;
  if (previousState == newState) {
    return;
  }

  self.state = newState;
  NSLog(@"[Orchestrator] State: %ld -> %ld", (long)previousState,
        (long)newState);

  BOOL wasSpeaking = (previousState == ConversationStateSpeaking);
  BOOL isSpeaking = (newState == ConversationStateSpeaking);

  dispatch_async(dispatch_get_main_queue(), ^{
    if (self.onStateChange) {
      self.onStateChange(newState);
    }
    // Speaking-specific callbacks.
    if (isSpeaking && !wasSpeaking && self.onSpeakingStart) {
      self.onSpeakingStart();
    }
    if (wasSpeaking && !isSpeaking && self.onSpeakingEnd) {
      self.onSpeakingEnd();
    }
  });
}
// Logs the error and delivers it to the onError callback, if one is set,
// on the main queue.
- (void)reportError:(NSError *)error {
  NSString *message = error.localizedDescription;
  NSLog(@"[Orchestrator] Error: %@", message);

  dispatch_block_t notify = ^{
    if (!self.onError) {
      return;
    }
    self.onError(error);
  };
  dispatch_async(dispatch_get_main_queue(), notify);
}
#pragma mark - AudioCaptureManagerDelegate
// Capture delegate: forwards each captured PCM frame straight to the ASR
// client, on whatever thread the capture manager calls from.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  ASRStreamClient *recognizer = self.asrClient;
  [recognizer sendAudioPCMFrame:pcmFrame];
}
// Capture delegate: relays the RMS value to onVolumeUpdate on the main queue.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  dispatch_block_t notify = ^{
    if (!self.onVolumeUpdate) {
      return;
    }
    self.onVolumeUpdate(rms);
  };
  dispatch_async(dispatch_get_main_queue(), notify);
}
#pragma mark - AudioSessionManagerDelegate
// Audio session delegate: when an interruption begins, tears the whole
// pipeline down and returns to Idle. Other interruption types are ignored.
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
  dispatch_async(self.orchestratorQueue, ^{
    if (type != KBAudioSessionInterruptionTypeBegan) {
      return;
    }
    // Interruption began: stop capture and playback.
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  });
}
// Audio session delegate: reports a user-facing error (code -1) asking the
// user to enable microphone access in Settings.
- (void)audioSessionManagerMicrophonePermissionDenied {
  NSDictionary *details = @{
    NSLocalizedDescriptionKey : @"请在设置中开启麦克风权限"
  };
  NSError *permissionError =
      [NSError errorWithDomain:@"ConversationOrchestrator"
                          code:-1
                      userInfo:details];
  [self reportError:permissionError];
}
#pragma mark - ASRStreamClientDelegate
// ASR delegate: relays interim recognition text to onPartialText on the
// main queue.
- (void)asrClientDidReceivePartialText:(NSString *)text {
  dispatch_block_t notify = ^{
    if (!self.onPartialText) {
      return;
    }
    self.onPartialText(text);
  };
  dispatch_async(dispatch_get_main_queue(), notify);
}
// ASR delegate: handles the final recognition result for the current turn.
// The text is always forwarded to onUserFinalText on the main queue.
//  - Empty text: nothing to answer, so the pipeline is torn down (including
//    deactivating the audio session) and the state returns to Idle.
//  - Non-empty text: per-reply tracking is reset, the playback pipeline is
//    started, the state moves to Thinking, and the text is sent to the LLM.
//    If the playback pipeline cannot start, the error is reported and the
//    turn is aborted instead of requesting a reply that could never play.
- (void)asrClientDidReceiveFinalText:(NSString *)text {
  dispatch_async(self.orchestratorQueue, ^{
    NSLog(@"[Orchestrator] ASR final text: %@", text);

    // Hand the user's text to the UI.
    dispatch_async(dispatch_get_main_queue(), ^{
      if (self.onUserFinalText) {
        self.onUserFinalText(text);
      }
    });

    // Empty result: go back to Idle and release the audio session, which
    // would otherwise stay active because no playback will ever finish.
    if (text.length == 0) {
      [self cancelAll];
      [self updateState:ConversationStateIdle];
      return;
    }

    // Reset text tracking for the new reply.
    [self.fullAssistantText setString:@""];
    [self.segmentTextMap removeAllObjects];
    self.segmentCounter = 0;
    [self.segmenter reset];

    // Start the playback pipeline before asking for a reply.
    NSError *error = nil;
    if (![self.playbackPipeline start:&error]) {
      NSLog(@"[Orchestrator] Failed to start playback pipeline: %@",
            error.localizedDescription);
      [self reportError:error];
      [self cancelAll];
      [self updateState:ConversationStateIdle];
      return;
    }

    // Update state and send the LLM request.
    [self updateState:ConversationStateThinking];
    [self.llmClient sendUserText:text conversationId:self.conversationId];
  });
}
// ASR delegate: recognition failed. Reports the error, then performs the same
// teardown as -stop before returning to Idle, so microphone capture and the
// audio session are not left running while the state says Idle.
// NOTE(review): assumes -[ASRStreamClient cancel] does not itself report a
// failure through this delegate method; confirm against the client.
- (void)asrClientDidFail:(NSError *)error {
  dispatch_async(self.orchestratorQueue, ^{
    [self reportError:error];
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  });
}
#pragma mark - LLMStreamClientDelegate
// LLM delegate: appends the streamed token to the full reply text, feeds it
// to the segmenter, and requests TTS for every segment the segmenter reports
// as ready.
- (void)llmClientDidReceiveToken:(NSString *)token {
  dispatch_queue_t queue = self.orchestratorQueue;
  dispatch_async(queue, ^{
    // Accumulate the full reply.
    [self.fullAssistantText appendString:token];

    // Feed the segmenter and drain whatever became ready.
    Segmenter *segmenter = self.segmenter;
    [segmenter appendToken:token];
    NSArray<NSString *> *readySegments = [segmenter popReadySegments];

    [readySegments enumerateObjectsUsingBlock:^(NSString *segmentText,
                                                NSUInteger idx, BOOL *stop) {
      [self requestTTSForSegment:segmentText];
    }];
  });
}
// LLM delegate: the reply stream has finished. Flushes any text still held
// by the segmenter into a last TTS request and delivers the full reply text
// to onAssistantFullText on the main queue.
// If the reply produced no TTS request at all (for example an empty reply),
// nothing will ever be played and no playback-finished event will arrive, so
// the pipeline is torn down and the state returns to Idle here.
- (void)llmClientDidComplete {
  dispatch_async(self.orchestratorQueue, ^{
    NSLog(@"[Orchestrator] LLM complete");

    // Handle the remaining, not yet emitted segment.
    NSString *remaining = [self.segmenter flushRemainingSegment];
    if (remaining.length > 0) {
      [self requestTTSForSegment:remaining];
    }

    // Deliver the full reply text.
    NSString *fullText = [self.fullAssistantText copy];
    dispatch_async(dispatch_get_main_queue(), ^{
      if (self.onAssistantFullText) {
        self.onAssistantFullText(fullText);
      }
    });

    // segmentCounter is reset when the turn starts and incremented per TTS
    // request, so 0 means nothing was sent to TTS. The state check keeps a
    // late completion (after a barge-in or stop) from tearing down a new turn.
    if (self.segmentCounter == 0 &&
        self.state == ConversationStateThinking) {
      [self cancelAll];
      [self updateState:ConversationStateIdle];
    }
  });
}
// LLM delegate: the reply stream failed. Reports the error, then performs the
// same teardown as -stop before returning to Idle, so pending TTS requests,
// playback and the audio session do not keep running while the state says
// Idle.
// NOTE(review): assumes -[LLMStreamClient cancel] does not itself report a
// failure through this delegate method; confirm against the client.
- (void)llmClientDidFail:(NSError *)error {
  dispatch_async(self.orchestratorQueue, ^{
    [self reportError:error];
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  });
}
#pragma mark - Private: TTS Request
// Assigns the next segment id ("seg_<n>") to segmentText, remembers the text
// under that id, and asks the TTS client to synthesize it.
- (void)requestTTSForSegment:(NSString *)segmentText {
  // Take the current counter value for the id, then advance the counter.
  NSInteger index = self.segmentCounter;
  self.segmentCounter = index + 1;
  NSString *segmentId = [NSString stringWithFormat:@"seg_%ld", (long)index];

  // Remember the segment text.
  [self.segmentTextMap setObject:segmentText forKeyedSubscript:segmentId];

  NSLog(@"[Orchestrator] Requesting TTS for segment %@: %@", segmentId,
        segmentText);

  // Request synthesis.
  [self.ttsClient requestTTSForText:segmentText segmentId:segmentId];
}
#pragma mark - TTSServiceClientDelegate
// TTS delegate: queues the audio URL for playback and, if the state is still
// Thinking, moves it to Speaking.
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
  dispatch_async(self.orchestratorQueue, ^{
    [self.playbackPipeline enqueueURL:url segmentId:segmentId];

    // Still Thinking: switch to Speaking.
    BOOL stillThinking = (self.state == ConversationStateThinking);
    if (!stillThinking) {
      return;
    }
    [self updateState:ConversationStateSpeaking];
  });
}
// TTS delegate: queues the streamed audio chunk for playback and, if the
// state is still Thinking, moves it to Speaking.
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
                          payloadType:(TTSPayloadType)type
                            segmentId:(NSString *)segmentId {
  dispatch_async(self.orchestratorQueue, ^{
    TTSPlaybackPipeline *pipeline = self.playbackPipeline;
    [pipeline enqueueChunk:chunk payloadType:type segmentId:segmentId];

    // Still Thinking: switch to Speaking.
    BOOL stillThinking = (self.state == ConversationStateThinking);
    if (!stillThinking) {
      return;
    }
    [self updateState:ConversationStateSpeaking];
  });
}
// TTS delegate: tells the playback pipeline that the segment is complete.
- (void)ttsClientDidFinishSegment:(NSString *)segmentId {
  dispatch_block_t markComplete = ^{
    [self.playbackPipeline markSegmentComplete:segmentId];
  };
  dispatch_async(self.orchestratorQueue, markComplete);
}
// TTS delegate: reports the error; the conversation state is left unchanged.
- (void)ttsClientDidFail:(NSError *)error {
  dispatch_block_t report = ^{
    [self reportError:error];
  };
  dispatch_async(self.orchestratorQueue, report);
}
#pragma mark - TTSPlaybackPipelineDelegate
// Playback delegate: a segment began playing. Only logged; the duration
// argument is not used.
- (void)pipelineDidStartSegment:(NSString *)segmentId
                       duration:(NSTimeInterval)duration {
  NSString *startedId = segmentId;
  NSLog(@"[Orchestrator] Started playing segment: %@", startedId);
}
// Playback delegate: computes the portion of the segment's text that should
// be visible at the given playback time and delivers it to
// onAssistantVisibleText on the main queue. Segments with no recorded text
// are ignored.
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
                            segmentId:(NSString *)segmentId {
  dispatch_async(self.orchestratorQueue, ^{
    // Look up the text of this segment.
    NSString *fullSegmentText = self.segmentTextMap[segmentId];
    if (fullSegmentText == nil) {
      return;
    }

    // Work out the visible part of the text.
    NSTimeInterval segmentDuration =
        [self.playbackPipeline durationForSegment:segmentId];
    NSString *shownText =
        [self.subtitleSync visibleTextForFullText:fullSegmentText
                                      currentTime:time
                                         duration:segmentDuration];

    // TODO: accumulate the text of earlier segments here to get a complete
    // typewriter effect. Simplified for now: only the current segment shows.
    dispatch_block_t notify = ^{
      if (self.onAssistantVisibleText) {
        self.onAssistantVisibleText(shownText);
      }
    };
    dispatch_async(dispatch_get_main_queue(), notify);
  });
}
// Playback delegate: a segment finished playing. Only logged.
- (void)pipelineDidFinishSegment:(NSString *)segmentId {
  NSString *finishedId = segmentId;
  NSLog(@"[Orchestrator] Finished playing segment: %@", finishedId);
}
// Playback delegate: every queued segment has been played. Returns to Idle
// and deactivates the audio session.
- (void)pipelineDidFinishAllSegments {
  dispatch_block_t finish = ^{
    NSLog(@"[Orchestrator] All segments finished");
    // Back to the idle state.
    [self updateState:ConversationStateIdle];
    [self.audioSession deactivateSession];
  };
  dispatch_async(self.orchestratorQueue, finish);
}
// Playback delegate: playback failed. Reports the error, then performs the
// same teardown as -stop before returning to Idle, so the LLM and TTS
// clients stop feeding a failed pipeline and the audio session is released.
// NOTE(review): assumes -[TTSPlaybackPipeline stop] does not itself report a
// failure through this delegate method; confirm against the pipeline.
- (void)pipelineDidFail:(NSError *)error {
  dispatch_async(self.orchestratorQueue, ^{
    [self reportError:error];
    [self cancelAll];
    [self updateState:ConversationStateIdle];
  });
}
@end