1
This commit is contained in:
@@ -90,6 +90,11 @@
|
||||
// TTS Client
|
||||
self.ttsClient = [[TTSServiceClient alloc] init];
|
||||
self.ttsClient.delegate = self;
|
||||
// ElevenLabs 配置(通过后端代理)
|
||||
self.ttsClient.voiceId = @"JBFqnCBsd6RMkjVDRZzb"; // 默认语音 George
|
||||
self.ttsClient.languageCode = @"zh"; // 中文
|
||||
self.ttsClient.expectedPayloadType =
|
||||
TTSPayloadTypeURL; // 使用 URL 模式(简单)
|
||||
|
||||
// Playback Pipeline
|
||||
self.playbackPipeline = [[TTSPlaybackPipeline alloc] init];
|
||||
|
||||
@@ -41,6 +41,12 @@ typedef NS_ENUM(NSInteger, TTSPayloadType) {
|
||||
/// TTS 服务器 URL
|
||||
@property(nonatomic, copy) NSString *serverURL;
|
||||
|
||||
/// 语音 ID(ElevenLabs voice ID)
|
||||
@property(nonatomic, copy) NSString *voiceId;
|
||||
|
||||
/// 语言代码(如 "zh", "en")
|
||||
@property(nonatomic, copy) NSString *languageCode;
|
||||
|
||||
/// 当前期望的返回类型(由服务端配置决定)
|
||||
@property(nonatomic, assign) TTSPayloadType expectedPayloadType;
|
||||
|
||||
|
||||
@@ -94,6 +94,8 @@
|
||||
NSDictionary *body = @{
|
||||
@"text" : text,
|
||||
@"segmentId" : segmentId,
|
||||
@"voiceId" : self.voiceId ?: @"JBFqnCBsd6RMkjVDRZzb",
|
||||
@"languageCode" : self.languageCode ?: @"zh",
|
||||
@"format" : @"mp3" // 或 m4a
|
||||
};
|
||||
|
||||
@@ -184,6 +186,8 @@
|
||||
NSDictionary *requestDict = @{
|
||||
@"text" : text,
|
||||
@"segmentId" : segmentId,
|
||||
@"voiceId" : self.voiceId ?: @"JBFqnCBsd6RMkjVDRZzb",
|
||||
@"languageCode" : self.languageCode ?: @"zh",
|
||||
@"format" : [self formatStringForPayloadType:self.expectedPayloadType]
|
||||
};
|
||||
|
||||
|
||||
53
keyBoard/Class/AiTalk/VM/VoiceChatStreamingManager.h
Normal file
53
keyBoard/Class/AiTalk/VM/VoiceChatStreamingManager.h
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// VoiceChatStreamingManager.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// Events reported by VoiceChatStreamingManager.
///
/// Every method is optional. The manager dispatches each callback
/// asynchronously onto the main queue.
@protocol VoiceChatStreamingManagerDelegate <NSObject>

@optional

/// The WebSocket client reported that the connection opened.
- (void)voiceChatStreamingManagerDidConnect;

/// The WebSocket client reported a disconnect; `error` is nil when none
/// accompanied it.
- (void)voiceChatStreamingManagerDidDisconnect:(NSError *_Nullable)error;

/// The server started a session and microphone capture began successfully.
- (void)voiceChatStreamingManagerDidStartSession:(NSString *)sessionId;

/// The server announced the start of a conversation turn.
- (void)voiceChatStreamingManagerDidStartTurn:(NSInteger)turnIndex;

/// The server sent an eager end-of-turn event with its transcript and
/// confidence value.
- (void)voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:
            (NSString *)text
                                                             confidence:
                                                                 (double)confidence;

/// The server resumed a turn.
- (void)voiceChatStreamingManagerDidResumeTurn;

/// The audio capture manager reported a new RMS value.
- (void)voiceChatStreamingManagerDidUpdateRMS:(float)rms;

/// An interim transcript arrived.
- (void)voiceChatStreamingManagerDidReceiveInterimTranscript:(NSString *)text;

/// A final transcript arrived.
- (void)voiceChatStreamingManagerDidReceiveFinalTranscript:(NSString *)text;

/// The server signalled the start of the LLM response.
- (void)voiceChatStreamingManagerDidReceiveLLMStart;

/// One LLM response token arrived.
- (void)voiceChatStreamingManagerDidReceiveLLMToken:(NSString *)token;

/// A binary audio chunk arrived from the server.
- (void)voiceChatStreamingManagerDidReceiveAudioChunk:(NSData *)audioData;

/// The server reported completion with the transcript and the AI response.
- (void)voiceChatStreamingManagerDidCompleteWithTranscript:(NSString *)transcript
                                                aiResponse:(NSString *)aiResponse;

/// A local, transport or server error occurred.
- (void)voiceChatStreamingManagerDidFail:(NSError *)error;

@end
|
||||
|
||||
/// Manager for realtime recording and streaming.
///
/// Coordinates the audio session, microphone capture and the WebSocket client.
/// Public methods return immediately; their work runs on an internal serial
/// queue and results are reported through the delegate.
@interface VoiceChatStreamingManager : NSObject

/// Receiver of streaming events. Held weakly, so it may become nil at any time.
@property(nonatomic, weak, nullable) id<VoiceChatStreamingManagerDelegate> delegate;

/// Base WebSocket URL, e.g. wss://api.yourdomain.com/api/ws/chat
/// Setting it also updates the underlying WebSocket client.
@property(nonatomic, copy) NSString *serverURL;

/// YES while microphone capture is running for an active session.
@property(nonatomic, assign, readonly, getter=isStreaming) BOOL streaming;

/// Identifier of the current session, or nil when there is none.
@property(nonatomic, copy, readonly, nullable) NSString *sessionId;

/// Begins the start sequence: validates the token, checks microphone
/// permission (requesting it if needed), configures and activates the audio
/// session, then connects the WebSocket. Failures are reported through
/// -voiceChatStreamingManagerDidFail:.
/// @param token Auth token sent with the connection; must not be empty.
/// @param language Optional language sent with the session-start message.
/// @param voiceId Optional voice identifier sent with the session-start message.
- (void)startWithToken:(NSString *)token
              language:(nullable NSString *)language
               voiceId:(nullable NSString *)voiceId;

/// Stops microphone capture (if running) and tells the server the audio ended.
- (void)stopAndFinalize;

/// Stops microphone capture (if running), sends a cancel message and clears
/// the session identifier.
- (void)cancel;

/// Stops microphone capture (if running), disconnects the WebSocket,
/// deactivates the audio session and clears the session identifier.
- (void)disconnect;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
376
keyBoard/Class/AiTalk/VM/VoiceChatStreamingManager.m
Normal file
376
keyBoard/Class/AiTalk/VM/VoiceChatStreamingManager.m
Normal file
@@ -0,0 +1,376 @@
|
||||
//
|
||||
// VoiceChatStreamingManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "VoiceChatStreamingManager.h"
|
||||
#import "AudioCaptureManager.h"
|
||||
#import "AudioSessionManager.h"
|
||||
#import "VoiceChatWebSocketClient.h"
|
||||
|
||||
static NSString *const kVoiceChatStreamingManagerErrorDomain =
|
||||
@"VoiceChatStreamingManager";
|
||||
|
||||
/// Private state and protocol conformances of VoiceChatStreamingManager.
@interface VoiceChatStreamingManager () <AudioSessionManagerDelegate,
                                         AudioCaptureManagerDelegate,
                                         VoiceChatWebSocketClientDelegate>

// Collaborators.
@property(nonatomic, strong) AudioSessionManager *audioSession;
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
@property(nonatomic, strong) VoiceChatWebSocketClient *webSocketClient;

// Serial queue on which start/stop/cancel/disconnect state changes run.
@property(nonatomic, strong) dispatch_queue_t stateQueue;

// Read-write redeclarations of the public read-only properties. The getter
// name is repeated so it matches the declaration in the header.
@property(nonatomic, assign, readwrite, getter=isStreaming) BOOL streaming;
@property(nonatomic, copy, readwrite) NSString *sessionId;

// Parameters of the most recent -startWithToken:language:voiceId: call.
@property(nonatomic, copy) NSString *pendingToken;
@property(nonatomic, copy) NSString *pendingLanguage;
@property(nonatomic, copy) NSString *pendingVoiceId;

@end
|
||||
|
||||
@implementation VoiceChatStreamingManager
|
||||
|
||||
/// Creates the manager, wires itself as delegate of its three collaborators
/// and installs the default server URL.
- (instancetype)init {
  self = [super init];
  if (!self) {
    return nil;
  }

  _stateQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.manager",
                                      DISPATCH_QUEUE_SERIAL);

  // The audio session manager is a shared instance; this replaces whatever
  // delegate it had before.
  AudioSessionManager *sharedSession = [AudioSessionManager sharedManager];
  sharedSession.delegate = self;
  _audioSession = sharedSession;

  AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
  capture.delegate = self;
  _audioCapture = capture;

  VoiceChatWebSocketClient *client = [[VoiceChatWebSocketClient alloc] init];
  client.delegate = self;
  _webSocketClient = client;

  // NOTE(review): default points at a LAN address over unencrypted ws:// —
  // confirm this is overridden through `serverURL` before shipping.
  _serverURL = @"ws://192.168.2.21:7529/api/ws/chat?token=";
  _webSocketClient.serverURL = _serverURL;

  return self;
}
|
||||
|
||||
/// Tears down capture, the socket and the audio session when the manager dies.
///
/// -disconnect cannot be used here: it dispatches a block that captures
/// `self`, and that block would run after deallocation has finished. The
/// collaborators are captured directly instead, and the same teardown steps
/// run on the state queue without touching `self`.
- (void)dealloc {
  AudioCaptureManager *capture = _audioCapture;
  VoiceChatWebSocketClient *client = _webSocketClient;
  AudioSessionManager *session = _audioSession;
  BOOL wasStreaming = _streaming;

  dispatch_async(_stateQueue, ^{
    if (wasStreaming) {
      [capture stopCapture];
    }
    [client disableAudioSending];
    [client disconnect];
    [session deactivateSession];
  });
}
|
||||
|
||||
/// Stores a copy of the URL and forwards the same value to the WebSocket client.
- (void)setServerURL:(NSString *)serverURL {
  NSString *copiedURL = [serverURL copy];
  _serverURL = copiedURL;
  self.webSocketClient.serverURL = copiedURL;
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
/// Records the start parameters (nil becomes an empty string), closes the
/// audio-sending gate and runs the start sequence on the state queue.
- (void)startWithToken:(NSString *)token
              language:(nullable NSString *)language
               voiceId:(nullable NSString *)voiceId {
  dispatch_block_t beginStart = ^{
    self.pendingToken = token ?: @"";
    self.pendingLanguage = language ?: @"";
    self.pendingVoiceId = voiceId ?: @"";

    // No audio may be sent until the server has started a session.
    [self.webSocketClient disableAudioSending];
    [self startInternal];
  };
  dispatch_async(self.stateQueue, beginStart);
}
|
||||
|
||||
/// Stops capture if it is running, closes the audio-sending gate and sends
/// the end-of-audio message. Runs on the state queue.
- (void)stopAndFinalize {
  dispatch_async(self.stateQueue, ^{
    BOOL captureRunning = self.streaming;
    if (captureRunning) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client endAudio];
  });
}
|
||||
|
||||
/// Stops capture if it is running, closes the audio-sending gate, sends a
/// cancel message and forgets the session identifier. Runs on the state queue.
- (void)cancel {
  dispatch_async(self.stateQueue, ^{
    BOOL captureRunning = self.streaming;
    if (captureRunning) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client cancel];

    self.sessionId = nil;
  });
}
|
||||
|
||||
/// Stops capture if it is running, disconnects the WebSocket, deactivates the
/// audio session and forgets the session identifier. Runs on the state queue.
- (void)disconnect {
  dispatch_async(self.stateQueue, ^{
    BOOL captureRunning = self.streaming;
    if (captureRunning) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }

    VoiceChatWebSocketClient *client = self.webSocketClient;
    [client disableAudioSending];
    [client disconnect];

    [self.audioSession deactivateSession];
    self.sessionId = nil;
  });
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
/// Runs the start sequence. Callers in this file invoke it on the state queue.
///
/// Order of checks:
///   1. token and server URL are present (cheap, no side effects);
///   2. microphone permission — if missing it is requested, and the sequence
///      restarts on the state queue once granted;
///   3. the audio session is configured and activated;
///   4. the WebSocket client connects with the pending token.
///
/// The server URL is validated before the audio session is touched, so an
/// empty URL does not leave an activated audio session behind.
- (void)startInternal {
  if (self.pendingToken.length == 0) {
    NSLog(@"[VoiceChatStreamingManager] Start failed: token is empty");
    [self reportErrorWithMessage:@"Token is required"];
    return;
  }

  if (self.serverURL.length == 0) {
    NSLog(@"[VoiceChatStreamingManager] Start failed: server URL is empty");
    [self reportErrorWithMessage:@"Server URL is required"];
    return;
  }

  if (![self.audioSession hasMicrophonePermission]) {
    __weak typeof(self) weakSelf = self;
    [self.audioSession requestMicrophonePermission:^(BOOL granted) {
      __strong typeof(weakSelf) strongSelf = weakSelf;
      if (!strongSelf) {
        return;
      }
      if (!granted) {
        [strongSelf reportErrorWithMessage:@"Microphone permission denied"];
        return;
      }
      // Re-enter on the state queue now that permission is available.
      dispatch_async(strongSelf.stateQueue, ^{
        [strongSelf startInternal];
      });
    }];
    return;
  }

  NSError *error = nil;
  if (![self.audioSession configureForConversation:&error]) {
    [self reportError:error];
    return;
  }

  if (![self.audioSession activateSession:&error]) {
    [self reportError:error];
    return;
  }

  NSLog(@"[VoiceChatStreamingManager] Start streaming, server: %@",
        self.serverURL);
  self.webSocketClient.serverURL = self.serverURL;
  [self.webSocketClient connectWithToken:self.pendingToken];
}
|
||||
|
||||
/// Delivers an error to the delegate's -voiceChatStreamingManagerDidFail: on
/// the main queue.
///
/// Callers pass errors filled in by out-parameters, which may still be nil
/// after a failure. The delegate parameter is declared nonnull, so a generic
/// error is substituted in that case.
- (void)reportError:(NSError *)error {
  NSError *delivered = error;
  if (!delivered) {
    delivered = [NSError
        errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                   code:-1
               userInfo:@{NSLocalizedDescriptionKey : @"Unknown error"}];
  }

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if ([receiver
            respondsToSelector:@selector(voiceChatStreamingManagerDidFail:)]) {
      [receiver voiceChatStreamingManagerDidFail:delivered];
    }
  });
}
|
||||
|
||||
/// Wraps `message` in an NSError (manager domain, code -1) and reports it.
/// A nil message becomes an empty description.
- (void)reportErrorWithMessage:(NSString *)message {
  NSDictionary *details = @{NSLocalizedDescriptionKey : message ?: @""};
  NSError *wrapped =
      [NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                          code:-1
                      userInfo:details];
  [self reportError:wrapped];
}
|
||||
|
||||
#pragma mark - AudioCaptureManagerDelegate
|
||||
|
||||
/// Forwards a captured PCM frame to the WebSocket client while streaming;
/// frames that arrive when not streaming are dropped.
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
  if (self.streaming) {
    [self.webSocketClient sendAudioPCMFrame:pcmFrame];
  }
}
|
||||
|
||||
/// Relays the RMS value to the delegate on the main queue.
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidUpdateRMS:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidUpdateRMS:rms];
  });
}
|
||||
|
||||
#pragma mark - AudioSessionManagerDelegate
|
||||
|
||||
/// Cancels the current exchange when an audio interruption begins; other
/// interruption types are ignored.
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
  if (type != KBAudioSessionInterruptionTypeBegan) {
    return;
  }
  [self cancel];
}
|
||||
|
||||
/// Surfaces a denied microphone permission to the delegate as an error.
- (void)audioSessionManagerMicrophonePermissionDenied {
  NSString *reason = @"Microphone permission denied";
  [self reportErrorWithMessage:reason];
}
|
||||
|
||||
#pragma mark - VoiceChatWebSocketClientDelegate
|
||||
|
||||
/// On connect: sends the session-start message with the pending language and
/// voice (state queue), and tells the delegate the connection opened (main
/// queue).
- (void)voiceChatClientDidConnect {
  dispatch_async(self.stateQueue, ^{
    NSString *language = self.pendingLanguage;
    NSString *voice = self.pendingVoiceId;
    [self.webSocketClient startSessionWithLanguage:language voiceId:voice];
  });

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidConnect)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidConnect];
  });
}
|
||||
|
||||
/// On disconnect: stops capture if running, deactivates the audio session and
/// clears the session identifier (state queue), then informs the delegate
/// (main queue).
- (void)voiceChatClientDidDisconnect:(NSError *_Nullable)error {
  dispatch_async(self.stateQueue, ^{
    BOOL captureRunning = self.streaming;
    if (captureRunning) {
      [self.audioCapture stopCapture];
      self.streaming = NO;
    }
    [self.audioSession deactivateSession];
    self.sessionId = nil;
  });

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidDisconnect:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidDisconnect:error];
  });
}
|
||||
|
||||
/// On session start: records the session identifier and starts microphone
/// capture. If capture fails, the error is reported and a cancel message is
/// sent. Otherwise streaming is flagged, the audio-sending gate is opened and
/// the delegate is informed on the main queue.
- (void)voiceChatClientDidStartSession:(NSString *)sessionId {
  dispatch_async(self.stateQueue, ^{
    self.sessionId = sessionId;

    NSError *captureError = nil;
    BOOL captureStarted = [self.audioCapture startCapture:&captureError];
    if (!captureStarted) {
      [self reportError:captureError];
      [self.webSocketClient cancel];
      return;
    }

    self.streaming = YES;
    [self.webSocketClient enableAudioSending];

    dispatch_async(dispatch_get_main_queue(), ^{
      id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
      if (![receiver respondsToSelector:@selector
                     (voiceChatStreamingManagerDidStartSession:)]) {
        return;
      }
      [receiver voiceChatStreamingManagerDidStartSession:sessionId];
    });
  });
}
|
||||
|
||||
/// Relays the turn-start event to the delegate on the main queue.
- (void)voiceChatClientDidStartTurn:(NSInteger)turnIndex {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidStartTurn:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidStartTurn:turnIndex];
  });
}
|
||||
|
||||
/// Relays the eager end-of-turn event to the delegate on the main queue.
- (void)voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
                                                   confidence:(double)confidence {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    SEL forwarded = @selector
        (voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:
                                                              confidence:);
    if (![receiver respondsToSelector:forwarded]) {
      return;
    }
    [receiver
        voiceChatStreamingManagerDidReceiveEagerEndOfTurnWithTranscript:text
                                                             confidence:confidence];
  });
}
|
||||
|
||||
/// Relays the turn-resumed event to the delegate on the main queue.
- (void)voiceChatClientDidResumeTurn {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidResumeTurn)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidResumeTurn];
  });
}
|
||||
|
||||
/// Relays an interim transcript to the delegate on the main queue.
- (void)voiceChatClientDidReceiveInterimTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidReceiveInterimTranscript:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveInterimTranscript:text];
  });
}
|
||||
|
||||
/// Relays a final transcript to the delegate on the main queue.
- (void)voiceChatClientDidReceiveFinalTranscript:(NSString *)text {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidReceiveFinalTranscript:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveFinalTranscript:text];
  });
}
|
||||
|
||||
/// Relays the LLM-start event to the delegate on the main queue.
- (void)voiceChatClientDidReceiveLLMStart {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidReceiveLLMStart)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveLLMStart];
  });
}
|
||||
|
||||
/// Relays one LLM token to the delegate on the main queue.
- (void)voiceChatClientDidReceiveLLMToken:(NSString *)token {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidReceiveLLMToken:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveLLMToken:token];
  });
}
|
||||
|
||||
/// Relays a received audio chunk to the delegate on the main queue.
- (void)voiceChatClientDidReceiveAudioChunk:(NSData *)audioData {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatStreamingManagerDidReceiveAudioChunk:)]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidReceiveAudioChunk:audioData];
  });
}
|
||||
|
||||
/// Relays the completion event (transcript plus AI response) to the delegate
/// on the main queue.
- (void)voiceChatClientDidCompleteWithTranscript:(NSString *)transcript
                                      aiResponse:(NSString *)aiResponse {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatStreamingManagerDelegate> receiver = self.delegate;
    SEL forwarded = @selector
        (voiceChatStreamingManagerDidCompleteWithTranscript:aiResponse:);
    if (![receiver respondsToSelector:forwarded]) {
      return;
    }
    [receiver voiceChatStreamingManagerDidCompleteWithTranscript:transcript
                                                      aiResponse:aiResponse];
  });
}
|
||||
|
||||
/// Converts a server error message into an NSError (manager domain, code -2)
/// and reports it. An empty message falls back to "Server error"; the server
/// code is carried under the "code" user-info key.
- (void)voiceChatClientDidReceiveErrorCode:(NSString *)code
                                   message:(NSString *)message {
  NSString *description = @"Server error";
  if (message.length > 0) {
    description = message;
  }

  NSDictionary *details = @{
    NSLocalizedDescriptionKey : description,
    @"code" : code ?: @""
  };
  NSError *serverError =
      [NSError errorWithDomain:kVoiceChatStreamingManagerErrorDomain
                          code:-2
                      userInfo:details];
  [self reportError:serverError];
}
|
||||
|
||||
/// Passes a WebSocket client failure straight through to the delegate.
- (void)voiceChatClientDidFail:(NSError *)error {
  NSError *clientError = error;
  [self reportError:clientError];
}
|
||||
|
||||
@end
|
||||
57
keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.h
Normal file
57
keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.h
Normal file
@@ -0,0 +1,57 @@
|
||||
//
|
||||
// VoiceChatWebSocketClient.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// Events reported by VoiceChatWebSocketClient.
///
/// Every method is optional. The client dispatches each callback
/// asynchronously onto the main queue.
@protocol VoiceChatWebSocketClientDelegate <NSObject>

@optional

/// The WebSocket connection opened.
- (void)voiceChatClientDidConnect;

/// The connection ended. `error` is nil for a requested disconnect or a
/// close frame, and set when receiving failed.
- (void)voiceChatClientDidDisconnect:(NSError *_Nullable)error;

/// A "session_started" message arrived.
- (void)voiceChatClientDidStartSession:(NSString *)sessionId;

/// A "turn_start" message arrived.
- (void)voiceChatClientDidStartTurn:(NSInteger)turnIndex;

/// An "eager_eot" message arrived.
- (void)voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:(NSString *)text
                                                   confidence:(double)confidence;

/// A "turn_resumed" message arrived.
- (void)voiceChatClientDidResumeTurn;

/// A "transcript_interim" message arrived.
- (void)voiceChatClientDidReceiveInterimTranscript:(NSString *)text;

/// A "transcript_final" message arrived.
- (void)voiceChatClientDidReceiveFinalTranscript:(NSString *)text;

/// An "llm_start" message arrived.
- (void)voiceChatClientDidReceiveLLMStart;

/// An "llm_token" message arrived.
- (void)voiceChatClientDidReceiveLLMToken:(NSString *)token;

/// A non-empty binary message arrived.
- (void)voiceChatClientDidReceiveAudioChunk:(NSData *)audioData;

/// A "complete" message arrived.
- (void)voiceChatClientDidCompleteWithTranscript:(NSString *)transcript
                                      aiResponse:(NSString *)aiResponse;

/// An "error" message arrived from the server.
- (void)voiceChatClientDidReceiveErrorCode:(NSString *)code
                                   message:(NSString *)message;

/// A local failure occurred (invalid URL, JSON encoding or decoding, send
/// error).
- (void)voiceChatClientDidFail:(NSError *)error;

@end
|
||||
|
||||
/// WebSocket client for realtime voice chat.
///
/// Sends JSON control messages and binary PCM audio; parses incoming JSON
/// events and binary audio and reports them through the delegate.
@interface VoiceChatWebSocketClient : NSObject

/// Receiver of client events. Held weakly, so it may become nil at any time.
@property(nonatomic, weak, nullable) id<VoiceChatWebSocketClientDelegate> delegate;

/// Base WebSocket URL, e.g. wss://api.yourdomain.com/api/ws/chat
@property(nonatomic, copy) NSString *serverURL;

/// YES between the socket opening and the connection being torn down.
@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;

/// Session identifier from the last "session_started" message, or nil.
@property(nonatomic, copy, readonly, nullable) NSString *sessionId;

/// Tears down any existing connection, then opens a new one to `serverURL`
/// with `token` set as the `token` query item (when non-empty).
- (void)connectWithToken:(NSString *)token;

/// Closes the connection; the delegate is told only if a socket task existed.
- (void)disconnect;

/// Sends a "session_start" message. Non-empty `language` / `voiceId` are
/// included under "config" as "language" / "voice_id".
- (void)startSessionWithLanguage:(nullable NSString *)language
                         voiceId:(nullable NSString *)voiceId;

/// Allows -sendAudioPCMFrame: to transmit frames.
- (void)enableAudioSending;

/// Makes -sendAudioPCMFrame: drop frames.
- (void)disableAudioSending;

/// Sends one PCM frame as a binary message. The frame is dropped when it is
/// empty, the socket is not connected, or audio sending is disabled.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;

/// Sends an "audio_end" message.
- (void)endAudio;

/// Sends a "cancel" message.
- (void)cancel;

@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
457
keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
Normal file
457
keyBoard/Class/AiTalk/VM/VoiceChatWebSocketClient.m
Normal file
@@ -0,0 +1,457 @@
|
||||
//
|
||||
// VoiceChatWebSocketClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "VoiceChatWebSocketClient.h"
|
||||
|
||||
static NSString *const kVoiceChatWebSocketClientErrorDomain =
|
||||
@"VoiceChatWebSocketClient";
|
||||
|
||||
/// Private state of VoiceChatWebSocketClient.
@interface VoiceChatWebSocketClient () <NSURLSessionWebSocketDelegate>

// Session and task backing the current connection; both nil when idle.
@property(nonatomic, strong) NSURLSession *urlSession;
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;

// Serial queue used for connect/disconnect and for issuing sends.
@property(nonatomic, strong) dispatch_queue_t networkQueue;

// Read-write redeclarations of the public read-only properties. The getter
// name is repeated so it matches the declaration in the header.
@property(nonatomic, assign, readwrite, getter=isConnected) BOOL connected;
@property(nonatomic, copy, readwrite) NSString *sessionId;

// Gate checked by -sendAudioPCMFrame: before a frame is transmitted.
@property(nonatomic, assign) BOOL audioSendingEnabled;

@end
|
||||
|
||||
@implementation VoiceChatWebSocketClient
|
||||
|
||||
/// Creates the client with its serial network queue, the placeholder server
/// URL and audio sending disabled.
- (instancetype)init {
  self = [super init];
  if (!self) {
    return nil;
  }

  _audioSendingEnabled = NO;
  _serverURL = @"wss://api.yourdomain.com/api/ws/chat";
  _networkQueue = dispatch_queue_create("com.keyboard.aitalk.voicechat.ws",
                                        DISPATCH_QUEUE_SERIAL);
  return self;
}
|
||||
|
||||
/// Closes the socket and invalidates the URL session when the client dies.
///
/// -disconnect cannot be used here: it dispatches a block that captures
/// `self`, and that block would run after deallocation has finished. The
/// ivars are used directly instead, and no delegate notification is sent.
- (void)dealloc {
  [_webSocketTask
      cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                   reason:nil];
  [_urlSession invalidateAndCancel];
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
/// Opens a new WebSocket connection on the network queue.
///
/// Any existing connection is torn down first. The URL is built from
/// `serverURL` plus the token; if that fails an "Invalid server URL" error is
/// reported. After the task is resumed the receive loop is started.
///
/// The log line omits the query string because it carries the auth token.
- (void)connectWithToken:(NSString *)token {
  dispatch_async(self.networkQueue, ^{
    [self disconnectInternal];

    NSURL *url = [self buildURLWithToken:token];
    if (!url) {
      [self reportErrorWithMessage:@"Invalid server URL"];
      return;
    }

    // Strip the query before logging so the token never reaches the logs.
    NSURLComponents *loggable = [NSURLComponents componentsWithURL:url
                                           resolvingAgainstBaseURL:NO];
    loggable.query = nil;
    NSLog(@"[VoiceChatWebSocketClient] Connecting: %@",
          loggable.string ?: @"<unavailable>");

    NSURLSessionConfiguration *config =
        [NSURLSessionConfiguration defaultSessionConfiguration];
    config.timeoutIntervalForRequest = 30;
    config.timeoutIntervalForResource = 300;

    self.urlSession = [NSURLSession sessionWithConfiguration:config
                                                    delegate:self
                                               delegateQueue:nil];

    self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
    [self.webSocketTask resume];
    [self receiveMessage];
  });
}
|
||||
|
||||
/// Tears the connection down on the network queue. The delegate receives a
/// disconnect (with nil error) only when a socket task existed beforehand.
- (void)disconnect {
  dispatch_async(self.networkQueue, ^{
    BOOL hadTask = (self.webSocketTask != nil);
    if (!hadTask) {
      [self disconnectInternal];
      return;
    }

    NSLog(@"[VoiceChatWebSocketClient] Disconnect requested");
    [self disconnectInternal];
    [self notifyDisconnect:nil];
  });
}
|
||||
|
||||
/// Sends the "session_start" message. A "config" object is attached only when
/// at least one of `language` / `voiceId` is non-empty.
- (void)startSessionWithLanguage:(nullable NSString *)language
                         voiceId:(nullable NSString *)voiceId {
  NSMutableDictionary *options = [NSMutableDictionary dictionary];
  if (language.length > 0) {
    options[@"language"] = language;
  }
  if (voiceId.length > 0) {
    options[@"voice_id"] = voiceId;
  }

  NSMutableDictionary *payload =
      [NSMutableDictionary dictionaryWithObject:@"session_start"
                                         forKey:@"type"];
  if (options.count > 0) {
    payload[@"config"] = options;
  }

  [self sendJSON:payload];
}
|
||||
|
||||
/// Opens the audio-sending gate; the flag is changed on the network queue.
- (void)enableAudioSending {
  dispatch_block_t openGate = ^{
    self.audioSendingEnabled = YES;
  };
  dispatch_async(self.networkQueue, openGate);
}
|
||||
|
||||
/// Closes the audio-sending gate; the flag is changed on the network queue.
- (void)disableAudioSending {
  dispatch_block_t closeGate = ^{
    self.audioSendingEnabled = NO;
  };
  dispatch_async(self.networkQueue, closeGate);
}
|
||||
|
||||
/// Sends one PCM frame as a binary WebSocket message.
///
/// The frame is dropped when it is empty, when the socket is not connected,
/// or when audio sending is disabled. Connection state is checked once before
/// hopping to the network queue and again on it, because it may change in
/// between. Send failures are reported to the delegate.
///
/// Successful sends are not logged: this runs once per captured frame, and a
/// log line per frame floods the console.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
  if (!self.connected || !self.webSocketTask || pcmFrame.length == 0) {
    return;
  }

  dispatch_async(self.networkQueue, ^{
    if (!self.audioSendingEnabled) {
      return;
    }
    if (!self.connected || !self.webSocketTask) {
      return;
    }

    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
    [self.webSocketTask sendMessage:message
                  completionHandler:^(NSError *_Nullable error) {
                    if (error) {
                      [self reportError:error];
                    }
                  }];
  });
}
|
||||
|
||||
/// Tells the server that no more audio will follow.
- (void)endAudio {
  NSLog(@"[VoiceChatWebSocketClient] Sending audio_end");
  NSDictionary *payload = @{@"type" : @"audio_end"};
  [self sendJSON:payload];
}
|
||||
|
||||
/// Asks the server to cancel the current exchange.
- (void)cancel {
  NSLog(@"[VoiceChatWebSocketClient] Sending cancel");
  NSDictionary *payload = @{@"type" : @"cancel"};
  [self sendJSON:payload];
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
/// Builds the connection URL from `serverURL` and the auth token.
///
/// @param token Auth token. When non-empty it replaces the first existing
///              `token` query item, or is appended if there is none. When
///              empty, the server URL is returned unchanged.
/// @return The URL, or nil when `serverURL` is empty or cannot be parsed.
///
/// NSURLComponents leaves "+" unencoded in query values, and servers that
/// form-decode the query read it as a space. Any "+" in the token value is
/// therefore rewritten to "%2B". Other query items are left untouched.
- (NSURL *)buildURLWithToken:(NSString *)token {
  if (self.serverURL.length == 0) {
    return nil;
  }

  NSURLComponents *components =
      [NSURLComponents componentsWithString:self.serverURL];
  if (!components) {
    return nil;
  }

  if (token.length == 0) {
    return components.URL;
  }

  NSURLQueryItem *tokenItem = [NSURLQueryItem queryItemWithName:@"token"
                                                          value:token];
  NSMutableArray<NSURLQueryItem *> *items =
      [components.queryItems mutableCopy] ?: [NSMutableArray array];
  NSUInteger tokenIndex = [items
      indexOfObjectPassingTest:^BOOL(NSURLQueryItem *item, NSUInteger idx,
                                     BOOL *stop) {
        return [item.name isEqualToString:@"token"];
      }];
  if (tokenIndex == NSNotFound) {
    tokenIndex = items.count;
    [items addObject:tokenItem];
  } else {
    items[tokenIndex] = tokenItem;
  }
  // Let NSURLComponents apply its standard percent-encoding first.
  components.queryItems = items;

  // Then patch only the token's encoded value so "+" survives form-decoding.
  NSMutableArray<NSURLQueryItem *> *encodedItems =
      [components.percentEncodedQueryItems mutableCopy];
  if (tokenIndex < encodedItems.count) {
    NSURLQueryItem *encodedToken = encodedItems[tokenIndex];
    if ([encodedToken.value containsString:@"+"]) {
      NSString *safeValue =
          [encodedToken.value stringByReplacingOccurrencesOfString:@"+"
                                                        withString:@"%2B"];
      encodedItems[tokenIndex] =
          [NSURLQueryItem queryItemWithName:encodedToken.name value:safeValue];
      components.percentEncodedQueryItems = encodedItems;
    }
  }

  return components.URL;
}
|
||||
|
||||
/// Serializes `dict` to JSON and sends it as a text WebSocket message.
///
/// Nothing is sent when there is no socket task. Serialization failures are
/// reported to the delegate instead of being sent. The send itself is issued
/// on the network queue; send errors are reported to the delegate.
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }

  // -dataWithJSONObject: raises on objects that are not valid JSON, so check
  // first and report a normal error instead.
  if (![NSJSONSerialization isValidJSONObject:dict]) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  // Success is signalled by the return value, not by the error pointer.
  if (!jsonData) {
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to encode JSON message"];
    }
    return;
  }

  NSString *jsonString = [[NSString alloc] initWithData:jsonData
                                               encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  dispatch_async(self.networkQueue, ^{
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    [self.webSocketTask sendMessage:message
                  completionHandler:^(NSError *_Nullable error) {
                    if (error) {
                      [self reportError:error];
                    }
                  }];
  });
}
|
||||
|
||||
/// Arms one receive on the current socket task and re-arms itself after each
/// message, forming the receive loop.
///
/// The task is captured when the receive is armed. If the client has moved to
/// a different task (or none) by the time the completion runs, the result is
/// ignored. This stops a superseded connection from tearing down a newer one
/// or sending a second disconnect notification.
///
/// On error the delegate is told and the connection is torn down, except for
/// NSURLErrorCancelled and code 57 (POSIX ENOTCONN), which are ignored.
- (void)receiveMessage {
  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (!task) {
    return;
  }

  __weak typeof(self) weakSelf = self;
  [task receiveMessageWithCompletionHandler:^(
            NSURLSessionWebSocketMessage *_Nullable message,
            NSError *_Nullable error) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }

    // Result belongs to a connection that has since been replaced or closed.
    if (strongSelf.webSocketTask != task) {
      return;
    }

    if (error) {
      if (error.code != NSURLErrorCancelled && error.code != 57) {
        [strongSelf notifyDisconnect:error];
        [strongSelf disconnectInternal];
      }
      return;
    }

    if (message.type == NSURLSessionWebSocketMessageTypeString) {
      NSLog(@"[VoiceChatWebSocketClient] Received text: %@", message.string);
      [strongSelf handleTextMessage:message.string];
    } else if (message.type == NSURLSessionWebSocketMessageTypeData) {
      NSLog(@"[VoiceChatWebSocketClient] Received binary: %lu bytes",
            (unsigned long)message.data.length);
      [strongSelf handleBinaryMessage:message.data];
    }

    [strongSelf receiveMessage];
  }];
}
|
||||
|
||||
/// Parses one JSON text message from the server and forwards the matching
/// event to the delegate on the main queue.
///
/// The payload comes from the network, so every field is type-checked before
/// use: the root must be a dictionary, string fields accept strings (numbers
/// are converted with -stringValue) and fall back to "", numeric fields fall
/// back to 0. Unknown or missing "type" values are ignored. Parse failures
/// and non-dictionary payloads are reported through -voiceChatClientDidFail:.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }

  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }

  NSError *jsonError = nil;
  id parsed = [NSJSONSerialization JSONObjectWithData:data
                                              options:0
                                                error:&jsonError];
  // Success is signalled by the return value, not by the error pointer.
  if (!parsed) {
    if (jsonError) {
      [self reportError:jsonError];
    } else {
      [self reportErrorWithMessage:@"Failed to decode JSON message"];
    }
    return;
  }
  if (![parsed isKindOfClass:[NSDictionary class]]) {
    [self reportErrorWithMessage:@"Unexpected message format"];
    return;
  }
  NSDictionary *json = parsed;

  // Field accessors tolerant of missing, NSNull or wrongly typed values.
  NSString * (^stringForKey)(NSString *) = ^NSString *(NSString *key) {
    id value = json[key];
    if ([value isKindOfClass:[NSString class]]) {
      return value;
    }
    if ([value isKindOfClass:[NSNumber class]]) {
      return [value stringValue];
    }
    return @"";
  };
  id (^numericForKey)(NSString *) = ^id(NSString *key) {
    id value = json[key];
    BOOL numeric = [value isKindOfClass:[NSNumber class]] ||
                   [value isKindOfClass:[NSString class]];
    return numeric ? value : nil;  // messaging nil yields 0
  };

  NSString *type = stringForKey(@"type");
  if (type.length == 0) {
    return;
  }

  // Each branch builds the delegate call; it is dispatched once at the end.
  dispatch_block_t delivery = nil;

  if ([type isEqualToString:@"session_started"]) {
    NSString *sessionId = stringForKey(@"session_id");
    self.sessionId = sessionId;
    delivery = ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidStartSession:)]) {
        [self.delegate voiceChatClientDidStartSession:sessionId];
      }
    };
  } else if ([type isEqualToString:@"transcript_interim"]) {
    NSString *transcript = stringForKey(@"text");
    delivery = ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveInterimTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveInterimTranscript:transcript];
      }
    };
  } else if ([type isEqualToString:@"transcript_final"]) {
    NSString *transcript = stringForKey(@"text");
    delivery = ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveFinalTranscript:)]) {
        [self.delegate voiceChatClientDidReceiveFinalTranscript:transcript];
      }
    };
  } else if ([type isEqualToString:@"turn_start"]) {
    NSInteger turnIndex = [numericForKey(@"turn_index") integerValue];
    delivery = ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidStartTurn:)]) {
        [self.delegate voiceChatClientDidStartTurn:turnIndex];
      }
    };
  } else if ([type isEqualToString:@"eager_eot"]) {
    NSString *transcript = stringForKey(@"transcript");
    double confidence = [numericForKey(@"confidence") doubleValue];
    delivery = ^{
      if ([self.delegate
              respondsToSelector:@selector
              (voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:
                                                          confidence:)]) {
        [self.delegate
            voiceChatClientDidReceiveEagerEndOfTurnWithTranscript:transcript
                                                       confidence:confidence];
      }
    };
  } else if ([type isEqualToString:@"turn_resumed"]) {
    delivery = ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidResumeTurn)]) {
        [self.delegate voiceChatClientDidResumeTurn];
      }
    };
  } else if ([type isEqualToString:@"llm_start"]) {
    delivery = ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveLLMStart)]) {
        [self.delegate voiceChatClientDidReceiveLLMStart];
      }
    };
  } else if ([type isEqualToString:@"llm_token"]) {
    NSString *token = stringForKey(@"token");
    delivery = ^{
      if ([self.delegate
              respondsToSelector:@selector(voiceChatClientDidReceiveLLMToken:)]) {
        [self.delegate voiceChatClientDidReceiveLLMToken:token];
      }
    };
  } else if ([type isEqualToString:@"complete"]) {
    NSString *transcript = stringForKey(@"transcript");
    NSString *aiResponse = stringForKey(@"ai_response");
    delivery = ^{
      if ([self.delegate
              respondsToSelector:@selector
              (voiceChatClientDidCompleteWithTranscript:aiResponse:)]) {
        [self.delegate voiceChatClientDidCompleteWithTranscript:transcript
                                                     aiResponse:aiResponse];
      }
    };
  } else if ([type isEqualToString:@"error"]) {
    NSString *code = stringForKey(@"code");
    NSString *message = stringForKey(@"message");
    delivery = ^{
      if ([self.delegate respondsToSelector:@selector
                         (voiceChatClientDidReceiveErrorCode:message:)]) {
        [self.delegate voiceChatClientDidReceiveErrorCode:code message:message];
      }
    };
  }

  if (delivery) {
    dispatch_async(dispatch_get_main_queue(), delivery);
  }
}
|
||||
|
||||
/// Forwards a non-empty binary message to the delegate as an audio chunk on
/// the main queue; empty payloads are dropped.
- (void)handleBinaryMessage:(NSData *)data {
  if (data.length == 0) {
    return;
  }

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatWebSocketClientDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector
                   (voiceChatClientDidReceiveAudioChunk:)]) {
      return;
    }
    [receiver voiceChatClientDidReceiveAudioChunk:data];
  });
}
|
||||
|
||||
/// Resets connection state and releases the socket task and URL session.
/// Does not notify the delegate.
- (void)disconnectInternal {
  self.connected = NO;
  self.sessionId = nil;
  self.audioSendingEnabled = NO;

  NSURLSessionWebSocketTask *task = self.webSocketTask;
  if (task) {
    [task cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                       reason:nil];
    self.webSocketTask = nil;
  }

  NSURLSession *session = self.urlSession;
  if (session) {
    [session invalidateAndCancel];
    self.urlSession = nil;
  }
}
|
||||
|
||||
/// Delivers an error to the delegate's -voiceChatClientDidFail: on the main
/// queue.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatWebSocketClientDelegate> receiver = self.delegate;
    if (![receiver respondsToSelector:@selector(voiceChatClientDidFail:)]) {
      return;
    }
    [receiver voiceChatClientDidFail:error];
  });
}
|
||||
|
||||
/// Wraps `message` in an NSError (client domain, code -1) and reports it.
/// A nil message becomes an empty description.
- (void)reportErrorWithMessage:(NSString *)message {
  NSDictionary *details = @{NSLocalizedDescriptionKey : message ?: @""};
  NSError *wrapped =
      [NSError errorWithDomain:kVoiceChatWebSocketClientErrorDomain
                          code:-1
                      userInfo:details];
  [self reportError:wrapped];
}
|
||||
|
||||
/// Marks the client as not connected and tells the delegate about the
/// disconnect on the main queue.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  self.connected = NO;

  dispatch_async(dispatch_get_main_queue(), ^{
    id<VoiceChatWebSocketClientDelegate> receiver = self.delegate;
    if (![receiver
            respondsToSelector:@selector(voiceChatClientDidDisconnect:)]) {
      return;
    }
    [receiver voiceChatClientDidDisconnect:error];
  });
}
|
||||
|
||||
#pragma mark - NSURLSessionWebSocketDelegate
|
||||
|
||||
/// Session callback: the WebSocket handshake completed.
///
/// Marks the client connected and informs the delegate on the main queue.
/// A callback for a task that is no longer the current one is ignored, so a
/// superseded connection cannot flip the client to "connected".
- (void)URLSession:(NSURLSession *)session
          webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didOpenWithProtocol:(NSString *)protocol {
  if (webSocketTask != self.webSocketTask) {
    return;
  }

  self.connected = YES;
  NSLog(@"[VoiceChatWebSocketClient] Connected");
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector(voiceChatClientDidConnect)]) {
      [self.delegate voiceChatClientDidConnect];
    }
  });
}
|
||||
|
||||
/// Session callback: the WebSocket was closed.
///
/// Notifies the delegate (nil error) and tears the connection down. The
/// callback is ignored when it refers to a task that is not the current one.
/// This covers a connection that was already torn down locally (current task
/// is nil) and a stale task being closed after a reconnect.
- (void)URLSession:(NSURLSession *)session
       webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
              reason:(NSData *)reason {
  if (webSocketTask != self.webSocketTask) {
    return;
  }

  NSLog(@"[VoiceChatWebSocketClient] Closed with code: %ld", (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}
|
||||
|
||||
@end
|
||||
Reference in New Issue
Block a user