1
This commit is contained in:
@@ -198,6 +198,10 @@
|
||||
04E038D82F20BFFB002CA5A0 /* websocket-api.md in Resources */ = {isa = PBXBuildFile; fileRef = 04E038D72F20BFFB002CA5A0 /* websocket-api.md */; };
|
||||
04E038DD2F20C420002CA5A0 /* VoiceChatStreamingManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038DA2F20C420002CA5A0 /* VoiceChatStreamingManager.m */; };
|
||||
04E038DE2F20C420002CA5A0 /* VoiceChatWebSocketClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038DC2F20C420002CA5A0 /* VoiceChatWebSocketClient.m */; };
|
||||
04E038E32F20E500002CA5A0 /* deepgramAPI.md in Resources */ = {isa = PBXBuildFile; fileRef = 04E038E22F20E500002CA5A0 /* deepgramAPI.md */; };
|
||||
04E038E82F20E877002CA5A0 /* DeepgramWebSocketClient.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */; };
|
||||
04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */; };
|
||||
04E038EF2F21F0EC002CA5A0 /* AiVM.m in Sources */ = {isa = PBXBuildFile; fileRef = 04E038EE2F21F0EC002CA5A0 /* AiVM.m */; };
|
||||
04E161832F10E6470022C23B /* normal_hei_them.zip in Resources */ = {isa = PBXBuildFile; fileRef = 04E161812F10E6470022C23B /* normal_hei_them.zip */; };
|
||||
04E161842F10E6470022C23B /* normal_them.zip in Resources */ = {isa = PBXBuildFile; fileRef = 04E161822F10E6470022C23B /* normal_them.zip */; };
|
||||
04FC95672EB0546C007BD342 /* KBKey.m in Sources */ = {isa = PBXBuildFile; fileRef = 04FC95652EB0546C007BD342 /* KBKey.m */; };
|
||||
@@ -616,6 +620,13 @@
|
||||
04E038DA2F20C420002CA5A0 /* VoiceChatStreamingManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = VoiceChatStreamingManager.m; sourceTree = "<group>"; };
|
||||
04E038DB2F20C420002CA5A0 /* VoiceChatWebSocketClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = VoiceChatWebSocketClient.h; sourceTree = "<group>"; };
|
||||
04E038DC2F20C420002CA5A0 /* VoiceChatWebSocketClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = VoiceChatWebSocketClient.m; sourceTree = "<group>"; };
|
||||
04E038E22F20E500002CA5A0 /* deepgramAPI.md */ = {isa = PBXFileReference; lastKnownFileType = net.daringfireball.markdown; path = deepgramAPI.md; sourceTree = "<group>"; };
|
||||
04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramStreamingManager.h; sourceTree = "<group>"; };
|
||||
04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramStreamingManager.m; sourceTree = "<group>"; };
|
||||
04E038E62F20E877002CA5A0 /* DeepgramWebSocketClient.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = DeepgramWebSocketClient.h; sourceTree = "<group>"; };
|
||||
04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = DeepgramWebSocketClient.m; sourceTree = "<group>"; };
|
||||
04E038ED2F21F0EC002CA5A0 /* AiVM.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AiVM.h; sourceTree = "<group>"; };
|
||||
04E038EE2F21F0EC002CA5A0 /* AiVM.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AiVM.m; sourceTree = "<group>"; };
|
||||
04E161812F10E6470022C23B /* normal_hei_them.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = normal_hei_them.zip; sourceTree = "<group>"; };
|
||||
04E161822F10E6470022C23B /* normal_them.zip */ = {isa = PBXFileReference; lastKnownFileType = archive.zip; path = normal_them.zip; sourceTree = "<group>"; };
|
||||
04FC953A2EAFAE56007BD342 /* KeyBoardPrefixHeader.pch */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = KeyBoardPrefixHeader.pch; sourceTree = "<group>"; };
|
||||
@@ -998,6 +1009,12 @@
|
||||
04E038DA2F20C420002CA5A0 /* VoiceChatStreamingManager.m */,
|
||||
04E038DB2F20C420002CA5A0 /* VoiceChatWebSocketClient.h */,
|
||||
04E038DC2F20C420002CA5A0 /* VoiceChatWebSocketClient.m */,
|
||||
04E038E42F20E877002CA5A0 /* DeepgramStreamingManager.h */,
|
||||
04E038E52F20E877002CA5A0 /* DeepgramStreamingManager.m */,
|
||||
04E038E62F20E877002CA5A0 /* DeepgramWebSocketClient.h */,
|
||||
04E038E72F20E877002CA5A0 /* DeepgramWebSocketClient.m */,
|
||||
04E038ED2F21F0EC002CA5A0 /* AiVM.h */,
|
||||
04E038EE2F21F0EC002CA5A0 /* AiVM.m */,
|
||||
);
|
||||
path = VM;
|
||||
sourceTree = "<group>";
|
||||
@@ -1007,6 +1024,7 @@
|
||||
children = (
|
||||
046086742F191CC700757C95 /* AI技术分析.txt */,
|
||||
04E038D72F20BFFB002CA5A0 /* websocket-api.md */,
|
||||
04E038E22F20E500002CA5A0 /* deepgramAPI.md */,
|
||||
0460866C2F191A5100757C95 /* M */,
|
||||
0460866D2F191A5100757C95 /* V */,
|
||||
0460866E2F191A5100757C95 /* VC */,
|
||||
@@ -2027,6 +2045,7 @@
|
||||
04286A132ECDEBF900CE730C /* KBSkinIconMap.strings in Resources */,
|
||||
04C6EABD2EAF86530089C901 /* Main.storyboard in Resources */,
|
||||
046086CB2F1A092500757C95 /* comments_mock.json in Resources */,
|
||||
04E038E32F20E500002CA5A0 /* deepgramAPI.md in Resources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -2233,6 +2252,8 @@
|
||||
0498BD712EE02A41006CC1D5 /* KBForgetPwdNewPwdVC.m in Sources */,
|
||||
048908EF2EBF861800FABA60 /* KBSkinSectionTitleCell.m in Sources */,
|
||||
0450AAE22EF03D5100B6AF06 /* KBPerson.swift in Sources */,
|
||||
04E038E82F20E877002CA5A0 /* DeepgramWebSocketClient.m in Sources */,
|
||||
04E038E92F20E877002CA5A0 /* DeepgramStreamingManager.m in Sources */,
|
||||
048908E32EBF821700FABA60 /* KBSkinDetailVC.m in Sources */,
|
||||
0477BDF32EBB7B850055D639 /* KBDirectionIndicatorView.m in Sources */,
|
||||
049FB21A2EC20A9E00FAB05D /* KBMyKeyBoardVC.m in Sources */,
|
||||
@@ -2275,6 +2296,7 @@
|
||||
04FC97092EB31B14007BD342 /* KBHUD.m in Sources */,
|
||||
04FC970E2EB334F8007BD342 /* UIImageView+KBWebImage.m in Sources */,
|
||||
049FB2232EC311F900FAB05D /* KBPersonInfoVC.m in Sources */,
|
||||
04E038EF2F21F0EC002CA5A0 /* AiVM.m in Sources */,
|
||||
0498BD6B2EE025FC006CC1D5 /* KBForgetPwdVC.m in Sources */,
|
||||
046086B12F19239B00757C95 /* SubtitleSync.m in Sources */,
|
||||
046086B22F19239B00757C95 /* TTSServiceClient.m in Sources */,
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#import "KBAiMainVC.h"
|
||||
#import "ConversationOrchestrator.h"
|
||||
#import "DeepgramStreamingManager.h"
|
||||
#import "KBAICommentView.h"
|
||||
#import "KBAiChatView.h"
|
||||
#import "KBAiRecordButton.h"
|
||||
@@ -15,13 +16,15 @@
|
||||
#import "KBUserSessionManager.h"
|
||||
|
||||
@interface KBAiMainVC () <KBAiRecordButtonDelegate,
|
||||
VoiceChatStreamingManagerDelegate>
|
||||
VoiceChatStreamingManagerDelegate,
|
||||
DeepgramStreamingManagerDelegate>
|
||||
@property(nonatomic, weak) LSTPopView *popView;
|
||||
|
||||
// UI
|
||||
@property(nonatomic, strong) KBAiChatView *chatView;
|
||||
@property(nonatomic, strong) KBAiRecordButton *recordButton;
|
||||
@property(nonatomic, strong) UILabel *statusLabel;
|
||||
@property(nonatomic, strong) UILabel *transcriptLabel;
|
||||
@property(nonatomic, strong) UIButton *commentButton;
|
||||
@property(nonatomic, strong) KBAICommentView *commentView;
|
||||
@property(nonatomic, strong) UIView *tabbarBackgroundView;
|
||||
@@ -32,9 +35,11 @@
|
||||
// 核心模块
|
||||
@property(nonatomic, strong) ConversationOrchestrator *orchestrator;
|
||||
@property(nonatomic, strong) VoiceChatStreamingManager *streamingManager;
|
||||
@property(nonatomic, strong) DeepgramStreamingManager *deepgramManager;
|
||||
|
||||
// 文本跟踪
|
||||
@property(nonatomic, strong) NSMutableString *assistantVisibleText;
|
||||
@property(nonatomic, strong) NSMutableString *deepgramFullText;
|
||||
|
||||
// 日志节流
|
||||
@property(nonatomic, assign) NSTimeInterval lastRMSLogTime;
|
||||
@@ -55,6 +60,7 @@
|
||||
[self setupUI];
|
||||
[self setupOrchestrator];
|
||||
[self setupStreamingManager];
|
||||
[self setupDeepgramManager];
|
||||
}
|
||||
|
||||
- (void)viewWillAppear:(BOOL)animated {
|
||||
@@ -68,6 +74,7 @@
|
||||
// 页面消失时停止对话
|
||||
[self.orchestrator stop];
|
||||
[self.streamingManager disconnect];
|
||||
[self.deepgramManager disconnect];
|
||||
}
|
||||
|
||||
- (void)viewDidLayoutSubviews {
|
||||
@@ -132,6 +139,16 @@
|
||||
self.statusLabel.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.view addSubview:self.statusLabel];
|
||||
|
||||
// 转写文本标签
|
||||
self.transcriptLabel = [[UILabel alloc] init];
|
||||
self.transcriptLabel.text = @"";
|
||||
self.transcriptLabel.font = [UIFont systemFontOfSize:16];
|
||||
self.transcriptLabel.textColor = [UIColor labelColor];
|
||||
self.transcriptLabel.numberOfLines = 0;
|
||||
self.transcriptLabel.textAlignment = NSTextAlignmentLeft;
|
||||
self.transcriptLabel.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.view addSubview:self.transcriptLabel];
|
||||
|
||||
// 聊天视图
|
||||
// self.chatView = [[KBAiChatView alloc] init];
|
||||
// self.chatView.backgroundColor = [UIColor systemBackgroundColor];
|
||||
@@ -177,6 +194,13 @@
|
||||
make.right.equalTo(self.view).offset(-16);
|
||||
}];
|
||||
|
||||
[self.transcriptLabel mas_makeConstraints:^(MASConstraintMaker *make) {
|
||||
make.top.equalTo(self.statusLabel.mas_bottom).offset(8);
|
||||
make.left.equalTo(self.view).offset(16);
|
||||
make.right.equalTo(self.view).offset(-16);
|
||||
make.bottom.lessThanOrEqualTo(self.recordButton.mas_top).offset(-16);
|
||||
}];
|
||||
|
||||
[self.recordButton mas_makeConstraints:^(MASConstraintMaker *make) {
|
||||
make.left.equalTo(self.view.mas_safeAreaLayoutGuideLeft).offset(20);
|
||||
make.right.equalTo(self.view.mas_safeAreaLayoutGuideRight).offset(-20);
|
||||
@@ -304,6 +328,26 @@
|
||||
self.lastRMSLogTime = 0;
|
||||
}
|
||||
|
||||
#pragma mark - Deepgram Manager
|
||||
|
||||
- (void)setupDeepgramManager {
|
||||
self.deepgramManager = [[DeepgramStreamingManager alloc] init];
|
||||
self.deepgramManager.delegate = self;
|
||||
self.deepgramManager.serverURL = @"wss://api.deepgram.com/v1/listen";
|
||||
self.deepgramManager.apiKey = @"9c792eb63a65d644cbc95785155754cd1e84f8cf";
|
||||
self.deepgramManager.language = @"en";
|
||||
self.deepgramManager.model = @"nova-3";
|
||||
self.deepgramManager.punctuate = YES;
|
||||
self.deepgramManager.smartFormat = YES;
|
||||
self.deepgramManager.interimResults = YES;
|
||||
self.deepgramManager.encoding = @"linear16";
|
||||
self.deepgramManager.sampleRate = 16000.0;
|
||||
self.deepgramManager.channels = 1;
|
||||
[self.deepgramManager prepareConnection];
|
||||
|
||||
self.deepgramFullText = [[NSMutableString alloc] init];
|
||||
}
|
||||
|
||||
#pragma mark - 事件
|
||||
- (void)showComment {
|
||||
CGFloat customViewHeight = KB_SCREEN_HEIGHT * (0.8);
|
||||
@@ -402,17 +446,19 @@
|
||||
|
||||
self.statusLabel.text = @"正在连接...";
|
||||
self.recordButton.state = KBAiRecordButtonStateRecording;
|
||||
[self.streamingManager startWithToken:token language:@"en-US" voiceId:nil];
|
||||
[self.deepgramFullText setString:@""];
|
||||
self.transcriptLabel.text = @"";
|
||||
[self.deepgramManager start];
|
||||
}
|
||||
|
||||
- (void)recordButtonDidEndPress:(KBAiRecordButton *)button {
|
||||
NSLog(@"[KBAiMainVC] Record button end press");
|
||||
[self.streamingManager stopAndFinalize];
|
||||
[self.deepgramManager stopAndFinalize];
|
||||
}
|
||||
|
||||
- (void)recordButtonDidCancelPress:(KBAiRecordButton *)button {
|
||||
NSLog(@"[KBAiMainVC] Record button cancel press");
|
||||
[self.streamingManager cancel];
|
||||
[self.deepgramManager cancel];
|
||||
}
|
||||
|
||||
#pragma mark - VoiceChatStreamingManagerDelegate
|
||||
@@ -501,4 +547,55 @@
|
||||
[self showError:error];
|
||||
}
|
||||
|
||||
#pragma mark - DeepgramStreamingManagerDelegate
|
||||
|
||||
- (void)deepgramStreamingManagerDidConnect {
|
||||
self.statusLabel.text = @"已连接,准备中...";
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error {
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
if (error) {
|
||||
[self showError:error];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms {
|
||||
[self.recordButton updateVolumeRMS:rms];
|
||||
NSTimeInterval now = [[NSDate date] timeIntervalSince1970];
|
||||
if (now - self.lastRMSLogTime >= 1.0) {
|
||||
self.lastRMSLogTime = now;
|
||||
NSLog(@"[KBAiMainVC] RMS: %.3f", rms);
|
||||
}
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text {
|
||||
self.statusLabel.text = @"正在识别...";
|
||||
NSString *displayText = text ?: @"";
|
||||
if (self.deepgramFullText.length > 0 && displayText.length > 0) {
|
||||
displayText =
|
||||
[NSString stringWithFormat:@"%@ %@", self.deepgramFullText, displayText];
|
||||
} else if (self.deepgramFullText.length > 0) {
|
||||
displayText = [self.deepgramFullText copy];
|
||||
}
|
||||
self.transcriptLabel.text = displayText;
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text {
|
||||
if (text.length > 0) {
|
||||
if (self.deepgramFullText.length > 0) {
|
||||
[self.deepgramFullText appendString:@" "];
|
||||
}
|
||||
[self.deepgramFullText appendString:text];
|
||||
}
|
||||
self.transcriptLabel.text = self.deepgramFullText;
|
||||
self.statusLabel.text = @"识别完成";
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
}
|
||||
|
||||
- (void)deepgramStreamingManagerDidFail:(NSError *)error {
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
[self showError:error];
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
16
keyBoard/Class/AiTalk/VM/AiVM.h
Normal file
16
keyBoard/Class/AiTalk/VM/AiVM.h
Normal file
@@ -0,0 +1,16 @@
|
||||
//
|
||||
// AiVM.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/22.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
@interface AiVM : NSObject
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
12
keyBoard/Class/AiTalk/VM/AiVM.m
Normal file
12
keyBoard/Class/AiTalk/VM/AiVM.m
Normal file
@@ -0,0 +1,12 @@
|
||||
//
|
||||
// AiVM.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/22.
|
||||
//
|
||||
|
||||
#import "AiVM.h"
|
||||
|
||||
@implementation AiVM
|
||||
|
||||
@end
|
||||
@@ -179,6 +179,11 @@ static const float kAudioSoftwareGain = 2.5f;
|
||||
}
|
||||
|
||||
// 获取 Int16 数据
|
||||
if (!outputBuffer.int16ChannelData) {
|
||||
NSLog(@"[AudioCaptureManager] Int16 channel data is null");
|
||||
return;
|
||||
}
|
||||
|
||||
int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
|
||||
NSUInteger sampleCount = outputBuffer.frameLength;
|
||||
NSUInteger byteCount = sampleCount * sizeof(int16_t);
|
||||
@@ -189,13 +194,20 @@ static const float kAudioSoftwareGain = 2.5f;
|
||||
[self calculateAndReportRMS:samples sampleCount:sampleCount];
|
||||
[self logAudioStatsIfNeeded:samples sampleCount:sampleCount];
|
||||
|
||||
if (byteCount == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSData *pcmData = [NSData dataWithBytes:samples length:byteCount];
|
||||
|
||||
// 将数据添加到 ring buffer 并输出完整帧
|
||||
dispatch_async(self.audioQueue, ^{
|
||||
[self appendToRingBuffer:samples byteCount:byteCount];
|
||||
[self appendToRingBuffer:(const uint8_t *)pcmData.bytes
|
||||
byteCount:pcmData.length];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)appendToRingBuffer:(int16_t *)samples byteCount:(NSUInteger)byteCount {
|
||||
- (void)appendToRingBuffer:(const uint8_t *)bytes byteCount:(NSUInteger)byteCount {
|
||||
// 将新数据追加到 ring buffer
|
||||
uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes;
|
||||
NSUInteger ringBufferLength = self.ringBuffer.length;
|
||||
@@ -208,7 +220,7 @@ static const float kAudioSoftwareGain = 2.5f;
|
||||
NSUInteger copySize = MIN(bytesToCopy, spaceAvailable);
|
||||
|
||||
memcpy(ringBufferBytes + self.ringBufferWriteIndex,
|
||||
(uint8_t *)samples + sourceOffset, copySize);
|
||||
bytes + sourceOffset, copySize);
|
||||
self.ringBufferWriteIndex += copySize;
|
||||
sourceOffset += copySize;
|
||||
bytesToCopy -= copySize;
|
||||
|
||||
50
keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.h
Normal file
50
keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.h
Normal file
@@ -0,0 +1,50 @@
|
||||
//
|
||||
// DeepgramStreamingManager.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
@protocol DeepgramStreamingManagerDelegate <NSObject>
|
||||
@optional
|
||||
- (void)deepgramStreamingManagerDidConnect;
|
||||
- (void)deepgramStreamingManagerDidDisconnect:(NSError *_Nullable)error;
|
||||
- (void)deepgramStreamingManagerDidUpdateRMS:(float)rms;
|
||||
- (void)deepgramStreamingManagerDidReceiveInterimTranscript:(NSString *)text;
|
||||
- (void)deepgramStreamingManagerDidReceiveFinalTranscript:(NSString *)text;
|
||||
- (void)deepgramStreamingManagerDidFail:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// Manager for Deepgram live transcription.
|
||||
@interface DeepgramStreamingManager : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<DeepgramStreamingManagerDelegate> delegate;
|
||||
|
||||
@property(nonatomic, copy) NSString *serverURL; // wss://api.deepgram.com/v1/listen
|
||||
@property(nonatomic, copy) NSString *apiKey;
|
||||
|
||||
@property(nonatomic, copy, nullable) NSString *language;
|
||||
@property(nonatomic, copy, nullable) NSString *model;
|
||||
@property(nonatomic, assign) BOOL punctuate;
|
||||
@property(nonatomic, assign) BOOL smartFormat;
|
||||
@property(nonatomic, assign) BOOL interimResults;
|
||||
|
||||
@property(nonatomic, copy) NSString *encoding; // linear16
|
||||
@property(nonatomic, assign) double sampleRate;
|
||||
@property(nonatomic, assign) int channels;
|
||||
|
||||
@property(nonatomic, assign, readonly, getter=isStreaming) BOOL streaming;
|
||||
|
||||
- (void)start;
|
||||
- (void)prepareConnection;
|
||||
- (void)stopAndFinalize;
|
||||
- (void)cancel;
|
||||
- (void)disconnect;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
508
keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.m
Normal file
508
keyBoard/Class/AiTalk/VM/DeepgramStreamingManager.m
Normal file
@@ -0,0 +1,508 @@
|
||||
//
|
||||
// DeepgramStreamingManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "DeepgramStreamingManager.h"
|
||||
#import "AudioCaptureManager.h"
|
||||
#import "AudioSessionManager.h"
|
||||
#import "DeepgramWebSocketClient.h"
|
||||
#import <UIKit/UIKit.h>
|
||||
|
||||
static NSString *const kDeepgramStreamingManagerErrorDomain =
|
||||
@"DeepgramStreamingManager";
|
||||
|
||||
@interface DeepgramStreamingManager () <AudioSessionManagerDelegate,
|
||||
AudioCaptureManagerDelegate,
|
||||
DeepgramWebSocketClientDelegate>
|
||||
|
||||
@property(nonatomic, strong) AudioSessionManager *audioSession;
|
||||
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
|
||||
@property(nonatomic, strong) DeepgramWebSocketClient *client;
|
||||
@property(nonatomic, strong) dispatch_queue_t stateQueue;
|
||||
|
||||
@property(nonatomic, assign) BOOL streaming;
|
||||
@property(nonatomic, strong) NSMutableArray<NSData *> *pendingFrames;
|
||||
@property(nonatomic, assign) NSUInteger pendingFrameLimit;
|
||||
@property(nonatomic, assign) BOOL connecting;
|
||||
@property(nonatomic, assign) BOOL pendingStart;
|
||||
@property(nonatomic, assign) BOOL keepConnection;
|
||||
@property(nonatomic, strong) dispatch_source_t keepAliveTimer;
|
||||
@property(nonatomic, assign) NSInteger reconnectAttempts;
|
||||
@property(nonatomic, assign) NSInteger maxReconnectAttempts;
|
||||
@property(nonatomic, assign) BOOL reconnectScheduled;
|
||||
@property(nonatomic, assign) BOOL appInBackground;
|
||||
@property(nonatomic, assign) BOOL shouldReconnectOnForeground;
|
||||
|
||||
@end
|
||||
|
||||
@implementation DeepgramStreamingManager
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_stateQueue = dispatch_queue_create("com.keyboard.aitalk.deepgram.manager",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
|
||||
_audioSession = [AudioSessionManager sharedManager];
|
||||
_audioSession.delegate = self;
|
||||
|
||||
_audioCapture = [[AudioCaptureManager alloc] init];
|
||||
_audioCapture.delegate = self;
|
||||
|
||||
_client = [[DeepgramWebSocketClient alloc] init];
|
||||
_client.delegate = self;
|
||||
|
||||
_serverURL = @"wss://api.deepgram.com/v1/listen";
|
||||
_encoding = @"linear16";
|
||||
_sampleRate = 16000.0;
|
||||
_channels = 1;
|
||||
_punctuate = YES;
|
||||
_smartFormat = YES;
|
||||
_interimResults = YES;
|
||||
|
||||
_pendingFrames = [[NSMutableArray alloc] init];
|
||||
_pendingFrameLimit = 25;
|
||||
_connecting = NO;
|
||||
_pendingStart = NO;
|
||||
_keepConnection = NO;
|
||||
_reconnectAttempts = 0;
|
||||
_maxReconnectAttempts = 5;
|
||||
_reconnectScheduled = NO;
|
||||
_appInBackground = NO;
|
||||
_shouldReconnectOnForeground = NO;
|
||||
|
||||
[self setupNotifications];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self removeNotifications];
|
||||
[self disconnect];
|
||||
}
|
||||
|
||||
- (void)start {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
self.keepConnection = YES;
|
||||
self.pendingStart = YES;
|
||||
self.reconnectAttempts = 0;
|
||||
if (self.apiKey.length == 0) {
|
||||
[self reportErrorWithMessage:@"Deepgram API key is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession hasMicrophonePermission]) {
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[self.audioSession requestMicrophonePermission:^(BOOL granted) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf) {
|
||||
return;
|
||||
}
|
||||
if (!granted) {
|
||||
[strongSelf reportErrorWithMessage:@"Microphone permission denied"];
|
||||
return;
|
||||
}
|
||||
dispatch_async(strongSelf.stateQueue, ^{
|
||||
[strongSelf start];
|
||||
});
|
||||
}];
|
||||
return;
|
||||
}
|
||||
|
||||
NSError *error = nil;
|
||||
if (![self.audioSession configureForConversation:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession activateSession:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioCapture isCapturing]) {
|
||||
NSError *captureError = nil;
|
||||
if (![self.audioCapture startCapture:&captureError]) {
|
||||
[self reportError:captureError];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
NSLog(@"[DeepgramStreamingManager] Start streaming, server: %@",
|
||||
self.serverURL);
|
||||
|
||||
if (self.client.isConnected) {
|
||||
[self beginStreamingIfReady];
|
||||
return;
|
||||
}
|
||||
|
||||
[self connectIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)prepareConnection {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
self.keepConnection = YES;
|
||||
self.pendingStart = NO;
|
||||
self.reconnectAttempts = 0;
|
||||
|
||||
if (self.apiKey.length == 0) {
|
||||
NSLog(@"[DeepgramStreamingManager] Prepare skipped: API key missing");
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.client.isConnected) {
|
||||
return;
|
||||
}
|
||||
|
||||
[self connectIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)stopAndFinalize {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.pendingFrames removeAllObjects];
|
||||
self.pendingStart = NO;
|
||||
[self.client disableAudioSending];
|
||||
[self startKeepAliveIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.pendingFrames removeAllObjects];
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
[self.client disableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
[self.client disconnect];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)disconnect {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
[self.pendingFrames removeAllObjects];
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
self.shouldReconnectOnForeground = NO;
|
||||
[self.client disableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
[self.client disconnect];
|
||||
[self.audioSession deactivateSession];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioCaptureManagerDelegate
|
||||
|
||||
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
|
||||
if (pcmFrame.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (!self.streaming || !self.client.isConnected) {
|
||||
[self.pendingFrames addObject:pcmFrame];
|
||||
if (self.pendingFrames.count > self.pendingFrameLimit) {
|
||||
[self.pendingFrames removeObjectAtIndex:0];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
[self.client sendAudioPCMFrame:pcmFrame];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidUpdateRMS:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidUpdateRMS:rms];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioSessionManagerDelegate
|
||||
|
||||
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
|
||||
if (type == KBAudioSessionInterruptionTypeBegan) {
|
||||
[self cancel];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)audioSessionManagerMicrophonePermissionDenied {
|
||||
[self reportErrorWithMessage:@"Microphone permission denied"];
|
||||
}
|
||||
|
||||
#pragma mark - DeepgramWebSocketClientDelegate
|
||||
|
||||
- (void)deepgramClientDidConnect {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.connecting = NO;
|
||||
self.reconnectAttempts = 0;
|
||||
self.reconnectScheduled = NO;
|
||||
[self beginStreamingIfReady];
|
||||
[self startKeepAliveIfNeeded];
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidConnect)]) {
|
||||
[self.delegate deepgramStreamingManagerDidConnect];
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidDisconnect:(NSError *_Nullable)error {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
self.connecting = NO;
|
||||
[self.audioSession deactivateSession];
|
||||
[self stopKeepAlive];
|
||||
|
||||
if (self.pendingStart || self.keepConnection) {
|
||||
[self scheduleReconnectWithError:error];
|
||||
}
|
||||
});
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidDisconnect:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidDisconnect:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidReceiveInterimTranscript:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidReceiveInterimTranscript:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidReceiveInterimTranscript:text];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidReceiveFinalTranscript:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidReceiveFinalTranscript:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidReceiveFinalTranscript:text];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)deepgramClientDidFail:(NSError *)error {
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
#pragma mark - Error Reporting
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(deepgramStreamingManagerDidFail:)]) {
|
||||
[self.delegate deepgramStreamingManagerDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportErrorWithMessage:(NSString *)message {
|
||||
NSError *error = [NSError errorWithDomain:kDeepgramStreamingManagerErrorDomain
|
||||
code:-1
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey : message ?: @""
|
||||
}];
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
- (void)connectIfNeeded {
|
||||
if (self.connecting || self.client.isConnected) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.serverURL.length == 0) {
|
||||
[self reportErrorWithMessage:@"Deepgram server URL is required"];
|
||||
return;
|
||||
}
|
||||
|
||||
self.client.serverURL = self.serverURL;
|
||||
self.client.apiKey = self.apiKey;
|
||||
self.client.language = self.language;
|
||||
self.client.model = self.model;
|
||||
self.client.punctuate = self.punctuate;
|
||||
self.client.smartFormat = self.smartFormat;
|
||||
self.client.interimResults = self.interimResults;
|
||||
self.client.encoding = self.encoding;
|
||||
self.client.sampleRate = self.sampleRate;
|
||||
self.client.channels = self.channels;
|
||||
[self.client disableAudioSending];
|
||||
self.connecting = YES;
|
||||
[self.client connect];
|
||||
}
|
||||
|
||||
- (void)beginStreamingIfReady {
|
||||
if (!self.pendingStart) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.streaming = YES;
|
||||
[self.client enableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
|
||||
if (self.pendingFrames.count > 0) {
|
||||
NSArray<NSData *> *frames = [self.pendingFrames copy];
|
||||
[self.pendingFrames removeAllObjects];
|
||||
for (NSData *frame in frames) {
|
||||
[self.client sendAudioPCMFrame:frame];
|
||||
}
|
||||
NSLog(@"[DeepgramStreamingManager] Flushed %lu pending frames",
|
||||
(unsigned long)frames.count);
|
||||
}
|
||||
}
|
||||
|
||||
- (void)scheduleReconnectWithError:(NSError *_Nullable)error {
|
||||
if (self.reconnectScheduled || self.connecting || self.client.isConnected) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.reconnectAttempts >= self.maxReconnectAttempts) {
|
||||
NSLog(@"[DeepgramStreamingManager] Reconnect failed %ld times, stop retry. %@",
|
||||
(long)self.maxReconnectAttempts,
|
||||
error.localizedDescription ?: @"");
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
return;
|
||||
}
|
||||
|
||||
self.reconnectAttempts += 1;
|
||||
self.reconnectScheduled = YES;
|
||||
|
||||
dispatch_after(dispatch_time(DISPATCH_TIME_NOW, (int64_t)(1 * NSEC_PER_SEC)),
|
||||
self.stateQueue, ^{
|
||||
self.reconnectScheduled = NO;
|
||||
if (self.appInBackground) {
|
||||
self.shouldReconnectOnForeground = YES;
|
||||
return;
|
||||
}
|
||||
if (!self.pendingStart && !self.keepConnection) {
|
||||
return;
|
||||
}
|
||||
[self connectIfNeeded];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)setupNotifications {
|
||||
NSNotificationCenter *center = [NSNotificationCenter defaultCenter];
|
||||
[center addObserver:self
|
||||
selector:@selector(handleAppDidEnterBackground)
|
||||
name:UIApplicationDidEnterBackgroundNotification
|
||||
object:nil];
|
||||
[center addObserver:self
|
||||
selector:@selector(handleAppWillEnterForeground)
|
||||
name:UIApplicationWillEnterForegroundNotification
|
||||
object:nil];
|
||||
}
|
||||
|
||||
- (void)removeNotifications {
|
||||
[[NSNotificationCenter defaultCenter] removeObserver:self];
|
||||
}
|
||||
|
||||
- (void)handleAppDidEnterBackground {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.appInBackground = YES;
|
||||
self.shouldReconnectOnForeground =
|
||||
self.keepConnection || self.pendingStart;
|
||||
self.pendingStart = NO;
|
||||
self.keepConnection = NO;
|
||||
|
||||
if (self.streaming) {
|
||||
[self.audioCapture stopCapture];
|
||||
self.streaming = NO;
|
||||
}
|
||||
|
||||
[self.pendingFrames removeAllObjects];
|
||||
[self.client disableAudioSending];
|
||||
[self stopKeepAlive];
|
||||
[self.client disconnect];
|
||||
[self.audioSession deactivateSession];
|
||||
|
||||
NSLog(@"[DeepgramStreamingManager] App entered background, socket closed");
|
||||
});
|
||||
}
|
||||
|
||||
- (void)handleAppWillEnterForeground {
|
||||
dispatch_async(self.stateQueue, ^{
|
||||
self.appInBackground = NO;
|
||||
if (self.shouldReconnectOnForeground) {
|
||||
self.keepConnection = YES;
|
||||
self.reconnectAttempts = 0;
|
||||
[self connectIfNeeded];
|
||||
}
|
||||
self.shouldReconnectOnForeground = NO;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)startKeepAliveIfNeeded {
|
||||
if (!self.keepConnection || !self.client.isConnected || self.streaming) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (self.keepAliveTimer) {
|
||||
return;
|
||||
}
|
||||
|
||||
self.keepAliveTimer =
|
||||
dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0,
|
||||
self.stateQueue);
|
||||
dispatch_source_set_timer(self.keepAliveTimer,
|
||||
dispatch_time(DISPATCH_TIME_NOW, 15 * NSEC_PER_SEC),
|
||||
15 * NSEC_PER_SEC, 1 * NSEC_PER_SEC);
|
||||
__weak typeof(self) weakSelf = self;
|
||||
dispatch_source_set_event_handler(self.keepAliveTimer, ^{
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf) {
|
||||
return;
|
||||
}
|
||||
[strongSelf.client sendKeepAlive];
|
||||
});
|
||||
dispatch_resume(self.keepAliveTimer);
|
||||
}
|
||||
|
||||
- (void)stopKeepAlive {
|
||||
if (self.keepAliveTimer) {
|
||||
dispatch_source_cancel(self.keepAliveTimer);
|
||||
self.keepAliveTimer = nil;
|
||||
}
|
||||
}
|
||||
|
||||
@end
|
||||
52
keyBoard/Class/AiTalk/VM/DeepgramWebSocketClient.h
Normal file
52
keyBoard/Class/AiTalk/VM/DeepgramWebSocketClient.h
Normal file
@@ -0,0 +1,52 @@
|
||||
//
//  DeepgramWebSocketClient.h
//  keyBoard
//
//  Created by Mac on 2026/1/21.
//

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Callbacks for connection lifecycle and transcription results.
/// All delegate methods are invoked on the main queue.
@protocol DeepgramWebSocketClientDelegate <NSObject>
@optional
- (void)deepgramClientDidConnect;
- (void)deepgramClientDidDisconnect:(NSError *_Nullable)error;
/// Partial (still-changing) transcript for the current utterance.
- (void)deepgramClientDidReceiveInterimTranscript:(NSString *)text;
/// Finalized transcript segment (is_final / speech_final from Deepgram).
- (void)deepgramClientDidReceiveFinalTranscript:(NSString *)text;
- (void)deepgramClientDidFail:(NSError *)error;
@end

/// WebSocket client for Deepgram live transcription.
@interface DeepgramWebSocketClient : NSObject

/// Receiver of connection and transcript events. Held weakly, so it must be
/// nullable: a weak reference is zeroed by the runtime when the delegate is
/// deallocated, which a nonnull audit annotation would incorrectly forbid.
@property(nonatomic, weak, nullable) id<DeepgramWebSocketClientDelegate> delegate;

@property(nonatomic, copy) NSString *serverURL; // wss://api.deepgram.com/v1/listen
/// Deepgram API key, sent as "Authorization: Token <apiKey>". Required.
@property(nonatomic, copy) NSString *apiKey;

/// Optional query parameters; omitted from the URL when nil/empty.
@property(nonatomic, copy, nullable) NSString *language;
@property(nonatomic, copy, nullable) NSString *model;
@property(nonatomic, assign) BOOL punctuate;
@property(nonatomic, assign) BOOL smartFormat;
@property(nonatomic, assign) BOOL interimResults;

/// Audio format advertised to Deepgram; must match the PCM frames sent.
@property(nonatomic, copy) NSString *encoding; // linear16
@property(nonatomic, assign) double sampleRate;
@property(nonatomic, assign) int channels;

@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;

- (void)connect;
- (void)disconnect;
/// Forwards one raw PCM frame; dropped unless connected and sending enabled.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;
/// Asks Deepgram to flush and close the stream (CloseStream message).
- (void)finish;
- (void)sendKeepAlive;

/// Gate audio forwarding without tearing down the socket.
- (void)enableAudioSending;
- (void)disableAudioSending;

@end

NS_ASSUME_NONNULL_END
|
||||
413
keyBoard/Class/AiTalk/VM/DeepgramWebSocketClient.m
Normal file
413
keyBoard/Class/AiTalk/VM/DeepgramWebSocketClient.m
Normal file
@@ -0,0 +1,413 @@
|
||||
//
|
||||
// DeepgramWebSocketClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/21.
|
||||
//
|
||||
|
||||
#import "DeepgramWebSocketClient.h"
|
||||
|
||||
static NSString *const kDeepgramWebSocketClientErrorDomain =
|
||||
@"DeepgramWebSocketClient";
|
||||
|
||||
@interface DeepgramWebSocketClient () <NSURLSessionWebSocketDelegate>

/// Session/task pair backing the live socket; recreated on every connect.
@property(nonatomic, strong) NSURLSession *urlSession;
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
/// Serial queue that serializes connect/send/teardown of the socket state.
@property(nonatomic, strong) dispatch_queue_t networkQueue;
@property(nonatomic, assign) BOOL connected;
/// Gate that drops outgoing PCM frames when sending is disabled.
@property(nonatomic, assign) BOOL audioSendingEnabled;

@end

@implementation DeepgramWebSocketClient

- (instancetype)init {
  self = [super init];
  if (self) {
    _networkQueue = dispatch_queue_create("com.keyboard.aitalk.deepgram.ws",
                                          DISPATCH_QUEUE_SERIAL);
    // Defaults for Deepgram live transcription: 16 kHz mono linear16 PCM.
    _serverURL = @"wss://api.deepgram.com/v1/listen";
    _encoding = @"linear16";
    _sampleRate = 16000.0;
    _channels = 1;
    _punctuate = YES;
    _smartFormat = YES;
    _interimResults = YES;
    _audioSendingEnabled = NO;
  }
  return self;
}

- (void)dealloc {
  // Tear down synchronously. The previous implementation called -disconnect
  // here, which dispatch_async'd a block capturing self; retaining self from
  // inside dealloc is undefined behavior under ARC (object resurrection).
  // Direct ivar access avoids going through accessors during deallocation.
  [_webSocketTask
      cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                   reason:nil];
  [_urlSession invalidateAndCancel];
}

#pragma mark - Public Methods

/// Opens a new WebSocket to Deepgram, replacing any existing connection.
/// Fails via the delegate if the API key is missing or the URL is invalid.
- (void)connect {
  dispatch_async(self.networkQueue, ^{
    // Drop any previous session/task before building a fresh one.
    [self disconnectInternal];

    if (self.apiKey.length == 0) {
      [self reportErrorWithMessage:@"Deepgram API key is required"];
      return;
    }

    NSURL *url = [self buildURL];
    if (!url) {
      [self reportErrorWithMessage:@"Invalid Deepgram URL"];
      return;
    }

    NSLog(@"[DeepgramWebSocketClient] Connecting: %@", url.absoluteString);

    NSURLSessionConfiguration *config =
        [NSURLSessionConfiguration defaultSessionConfiguration];
    config.timeoutIntervalForRequest = 30;
    config.timeoutIntervalForResource = 300;

    // delegateQueue:nil lets NSURLSession create its own serial queue; the
    // session is invalidated in disconnectInternal, breaking its strong
    // reference to self.
    self.urlSession = [NSURLSession sessionWithConfiguration:config
                                                    delegate:self
                                               delegateQueue:nil];

    NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
    [request setValue:[NSString stringWithFormat:@"Token %@", self.apiKey]
        forHTTPHeaderField:@"Authorization"];

    self.webSocketTask = [self.urlSession webSocketTaskWithRequest:request];
    [self.webSocketTask resume];
    [self receiveMessage];
  });
}

/// Closes the socket and notifies the delegate if one was actually open.
- (void)disconnect {
  dispatch_async(self.networkQueue, ^{
    BOOL shouldNotify = self.webSocketTask != nil;
    if (shouldNotify) {
      NSLog(@"[DeepgramWebSocketClient] Disconnect requested");
    }
    [self disconnectInternal];
    if (shouldNotify) {
      [self notifyDisconnect:nil];
    }
  });
}

/// Sends one raw PCM frame as a binary WebSocket message.
/// Frames are silently dropped while disconnected or sending is disabled.
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
  // Cheap pre-flight check off-queue; state is re-checked on networkQueue.
  if (!self.connected || !self.webSocketTask || pcmFrame.length == 0) {
    return;
  }

  dispatch_async(self.networkQueue, ^{
    if (!self.audioSendingEnabled) {
      return;
    }
    if (!self.connected || !self.webSocketTask) {
      return;
    }

    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
    [self.webSocketTask
          sendMessage:message
    completionHandler:^(NSError *_Nullable error) {
      // No per-frame success logging: frames arrive many times per second
      // and an NSLog here floods the console on the hot audio path.
      if (error) {
        [self reportError:error];
      }
    }];
  });
}

/// Asks Deepgram to flush pending audio and finalize the stream.
- (void)finish {
  NSLog(@"[DeepgramWebSocketClient] Sending CloseStream");
  [self sendJSON:@{ @"type" : @"CloseStream" }];
}

/// Pings Deepgram so an idle (non-streaming) socket is not closed.
- (void)sendKeepAlive {
  if (!self.connected || !self.webSocketTask) {
    return;
  }
  [self sendJSON:@{ @"type" : @"KeepAlive" }];
}

- (void)enableAudioSending {
  dispatch_async(self.networkQueue, ^{
    self.audioSendingEnabled = YES;
  });
}

- (void)disableAudioSending {
  dispatch_async(self.networkQueue, ^{
    self.audioSendingEnabled = NO;
  });
}

#pragma mark - Private Methods

/// Builds the listen URL from serverURL plus the configured query
/// parameters. Returns nil when serverURL is empty or unparseable.
- (NSURL *)buildURL {
  if (self.serverURL.length == 0) {
    return nil;
  }

  NSURLComponents *components =
      [NSURLComponents componentsWithString:self.serverURL];
  if (!components) {
    return nil;
  }

  // Preserve any query items already present in serverURL; upsert below
  // overrides duplicates instead of appending a second copy.
  NSMutableArray<NSURLQueryItem *> *items =
      components.queryItems.mutableCopy ?: [NSMutableArray array];

  [self upsertQueryItemWithName:@"model" value:self.model items:items];
  [self upsertQueryItemWithName:@"language" value:self.language items:items];

  [self upsertQueryItemWithName:@"punctuate"
                          value:(self.punctuate ? @"true" : @"false")
                          items:items];
  [self upsertQueryItemWithName:@"smart_format"
                          value:(self.smartFormat ? @"true" : @"false")
                          items:items];
  [self upsertQueryItemWithName:@"interim_results"
                          value:(self.interimResults ? @"true" : @"false")
                          items:items];

  [self upsertQueryItemWithName:@"encoding" value:self.encoding items:items];
  [self upsertQueryItemWithName:@"sample_rate"
                          value:[NSString stringWithFormat:@"%.0f",
                                                           self.sampleRate]
                          items:items];
  [self upsertQueryItemWithName:@"channels"
                          value:[NSString stringWithFormat:@"%d", self.channels]
                          items:items];

  components.queryItems = items;
  return components.URL;
}

/// Replaces an existing query item named `name` or appends a new one.
/// No-op when either name or value is empty (optional params are omitted).
- (void)upsertQueryItemWithName:(NSString *)name
                          value:(NSString *)value
                          items:(NSMutableArray<NSURLQueryItem *> *)items {
  if (name.length == 0 || value.length == 0) {
    return;
  }

  for (NSUInteger i = 0; i < items.count; i++) {
    NSURLQueryItem *item = items[i];
    if ([item.name isEqualToString:name]) {
      items[i] = [NSURLQueryItem queryItemWithName:name value:value];
      return;
    }
  }

  [items addObject:[NSURLQueryItem queryItemWithName:name value:value]];
}

/// Serializes `dict` to JSON and sends it as a text WebSocket message.
- (void)sendJSON:(NSDictionary *)dict {
  if (!self.webSocketTask) {
    return;
  }

  NSError *jsonError = nil;
  NSData *jsonData = [NSJSONSerialization dataWithJSONObject:dict
                                                     options:0
                                                       error:&jsonError];
  if (jsonError) {
    [self reportError:jsonError];
    return;
  }

  NSString *jsonString =
      [[NSString alloc] initWithData:jsonData
                            encoding:NSUTF8StringEncoding];
  if (!jsonString) {
    [self reportErrorWithMessage:@"Failed to encode JSON message"];
    return;
  }

  dispatch_async(self.networkQueue, ^{
    // self.webSocketTask may have been nilled by a concurrent disconnect;
    // messaging nil is a harmless no-op.
    NSURLSessionWebSocketMessage *message =
        [[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
    [self.webSocketTask
          sendMessage:message
    completionHandler:^(NSError *_Nullable error) {
      if (error) {
        [self reportError:error];
      }
    }];
  });
}

/// Reads the next message and re-arms itself; NSURLSession delivers exactly
/// one message (or error) per receive call, so this loops for the socket's
/// lifetime.
- (void)receiveMessage {
  if (!self.webSocketTask) {
    return;
  }

  __weak typeof(self) weakSelf = self;
  [self.webSocketTask receiveMessageWithCompletionHandler:^(
                          NSURLSessionWebSocketMessage *_Nullable message,
                          NSError *_Nullable error) {
    __strong typeof(weakSelf) strongSelf = weakSelf;
    if (!strongSelf) {
      return;
    }

    if (error) {
      // Suppress expected teardown errors: NSURLErrorCancelled (our own
      // disconnect) and code 57 (POSIX ENOTCONN, socket already closed).
      if (error.code != NSURLErrorCancelled && error.code != 57) {
        [strongSelf notifyDisconnect:error];
        [strongSelf disconnectInternal];
      }
      return;  // Do not re-arm after an error.
    }

    if (message.type == NSURLSessionWebSocketMessageTypeString) {
      NSLog(@"[DeepgramWebSocketClient] Received text: %@", message.string);
      [strongSelf handleTextMessage:message.string];
    } else if (message.type == NSURLSessionWebSocketMessageTypeData) {
      NSLog(@"[DeepgramWebSocketClient] Received binary: %lu bytes",
            (unsigned long)message.data.length);
      [strongSelf handleBinaryMessage:message.data];
    }

    [strongSelf receiveMessage];
  }];
}

/// Parses a Deepgram results payload and forwards the transcript to the
/// delegate on the main queue. Malformed or non-transcript payloads are
/// ignored; server-reported errors are surfaced via deepgramClientDidFail:.
- (void)handleTextMessage:(NSString *)text {
  if (text.length == 0) {
    return;
  }

  NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
  if (!data) {
    return;
  }

  NSError *jsonError = nil;
  id json = [NSJSONSerialization JSONObjectWithData:data
                                            options:0
                                              error:&jsonError];
  if (jsonError) {
    [self reportError:jsonError];
    return;
  }
  // Guard the root type: subscripting a top-level array with a string key
  // would raise an unrecognized-selector exception.
  if (![json isKindOfClass:[NSDictionary class]]) {
    return;
  }

  NSString *errorMessage = json[@"error"];
  if ([errorMessage isKindOfClass:[NSString class]] && errorMessage.length > 0) {
    [self reportErrorWithMessage:errorMessage];
    return;
  }

  NSDictionary *channel = json[@"channel"];
  if (![channel isKindOfClass:[NSDictionary class]]) {
    return;
  }

  NSArray *alternatives = channel[@"alternatives"];
  if (![alternatives isKindOfClass:[NSArray class]] || alternatives.count == 0) {
    return;
  }

  NSDictionary *firstAlt = alternatives.firstObject;
  if (![firstAlt isKindOfClass:[NSDictionary class]]) {
    return;
  }

  NSString *transcript = firstAlt[@"transcript"];
  if (![transcript isKindOfClass:[NSString class]] || transcript.length == 0) {
    return;
  }

  BOOL isFinal = [json[@"is_final"] boolValue] ||
                 [json[@"speech_final"] boolValue];

  dispatch_async(dispatch_get_main_queue(), ^{
    if (isFinal) {
      if ([self.delegate respondsToSelector:@selector
                         (deepgramClientDidReceiveFinalTranscript:)]) {
        [self.delegate deepgramClientDidReceiveFinalTranscript:transcript];
      }
    } else {
      if ([self.delegate respondsToSelector:@selector
                         (deepgramClientDidReceiveInterimTranscript:)]) {
        [self.delegate deepgramClientDidReceiveInterimTranscript:transcript];
      }
    }
  });
}

/// Deepgram's listen endpoint only sends text frames; binary is ignored.
- (void)handleBinaryMessage:(NSData *)data {
}

/// Cancels the task and invalidates the session without notifying the
/// delegate. Must run on networkQueue (callers guarantee this).
- (void)disconnectInternal {
  self.connected = NO;
  self.audioSendingEnabled = NO;

  if (self.webSocketTask) {
    [self.webSocketTask
        cancelWithCloseCode:NSURLSessionWebSocketCloseCodeNormalClosure
                     reason:nil];
    self.webSocketTask = nil;
  }

  if (self.urlSession) {
    // Breaks the session's strong reference to its delegate (self).
    [self.urlSession invalidateAndCancel];
    self.urlSession = nil;
  }
}

/// Forwards an error to the delegate on the main queue.
- (void)reportError:(NSError *)error {
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector(deepgramClientDidFail:)]) {
      [self.delegate deepgramClientDidFail:error];
    }
  });
}

/// Wraps a human-readable message in an NSError and reports it.
- (void)reportErrorWithMessage:(NSString *)message {
  NSError *error = [NSError errorWithDomain:kDeepgramWebSocketClientErrorDomain
                                       code:-1
                                   userInfo:@{
                                     NSLocalizedDescriptionKey : message ?: @""
                                   }];
  [self reportError:error];
}

/// Marks the client disconnected and notifies the delegate on the main
/// queue. `error` is nil for a clean, client-initiated close.
- (void)notifyDisconnect:(NSError *_Nullable)error {
  self.connected = NO;

  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector
                       (deepgramClientDidDisconnect:)]) {
      [self.delegate deepgramClientDidDisconnect:error];
    }
  });
}

#pragma mark - NSURLSessionWebSocketDelegate

- (void)URLSession:(NSURLSession *)session
          webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didOpenWithProtocol:(NSString *)protocol {
  self.connected = YES;
  NSLog(@"[DeepgramWebSocketClient] Connected");
  dispatch_async(dispatch_get_main_queue(), ^{
    if ([self.delegate respondsToSelector:@selector(deepgramClientDidConnect)]) {
      [self.delegate deepgramClientDidConnect];
    }
  });
}

- (void)URLSession:(NSURLSession *)session
       webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
    didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
              reason:(NSData *)reason {
  // Ignore close events for a task we already tore down ourselves.
  if (!self.webSocketTask) {
    return;
  }
  NSLog(@"[DeepgramWebSocketClient] Closed with code: %ld",
        (long)closeCode);
  [self notifyDisconnect:nil];
  [self disconnectInternal];
}

@end
|
||||
1119
keyBoard/Class/AiTalk/deepgramAPI.md
Normal file
1119
keyBoard/Class/AiTalk/deepgramAPI.md
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user