Add voice WebSocket support and related modules; not tested yet
521
keyBoard/Class/AiTalk/AI技术分析.txt
Normal file
@@ -0,0 +1,521 @@
Service      Purpose                          Example format
ASR server   Speech recognition (WebSocket)   wss://api.example.com/asr
LLM server   AI conversation (HTTP SSE)       https://api.example.com/chat
TTS server   Speech synthesis                 https://api.example.com/tts

iOS (Objective-C, iOS 15+) Technical Implementation Document

Low-latency streaming voice companion chat (press and hold to talk, similar to the 猫箱 home screen)

0. Scope and Goals

Implement the home-screen voice companion conversation:

Press and hold to talk: start recording and stream the audio to ASR in real time

Release to finish: ASR finalizes immediately, returns the final text, and displays it

AI reply: show the text (typewriter effect) while playing the server-side TTS audio

Latency first: do not wait for the complete answer / complete audio; use "per-sentence triggering + streaming / quasi-streaming playback"

Barge-in: if the user presses and holds again while the AI is speaking, stop playback / cancel requests immediately and start a new recording round

Minimum iOS version: iOS 15

1. Overall Architecture (Client Modules)

KBAiMainVC
└─ ConversationOrchestrator (core state machine / module wiring / cancellation and barge-in)
   ├─ AudioSessionManager (AVAudioSession configuration and interruption handling)
   ├─ AudioCaptureManager (AVAudioEngine input tap -> 20ms PCM frames)
   ├─ ASRStreamClient (NSURLSessionWebSocketTask streaming recognition)
   ├─ LLMStreamClient (SSE/WS token stream)
   ├─ Segmenter (sentence segmentation: trigger TTS as soon as a sentence is ready)
   ├─ TTSServiceClient (requests TTS; adapts to multiple response formats)
   ├─ TTSPlaybackPipeline (pluggable: URL player / AAC decode / raw PCM feed)
   ├─ AudioStreamPlayer (AVAudioEngine + AVAudioPlayerNode playing PCM)
   └─ SubtitleSync (maps text progress to playback progress)

2. Audio Session (AVAudioSession) and Permissions

2.1 Microphone permission

Request it only right before the user presses to talk for the first time

If the user declines: prompt them to enable it in Settings

2.2 AudioSession configuration (conversation mode)

Objective-C (recommended parameters; see the activation sketch below):

category: AVAudioSessionCategoryPlayAndRecord

mode: AVAudioSessionModeVoiceChat

options:

AVAudioSessionCategoryOptionDefaultToSpeaker

AVAudioSessionCategoryOptionAllowBluetooth

(Optional) AVAudioSessionCategoryOptionMixWithOthers: if you do not want to interrupt the host app's audio (product decision)
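A minimal activation sketch using the parameters above (the class and method names here are illustrative, not part of the design):

#import <AVFoundation/AVFoundation.h>

// AudioSessionManager (sketch)
- (BOOL)activateForConversation:(NSError **)error {
    AVAudioSession *session = [AVAudioSession sharedInstance];
    BOOL ok = [session setCategory:AVAudioSessionCategoryPlayAndRecord
                              mode:AVAudioSessionModeVoiceChat
                           options:AVAudioSessionCategoryOptionDefaultToSpeaker |
                                   AVAudioSessionCategoryOptionAllowBluetooth
                             error:error];
    if (!ok) { return NO; }
    // Ask for a small IO buffer and 16 kHz; the system may not honor these exactly.
    [session setPreferredIOBufferDuration:0.02 error:nil];
    [session setPreferredSampleRate:16000 error:nil];
    return [session setActive:YES error:error];
}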

2.3 Interruption and route-change handling (required)

Observe:

AVAudioSessionInterruptionNotification

AVAudioSessionRouteChangeNotification

Handling principles:

Incoming call / interruption began: stop capture + stop playback + cancel network sessions

Interruption ended: return to Idle and wait for the user to press again (see the observer sketch below)
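A sketch of the interruption handler (registration with NSNotificationCenter is shown as a comment; the selector name is illustrative):

// [[NSNotificationCenter defaultCenter] addObserver:self
//                                          selector:@selector(handleInterruption:)
//                                              name:AVAudioSessionInterruptionNotification
//                                            object:nil];
- (void)handleInterruption:(NSNotification *)note {
    AVAudioSessionInterruptionType type =
        [note.userInfo[AVAudioSessionInterruptionTypeKey] unsignedIntegerValue];
    if (type == AVAudioSessionInterruptionTypeBegan) {
        // Incoming call etc.: stop capture, stop playback, cancel network sessions.
    } else if (type == AVAudioSessionInterruptionTypeEnded) {
        // Back to Idle; wait for the user to press again.
    }
}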

3. Audio Capture (streaming upload while the button is held)

3.1 Fixed audio parameters (locked down for end-to-end stability)

Sample Rate: 16000 Hz

Channels: 1

Format: PCM Int16 (pcm_s16le)

Frame Duration: 20 ms

16 kHz * 0.02 s = 320 samples

Bytes per frame = 320 * 2 = 640 bytes

3.2 AudioCaptureManager (AVAudioEngine input tap)

Use:

AVAudioEngine

inputNode installTapOnBus:bufferSize:format:block:

Key points:

Do no heavy work on the tap callback thread: only copy the data and dispatch to audioQueue

Convert the AVAudioPCMBuffer to Int16 PCM NSData

Guarantee a steady output of 20 ms frames; if the tap callback buffer is not exactly 20 ms, stitch/slice frames with a ring buffer (see the sketch below)
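A sketch of the frame slicing step. It assumes the tap output has already been converted to 16 kHz / mono / Int16 (for example with AVAudioConverter), and that self.pending is an NSMutableData acting as the ring buffer; both names are illustrative:

static const NSUInteger kFrameBytes = 640; // 320 samples * 2 bytes

- (void)appendConvertedPCM:(NSData *)pcmData {   // already 16 kHz / mono / Int16
    [self.pending appendData:pcmData];
    while (self.pending.length >= kFrameBytes) {
        NSData *frame = [self.pending subdataWithRange:NSMakeRange(0, kFrameBytes)];
        // Drop the emitted bytes from the front of the buffer.
        [self.pending replaceBytesInRange:NSMakeRange(0, kFrameBytes) withBytes:NULL length:0];
        [self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
    }
}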

3.3 Interface definition (Objective-C)

@protocol AudioCaptureManagerDelegate <NSObject>
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame; // 20ms/640B
- (void)audioCaptureManagerDidUpdateRMS:(float)rms; // optional: UI waveform
@end

@interface AudioCaptureManager : NSObject
@property (nonatomic, weak) id<AudioCaptureManagerDelegate> delegate;
- (BOOL)startCapture:(NSError **)error;
- (void)stopCapture;
@end

4. ASR Streaming Recognition (iOS 15: NSURLSessionWebSocketTask)

4.1 Proposed protocol (JSON control frames + binary audio frames)

Start (text frame)

{
  "type":"start",
  "sessionId":"uuid",
  "format":"pcm_s16le",
  "sampleRate":16000,
  "channels":1
}

Audio (binary frame)

Send raw PCM directly, 640 bytes per frame

Rate: 50 frames per second

Finalize (text frame)

{ "type":"finalize", "sessionId":"uuid" }

4.2 Downstream events (see the routing sketch below)

{ "type":"partial", "text":"今天" }
{ "type":"final", "text":"今天天气怎么样" }
{ "type":"error", "code":123, "message":"..." }
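A sketch of routing the downstream events to the delegate, assuming the text frame has already been deserialized into an NSDictionary:

- (void)handleServerEvent:(NSDictionary *)json {
    NSString *type = json[@"type"];
    if ([type isEqualToString:@"partial"]) {
        [self.delegate asrClientDidReceivePartialText:json[@"text"] ?: @""];
    } else if ([type isEqualToString:@"final"]) {
        [self.delegate asrClientDidReceiveFinalText:json[@"text"] ?: @""];
    } else if ([type isEqualToString:@"error"]) {
        NSError *error =
            [NSError errorWithDomain:@"ASRStreamClient"
                                code:[json[@"code"] integerValue]
                            userInfo:@{NSLocalizedDescriptionKey : json[@"message"] ?: @""}];
        [self.delegate asrClientDidFail:error];
    }
}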

4.3 ASRStreamClient interface (Objective-C)

@protocol ASRStreamClientDelegate <NSObject>
- (void)asrClientDidReceivePartialText:(NSString *)text;
- (void)asrClientDidReceiveFinalText:(NSString *)text;
- (void)asrClientDidFail:(NSError *)error;
@end

@interface ASRStreamClient : NSObject
@property (nonatomic, weak) id<ASRStreamClientDelegate> delegate;
- (void)startWithSessionId:(NSString *)sessionId;
- (void)sendAudioPCMFrame:(NSData *)pcmFrame; // 20ms frame
- (void)finalize;
- (void)cancel;
@end

5. LLM Streaming Generation (token stream)

5.1 Goals

Low latency: do not wait for the complete answer

Receive tokens via SSE or WebSocket (an SSE parsing sketch follows 5.2)

Tokens go into the Segmenter; as soon as a sentence is ready, trigger TTS

5.2 LLMStreamClient interface (Objective-C)

@protocol LLMStreamClientDelegate <NSObject>
- (void)llmClientDidReceiveToken:(NSString *)token;
- (void)llmClientDidComplete;
- (void)llmClientDidFail:(NSError *)error;
@end

@interface LLMStreamClient : NSObject
@property (nonatomic, weak) id<LLMStreamClientDelegate> delegate;
- (void)sendUserText:(NSString *)text conversationId:(NSString *)cid;
- (void)cancel;
@end
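If the LLM endpoint is SSE over a plain NSURLSession data task, the client has to split the byte stream into events itself. A sketch, assuming standard "data:" lines and a "[DONE]" sentinel and that each data payload is (or contains) the token; all of these are assumptions until the server contract is fixed. self.sseBuffer is an assumed NSMutableData property, fed from URLSession:dataTask:didReceiveData:.

- (void)consumeSSEData:(NSData *)data {
    [self.sseBuffer appendData:data];
    NSString *text = [[NSString alloc] initWithData:self.sseBuffer encoding:NSUTF8StringEncoding];
    if (!text) { return; } // a UTF-8 character may be split across chunks; wait for more bytes
    NSArray<NSString *> *events = [text componentsSeparatedByString:@"\n\n"];
    // The last element may be an incomplete event; keep it for the next callback.
    self.sseBuffer = [[events.lastObject dataUsingEncoding:NSUTF8StringEncoding] mutableCopy];
    for (NSUInteger i = 0; i + 1 < events.count; i++) {
        for (NSString *line in [events[i] componentsSeparatedByString:@"\n"]) {
            if (![line hasPrefix:@"data:"]) { continue; }
            NSString *payload = [[line substringFromIndex:5]
                stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceCharacterSet]];
            if (![payload isEqualToString:@"[DONE]"]) {
                [self.delegate llmClientDidReceiveToken:payload]; // or parse JSON and extract the token field
            }
        }
    }
}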

6. Segmenter (sentence segmentation: play the first sentence first)

6.1 Segmentation rules (recommended)

Cut a segment when either condition is met (implementation sketch after 6.2):

One of 。!? \n is encountered

Or the accumulated character count ≥ 30 (configurable)

6.2 Segmenter interface (Objective-C)

@interface Segmenter : NSObject
- (void)appendToken:(NSString *)token;
- (NSArray<NSString *> *)popReadySegments; // returns the segments that can go to TTS right now
- (void)reset;
@end
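A minimal implementation sketch of the rules in 6.1: cut at the last terminator seen so far, otherwise fall back to the length threshold.

@implementation Segmenter {
    NSMutableString *_pending;
    NSMutableArray<NSString *> *_ready;
}

- (void)appendToken:(NSString *)token {
    if (!_pending) { _pending = [NSMutableString string]; _ready = [NSMutableArray array]; }
    [_pending appendString:token ?: @""];
    NSCharacterSet *terminators =
        [NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];
    NSRange cut = [_pending rangeOfCharacterFromSet:terminators options:NSBackwardsSearch];
    if (cut.location != NSNotFound) {
        NSUInteger end = cut.location + cut.length;        // cut just after the terminator
        [_ready addObject:[_pending substringToIndex:end]];
        [_pending deleteCharactersInRange:NSMakeRange(0, end)];
    } else if (_pending.length >= 30) {                    // configurable max length
        [_ready addObject:[_pending copy]];
        [_pending setString:@""];
    }
}

- (NSArray<NSString *> *)popReadySegments {
    NSArray *segments = [_ready copy] ?: @[];
    [_ready removeAllObjects];
    return segments;
}

- (void)reset {
    [_pending setString:@""];
    [_ready removeAllObjects];
}
@end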

7. TTS: Response Format Undecided → Build a Pluggable Playback Pipeline on the Client

Because the backend team has not settled on an output format yet, the client must be able to handle any of the following four TTS output modes:

Mode A: return an m4a/MP3 URL (easiest to ship)

Server returns a URL (or a base64-encoded file)

Client plays it with AVPlayer / AVAudioPlayer

Subtitle sync uses duration mapping (the duration is available)

Pros: simple on the server
Cons: first-audio latency is usually higher (must wait for the whole clip to be generated, or at least for the first bytes)

Mode B: return AAC chunks (streaming)

Server pushes AAC frames over WebSocket

Client must decode AAC to PCM, then feed AudioStreamPlayer

Mode C: return Opus chunks (streaming)

Requires an Opus decoding library (higher cost on both server and client)

Decode, then feed the PCM for playback

Mode D: return PCM chunks (best fit for low latency)

Server pushes raw PCM16 chunks (e.g. 100 ms per chunk)

Client converts them straight into AVAudioPCMBuffer and schedules them

Lowest latency, most robust to implement

8. TTSServiceClient (unified network-layer interface)

8.1 Unified callback events (abstraction)

typedef NS_ENUM(NSInteger, TTSPayloadType) {
    TTSPayloadTypeURL,       // A
    TTSPayloadTypePCMChunk,  // D
    TTSPayloadTypeAACChunk,  // B
    TTSPayloadTypeOpusChunk  // C
};

@protocol TTSServiceClientDelegate <NSObject>
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId;
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
                          payloadType:(TTSPayloadType)type
                            segmentId:(NSString *)segmentId;
- (void)ttsClientDidFinishSegment:(NSString *)segmentId;
- (void)ttsClientDidFail:(NSError *)error;
@end

@interface TTSServiceClient : NSObject
@property (nonatomic, weak) id<TTSServiceClientDelegate> delegate;
- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId;
- (void)cancel;
@end

This way, whichever output the server finally picks, you only implement the matching branch instead of reworking the client architecture.

9. TTSPlaybackPipeline (playback pipeline: routes by payloadType)

9.1 Design goals

Support both URL playback and streaming chunk playback

Provide a unified start / stop / progress interface for subtitle sync and barge-in

9.2 Pipeline structure (recommended)

TTSPlaybackPipeline only does routing and queue management (routing sketch after 9.3)

URL → TTSURLPlayer (AVPlayer)

PCM → AudioStreamPlayer (AVAudioEngine)

AAC/Opus → Decoder → PCM → AudioStreamPlayer

9.3 Pipeline interface (Objective-C)

@protocol TTSPlaybackPipelineDelegate <NSObject>
- (void)pipelineDidStartSegment:(NSString *)segmentId duration:(NSTimeInterval)duration;
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time segmentId:(NSString *)segmentId;
- (void)pipelineDidFinishSegment:(NSString *)segmentId;
@end

@interface TTSPlaybackPipeline : NSObject
@property (nonatomic, weak) id<TTSPlaybackPipelineDelegate> delegate;

- (BOOL)start:(NSError **)error; // start the audio engine, etc.
- (void)stop;                    // stop immediately (barge-in)

- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId;
- (void)enqueueChunk:(NSData *)chunk payloadType:(TTSPayloadType)type segmentId:(NSString *)segmentId;

// optional: for subtitle sync
- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId;
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
@end
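A routing sketch for the chunk path. The 16 kHz / mono values mirror the capture format and are only placeholders until the TTS output format is decided; self.pcmPlayer is an assumed internal AudioStreamPlayer property.

- (void)enqueueChunk:(NSData *)chunk payloadType:(TTSPayloadType)type segmentId:(NSString *)segmentId {
    switch (type) {
        case TTSPayloadTypePCMChunk:
            [self.pcmPlayer enqueuePCMChunk:chunk sampleRate:16000 channels:1 segmentId:segmentId];
            break;
        case TTSPayloadTypeAACChunk:
            // TODO: decode (AudioConverter / AudioFileStream), then feed the PCM to self.pcmPlayer
            break;
        case TTSPayloadTypeOpusChunk:
            // TODO: decode with an Opus library, then feed the PCM to self.pcmPlayer
            break;
        case TTSPayloadTypeURL:
            break; // URLs arrive via enqueueURL:segmentId:
    }
}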

10. AudioStreamPlayer (streaming PCM playback, the low-latency core)

10.1 Use AVAudioEngine + AVAudioPlayerNode

Convert each PCM chunk into an AVAudioPCMBuffer

Play it via scheduleBuffer (conversion sketch at the end of this section)

Track the current segment's playback time / total duration (estimate it, or accumulate chunk durations)

10.2 Interface (Objective-C)

@interface AudioStreamPlayer : NSObject
- (BOOL)start:(NSError **)error;
- (void)stop;
- (void)enqueuePCMChunk:(NSData *)pcmData
             sampleRate:(double)sampleRate
               channels:(int)channels
              segmentId:(NSString *)segmentId;

- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId;
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
@end

Recommended PCM chunk granularity: 50-200 ms (too small and you schedule too often; too large and latency goes up).
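A sketch of enqueuePCMChunk:..., converting interleaved Int16 samples into a deinterleaved Float32 AVAudioPCMBuffer before scheduling. It assumes self.playerNode is already attached to a running AVAudioEngine and connected with this same format; those setup details are omitted.

- (void)enqueuePCMChunk:(NSData *)pcmData
             sampleRate:(double)sampleRate
               channels:(int)channels
              segmentId:(NSString *)segmentId {
    // AVAudioPlayerNode is happiest with the standard (deinterleaved Float32) format,
    // so convert Int16 -> Float32 by hand.
    AVAudioFormat *format =
        [[AVAudioFormat alloc] initStandardFormatWithSampleRate:sampleRate
                                                       channels:(AVAudioChannelCount)channels];
    NSUInteger sampleCount = pcmData.length / sizeof(int16_t);
    AVAudioFrameCount frames = (AVAudioFrameCount)(sampleCount / channels);
    AVAudioPCMBuffer *buffer =
        [[AVAudioPCMBuffer alloc] initWithPCMFormat:format frameCapacity:frames];
    buffer.frameLength = frames;

    const int16_t *src = (const int16_t *)pcmData.bytes;
    for (int ch = 0; ch < channels; ch++) {
        float *dst = buffer.floatChannelData[ch];
        for (AVAudioFrameCount i = 0; i < frames; i++) {
            dst[i] = src[i * channels + ch] / 32768.0f; // de-interleave + scale to [-1, 1]
        }
    }
    [self.playerNode scheduleBuffer:buffer completionHandler:nil];
    // Chunk duration = frames / sampleRate; accumulate it per segmentId for subtitle sync.
}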

11. Subtitle Sync (latency first)

11.1 Strategy

For each segment's text, map the number of visible characters to the playback progress (see the sketch after 11.2):

visibleCount = round(text.length * (t / T))

t: the segment's current playback time (provided by the pipeline)

T: the segment's total duration (read it directly in URL mode; accumulate/estimate it in chunk mode)

11.2 SubtitleSync interface (Objective-C)

@interface SubtitleSync : NSObject
- (NSString *)visibleTextForFullText:(NSString *)fullText
                         currentTime:(NSTimeInterval)t
                            duration:(NSTimeInterval)T;
@end
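A direct implementation sketch of that mapping; the composed-character guard keeps emoji and other multi-unit characters from being cut in half.

@implementation SubtitleSync
- (NSString *)visibleTextForFullText:(NSString *)fullText
                         currentTime:(NSTimeInterval)t
                            duration:(NSTimeInterval)T {
    if (fullText.length == 0 || T <= 0) { return fullText ?: @""; }
    double progress = MIN(MAX(t / T, 0.0), 1.0);
    NSUInteger visibleCount = (NSUInteger)llround(fullText.length * progress);
    if (visibleCount == 0) { return @""; }
    if (visibleCount >= fullText.length) { return fullText; }
    // Do not split a surrogate pair / composed character sequence.
    NSRange safe = [fullText rangeOfComposedCharacterSequenceAtIndex:visibleCount - 1];
    return [fullText substringToIndex:NSMaxRange(safe)];
}
@end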

12. ConversationOrchestrator (state machine + barge-in + queues)

12.1 States

typedef NS_ENUM(NSInteger, ConversationState) {
    ConversationStateIdle,
    ConversationStateListening,
    ConversationStateRecognizing,
    ConversationStateThinking,
    ConversationStateSpeaking
};

12.2 Key flows

Event: user presses and holds (userDidPressRecord)

If currently Speaking/Thinking:

[ttsService cancel]

[llmClient cancel]

[asrClient cancel] (if still recognizing)

[pipeline stop] (stop playback immediately)

Clear the segment queue and the subtitle queue

Configure/activate the AudioSession

Create a new sessionId

[asrClient startWithSessionId:]

[audioCapture startCapture:]

state = Listening

Event: user releases (userDidReleaseRecord)

[audioCapture stopCapture]

[asrClient finalize]

state = Recognizing

Callback: ASR final text

UI shows the user's final text

state = Thinking

Start the LLM stream: [llmClient sendUserText:conversationId:]

Callback: LLM token

segmenter appendToken

segments = [segmenter popReadySegments]

For each segment:

Generate a segmentId

Record segmentTextMap[segmentId] = segmentText

[ttsService requestTTSForText:segmentId:]

When the first playable audio arrives and playback starts:

state = Speaking

Callback: TTS audio arrives

URL: [pipeline enqueueURL:segmentId:]

chunk: [pipeline enqueueChunk:payloadType:segmentId:]

Callback: pipeline playback-time update (30-60 fps, or a timer)

Look up fullText by the current segmentId

visible = [subtitleSync visibleTextForFullText:currentTime:duration:]

UI updates the AI's visible text

12.3 Barge-in

When the user presses and holds again:

Stop playback immediately

Cancel all in-flight network requests

Discard all unplayed segments

Start a new recording round (a press-handler sketch follows below)
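A sketch of the press handler on orchestratorQueue, matching the flow above. The internal property names (ttsService, llmClient, pipeline, asrClient, audioCapture, pendingSegments, orchestratorQueue) and the readwrite redeclaration of state are assumptions about the Orchestrator's private extension; AudioSession activation is omitted.

- (void)userDidPressRecord {
    dispatch_async(self.orchestratorQueue, ^{
        if (self.state == ConversationStateSpeaking || self.state == ConversationStateThinking) {
            [self.ttsService cancel];
            [self.llmClient cancel];
            [self.pipeline stop];
            [self.pendingSegments removeAllObjects]; // drop unplayed segments + subtitles
        }
        [self.asrClient cancel]; // in case a previous round is still recognizing
        NSString *sessionId = [NSUUID UUID].UUIDString;
        [self.asrClient startWithSessionId:sessionId];
        NSError *error = nil;
        if (![self.audioCapture startCapture:&error]) {
            if (self.onError) {
                dispatch_async(dispatch_get_main_queue(), ^{ self.onError(error); });
            }
            return;
        }
        self.state = ConversationStateListening; // state is only mutated on this queue
    });
}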
12.4 Orchestrator interface (Objective-C)

@interface ConversationOrchestrator : NSObject
@property (nonatomic, assign, readonly) ConversationState state;

- (void)userDidPressRecord;
- (void)userDidReleaseRecord;

@property (nonatomic, copy) void (^onUserFinalText)(NSString *text);
@property (nonatomic, copy) void (^onAssistantVisibleText)(NSString *text);
@property (nonatomic, copy) void (^onError)(NSError *error);
@end

13. Thread / Queue Model (mandatory, to avoid races)

Recommended: two work queues plus one serial orchestrator queue (UI updates always on the main thread):

dispatch_queue_t audioQueue; (capture frame processing, ring buffer)

dispatch_queue_t networkQueue; (WebSocket send/receive and parsing)

dispatch_queue_t orchestratorQueue; (serial state machine; the only place that mutates state/queues)

UI updates always go back to the main thread

Rules (sketch below):

Every network/audio callback → dispatch_async(orchestratorQueue, ^{ ... })

The Orchestrator then decides whether to emit UI callbacks (on the main thread)
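A small sketch of the rule, using the ASR final-text callback as an example (self.conversationId and the readwrite state are assumed internal details):

- (void)asrClientDidReceiveFinalText:(NSString *)text {
    dispatch_async(self.orchestratorQueue, ^{
        self.state = ConversationStateThinking; // state only changes on orchestratorQueue
        [self.llmClient sendUserText:text conversationId:self.conversationId];
        if (self.onUserFinalText) {
            dispatch_async(dispatch_get_main_queue(), ^{ self.onUserFinalText(text); });
        }
    });
}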

14. Key Parameters (latency and stability)

Audio frame: 20 ms

PCM: 16 kHz / mono / Int16

ASR upload: binary WebSocket frames

LLM: token stream

TTS: prefer chunks; even in URL mode, start downloading and playing as early as possible

Chunk playback buffer: 100-200 ms (jitter protection)

15. Implementation Roadmap (iterating while the backend is undecided)

Phase 1: get the end-to-end path working first (simulate with "URL mode")

TTSServiceClient initially assumes the server returns an m4a URL (or a local mock URL)

The pipeline implements URL playback (AVPlayer)

Get barge-in and subtitle sync working first

Phase 2: swap in the real output once the backend decides

If the server sends PCM chunks: go straight to AudioStreamPlayer (most recommended)

If it sends AAC chunks: add an AAC decoding module (AudioConverter or a third-party library)

If it sends Opus chunks: integrate an Opus decoding library, then feed the PCM

Key point: Orchestrator / Segmenter / ASR / subtitle sync do not change; only the TTSPlaybackPipeline branch is swapped.

16. Compliance / UX Notes

Recording must be triggered by an explicit user action (press and hold)

Show a clear "recording" indicator and waveform

Never record covertly or automatically

Allow the user to interrupt playback at any time

End of document

Additional requirements for the code-writing AI (recommended to attach):

Language: Objective-C (.h/.m)

iOS 15+; use NSURLSessionWebSocketTask for WebSocket

Audio capture uses AVAudioEngine + a ring buffer sliced into 20 ms frames

The playback pipeline must support: URL playback (AVPlayer) + PCM chunk playback (AVAudioEngine)

The remaining AAC/Opus branches may be left as TODO / stubs, but the interfaces must be in place
16
keyBoard/Class/AiTalk/V/KBAICommentView.h
Normal file
@@ -0,0 +1,16 @@
//
// KBAICommentView.h
// keyBoard
//
// Created by Mac on 2026/1/16.
//

#import <UIKit/UIKit.h>

NS_ASSUME_NONNULL_BEGIN

@interface KBAICommentView : UIView

@end

NS_ASSUME_NONNULL_END
20
keyBoard/Class/AiTalk/V/KBAICommentView.m
Normal file
@@ -0,0 +1,20 @@
//
// KBAICommentView.m
// keyBoard
//
// Created by Mac on 2026/1/16.
//

#import "KBAICommentView.h"

@implementation KBAICommentView

/*
// Only override drawRect: if you perform custom drawing.
// An empty implementation adversely affects performance during animation.
- (void)drawRect:(CGRect)rect {
// Drawing code
}
*/

@end
54
keyBoard/Class/AiTalk/V/KBAiChatView.h
Normal file
@@ -0,0 +1,54 @@
//
// KBAiChatView.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//

#import <UIKit/UIKit.h>

NS_ASSUME_NONNULL_BEGIN

/// Message type
typedef NS_ENUM(NSInteger, KBAiChatMessageType) {
KBAiChatMessageTypeUser, // user message
KBAiChatMessageTypeAssistant // AI reply
};

/// Chat message model
@interface KBAiChatMessage : NSObject
@property(nonatomic, assign) KBAiChatMessageType type;
@property(nonatomic, copy) NSString *text;
@property(nonatomic, assign) BOOL isComplete; // finished or not (used for the typewriter effect)
+ (instancetype)userMessageWithText:(NSString *)text;
+ (instancetype)assistantMessageWithText:(NSString *)text;
@end

/// Chat view
/// Bubble list showing user messages and AI replies
@interface KBAiChatView : UIView

/// Add a user message
/// @param text message text
- (void)addUserMessage:(NSString *)text;

/// Add an AI message
/// @param text message text
- (void)addAssistantMessage:(NSString *)text;

/// Update the last AI message (used for the typewriter effect)
/// @param text the currently visible text
- (void)updateLastAssistantMessage:(NSString *)text;

/// Mark the last AI message as complete
- (void)markLastAssistantMessageComplete;

/// Clear all messages
- (void)clearMessages;

/// Scroll to the bottom
- (void)scrollToBottom;

@end

NS_ASSUME_NONNULL_END
296
keyBoard/Class/AiTalk/V/KBAiChatView.m
Normal file
@@ -0,0 +1,296 @@
|
||||
//
|
||||
// KBAiChatView.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "KBAiChatView.h"
|
||||
|
||||
#pragma mark - KBAiChatMessage
|
||||
|
||||
@implementation KBAiChatMessage
|
||||
|
||||
+ (instancetype)userMessageWithText:(NSString *)text {
|
||||
KBAiChatMessage *message = [[KBAiChatMessage alloc] init];
|
||||
message.type = KBAiChatMessageTypeUser;
|
||||
message.text = text;
|
||||
message.isComplete = YES;
|
||||
return message;
|
||||
}
|
||||
|
||||
+ (instancetype)assistantMessageWithText:(NSString *)text {
|
||||
KBAiChatMessage *message = [[KBAiChatMessage alloc] init];
|
||||
message.type = KBAiChatMessageTypeAssistant;
|
||||
message.text = text;
|
||||
message.isComplete = NO;
|
||||
return message;
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
#pragma mark - KBAiChatBubbleCell
|
||||
|
||||
@interface KBAiChatBubbleCell : UITableViewCell
|
||||
@property(nonatomic, strong) UIView *bubbleView;
|
||||
@property(nonatomic, strong) UILabel *messageLabel;
|
||||
@property(nonatomic, assign) KBAiChatMessageType messageType;
|
||||
@end
|
||||
|
||||
@implementation KBAiChatBubbleCell
|
||||
|
||||
- (instancetype)initWithStyle:(UITableViewCellStyle)style
|
||||
reuseIdentifier:(NSString *)reuseIdentifier {
|
||||
self = [super initWithStyle:style reuseIdentifier:reuseIdentifier];
|
||||
if (self) {
|
||||
self.backgroundColor = [UIColor clearColor];
|
||||
self.selectionStyle = UITableViewCellSelectionStyleNone;
|
||||
|
||||
// Bubble view
|
||||
self.bubbleView = [[UIView alloc] init];
|
||||
self.bubbleView.layer.cornerRadius = 16;
|
||||
self.bubbleView.layer.masksToBounds = YES;
|
||||
self.bubbleView.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.contentView addSubview:self.bubbleView];
|
||||
|
||||
// Message label
|
||||
self.messageLabel = [[UILabel alloc] init];
|
||||
self.messageLabel.numberOfLines = 0;
|
||||
self.messageLabel.font = [UIFont systemFontOfSize:16];
|
||||
self.messageLabel.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.bubbleView addSubview:self.messageLabel];
|
||||
|
||||
// Message label constraints
|
||||
[NSLayoutConstraint activateConstraints:@[
|
||||
[self.messageLabel.topAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.topAnchor
|
||||
constant:10],
|
||||
[self.messageLabel.bottomAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.bottomAnchor
|
||||
constant:-10],
|
||||
[self.messageLabel.leadingAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.leadingAnchor
|
||||
constant:12],
|
||||
[self.messageLabel.trailingAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.trailingAnchor
|
||||
constant:-12],
|
||||
]];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)configureWithMessage:(KBAiChatMessage *)message {
|
||||
self.messageLabel.text = message.text;
|
||||
self.messageType = message.type;
|
||||
|
||||
// Remove old constraints
|
||||
for (NSLayoutConstraint *constraint in self.bubbleView.constraints) {
|
||||
if (constraint.firstAttribute == NSLayoutAttributeWidth) {
|
||||
constraint.active = NO;
|
||||
}
|
||||
}
|
||||
|
||||
// Style according to the message type
|
||||
if (message.type == KBAiChatMessageTypeUser) {
|
||||
// User message: right-aligned, theme-color background
|
||||
self.bubbleView.backgroundColor = [UIColor systemBlueColor];
|
||||
self.messageLabel.textColor = [UIColor whiteColor];
|
||||
|
||||
[NSLayoutConstraint deactivateConstraints:self.bubbleView.constraints];
|
||||
[NSLayoutConstraint activateConstraints:@[
|
||||
[self.bubbleView.topAnchor
|
||||
constraintEqualToAnchor:self.contentView.topAnchor
|
||||
constant:4],
|
||||
[self.bubbleView.bottomAnchor
|
||||
constraintEqualToAnchor:self.contentView.bottomAnchor
|
||||
constant:-4],
|
||||
[self.bubbleView.trailingAnchor
|
||||
constraintEqualToAnchor:self.contentView.trailingAnchor
|
||||
constant:-16],
|
||||
[self.bubbleView.widthAnchor
|
||||
constraintLessThanOrEqualToAnchor:self.contentView.widthAnchor
|
||||
multiplier:0.75],
|
||||
|
||||
[self.messageLabel.topAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.topAnchor
|
||||
constant:10],
|
||||
[self.messageLabel.bottomAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.bottomAnchor
|
||||
constant:-10],
|
||||
[self.messageLabel.leadingAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.leadingAnchor
|
||||
constant:12],
|
||||
[self.messageLabel.trailingAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.trailingAnchor
|
||||
constant:-12],
|
||||
]];
|
||||
} else {
|
||||
// AI message: left-aligned, light gray background
|
||||
self.bubbleView.backgroundColor = [UIColor systemGray5Color];
|
||||
self.messageLabel.textColor = [UIColor labelColor];
|
||||
|
||||
[NSLayoutConstraint deactivateConstraints:self.bubbleView.constraints];
|
||||
[NSLayoutConstraint activateConstraints:@[
|
||||
[self.bubbleView.topAnchor
|
||||
constraintEqualToAnchor:self.contentView.topAnchor
|
||||
constant:4],
|
||||
[self.bubbleView.bottomAnchor
|
||||
constraintEqualToAnchor:self.contentView.bottomAnchor
|
||||
constant:-4],
|
||||
[self.bubbleView.leadingAnchor
|
||||
constraintEqualToAnchor:self.contentView.leadingAnchor
|
||||
constant:16],
|
||||
[self.bubbleView.widthAnchor
|
||||
constraintLessThanOrEqualToAnchor:self.contentView.widthAnchor
|
||||
multiplier:0.75],
|
||||
|
||||
[self.messageLabel.topAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.topAnchor
|
||||
constant:10],
|
||||
[self.messageLabel.bottomAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.bottomAnchor
|
||||
constant:-10],
|
||||
[self.messageLabel.leadingAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.leadingAnchor
|
||||
constant:12],
|
||||
[self.messageLabel.trailingAnchor
|
||||
constraintEqualToAnchor:self.bubbleView.trailingAnchor
|
||||
constant:-12],
|
||||
]];
|
||||
}
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
#pragma mark - KBAiChatView
|
||||
|
||||
@interface KBAiChatView () <UITableViewDataSource, UITableViewDelegate>
|
||||
@property(nonatomic, strong) UITableView *tableView;
|
||||
@property(nonatomic, strong) NSMutableArray<KBAiChatMessage *> *messages;
|
||||
@end
|
||||
|
||||
@implementation KBAiChatView
|
||||
|
||||
- (instancetype)initWithFrame:(CGRect)frame {
|
||||
self = [super initWithFrame:frame];
|
||||
if (self) {
|
||||
[self setup];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (instancetype)initWithCoder:(NSCoder *)coder {
|
||||
self = [super initWithCoder:coder];
|
||||
if (self) {
|
||||
[self setup];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)setup {
|
||||
self.messages = [[NSMutableArray alloc] init];
|
||||
|
||||
self.tableView = [[UITableView alloc] initWithFrame:self.bounds
|
||||
style:UITableViewStylePlain];
|
||||
self.tableView.autoresizingMask =
|
||||
UIViewAutoresizingFlexibleWidth | UIViewAutoresizingFlexibleHeight;
|
||||
self.tableView.dataSource = self;
|
||||
self.tableView.delegate = self;
|
||||
self.tableView.separatorStyle = UITableViewCellSeparatorStyleNone;
|
||||
self.tableView.backgroundColor = [UIColor clearColor];
|
||||
self.tableView.estimatedRowHeight = 60;
|
||||
self.tableView.rowHeight = UITableViewAutomaticDimension;
|
||||
[self.tableView registerClass:[KBAiChatBubbleCell class]
|
||||
forCellReuseIdentifier:@"ChatCell"];
|
||||
[self addSubview:self.tableView];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)addUserMessage:(NSString *)text {
|
||||
KBAiChatMessage *message = [KBAiChatMessage userMessageWithText:text];
|
||||
[self.messages addObject:message];
|
||||
|
||||
[self.tableView reloadData];
|
||||
[self scrollToBottom];
|
||||
}
|
||||
|
||||
- (void)addAssistantMessage:(NSString *)text {
|
||||
KBAiChatMessage *message = [KBAiChatMessage assistantMessageWithText:text];
|
||||
[self.messages addObject:message];
|
||||
|
||||
[self.tableView reloadData];
|
||||
[self scrollToBottom];
|
||||
}
|
||||
|
||||
- (void)updateLastAssistantMessage:(NSString *)text {
|
||||
// Find the last AI message
|
||||
for (NSInteger i = self.messages.count - 1; i >= 0; i--) {
|
||||
KBAiChatMessage *message = self.messages[i];
|
||||
if (message.type == KBAiChatMessageTypeAssistant && !message.isComplete) {
|
||||
message.text = text;
|
||||
|
||||
NSIndexPath *indexPath = [NSIndexPath indexPathForRow:i inSection:0];
|
||||
[self.tableView reloadRowsAtIndexPaths:@[ indexPath ]
|
||||
withRowAnimation:UITableViewRowAnimationNone];
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// If none was found, add a new message
|
||||
[self addAssistantMessage:text];
|
||||
}
|
||||
|
||||
- (void)markLastAssistantMessageComplete {
|
||||
for (NSInteger i = self.messages.count - 1; i >= 0; i--) {
|
||||
KBAiChatMessage *message = self.messages[i];
|
||||
if (message.type == KBAiChatMessageTypeAssistant) {
|
||||
message.isComplete = YES;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- (void)clearMessages {
|
||||
[self.messages removeAllObjects];
|
||||
[self.tableView reloadData];
|
||||
}
|
||||
|
||||
- (void)scrollToBottom {
|
||||
if (self.messages.count == 0)
|
||||
return;
|
||||
|
||||
NSIndexPath *lastIndexPath =
|
||||
[NSIndexPath indexPathForRow:self.messages.count - 1 inSection:0];
|
||||
[self.tableView scrollToRowAtIndexPath:lastIndexPath
|
||||
atScrollPosition:UITableViewScrollPositionBottom
|
||||
animated:YES];
|
||||
}
|
||||
|
||||
#pragma mark - UITableViewDataSource
|
||||
|
||||
- (NSInteger)tableView:(UITableView *)tableView
|
||||
numberOfRowsInSection:(NSInteger)section {
|
||||
return self.messages.count;
|
||||
}
|
||||
|
||||
- (UITableViewCell *)tableView:(UITableView *)tableView
|
||||
cellForRowAtIndexPath:(NSIndexPath *)indexPath {
|
||||
KBAiChatBubbleCell *cell =
|
||||
[tableView dequeueReusableCellWithIdentifier:@"ChatCell"
|
||||
forIndexPath:indexPath];
|
||||
|
||||
KBAiChatMessage *message = self.messages[indexPath.row];
|
||||
[cell configureWithMessage:message];
|
||||
|
||||
return cell;
|
||||
}
|
||||
|
||||
#pragma mark - UITableViewDelegate
|
||||
|
||||
- (CGFloat)tableView:(UITableView *)tableView
|
||||
estimatedHeightForRowAtIndexPath:(NSIndexPath *)indexPath {
|
||||
return 60;
|
||||
}
|
||||
|
||||
@end
|
||||
56
keyBoard/Class/AiTalk/V/KBAiRecordButton.h
Normal file
@@ -0,0 +1,56 @@
//
// KBAiRecordButton.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//

#import <UIKit/UIKit.h>

NS_ASSUME_NONNULL_BEGIN

/// Record button state
typedef NS_ENUM(NSInteger, KBAiRecordButtonState) {
KBAiRecordButtonStateNormal, // normal
KBAiRecordButtonStateRecording, // recording
KBAiRecordButtonStateDisabled // disabled
};

@class KBAiRecordButton;

/// Record button delegate
@protocol KBAiRecordButtonDelegate <NSObject>
@optional
/// Press began
- (void)recordButtonDidBeginPress:(KBAiRecordButton *)button;
/// Press ended
- (void)recordButtonDidEndPress:(KBAiRecordButton *)button;
/// Press was cancelled (e.g. the finger slid out)
- (void)recordButtonDidCancelPress:(KBAiRecordButton *)button;
@end

/// Press-and-hold-to-talk button
/// Supports a long-press gesture and a waveform animation
@interface KBAiRecordButton : UIView

@property(nonatomic, weak) id<KBAiRecordButtonDelegate> delegate;

/// Current state
@property(nonatomic, assign) KBAiRecordButtonState state;

/// Button title (normal state)
@property(nonatomic, copy) NSString *normalTitle;

/// Button title (recording state)
@property(nonatomic, copy) NSString *recordingTitle;

/// Tint color
@property(nonatomic, strong) UIColor *tintColor;

/// Update the volume (used for the waveform animation)
/// @param rms volume RMS value (0.0 - 1.0)
- (void)updateVolumeRMS:(float)rms;

@end

NS_ASSUME_NONNULL_END
248
keyBoard/Class/AiTalk/V/KBAiRecordButton.m
Normal file
@@ -0,0 +1,248 @@
|
||||
//
|
||||
// KBAiRecordButton.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "KBAiRecordButton.h"
|
||||
#import "KBAiWaveformView.h"
|
||||
|
||||
@interface KBAiRecordButton ()
|
||||
|
||||
@property(nonatomic, strong) UIView *backgroundView;
|
||||
@property(nonatomic, strong) UILabel *titleLabel;
|
||||
@property(nonatomic, strong) KBAiWaveformView *waveformView;
|
||||
@property(nonatomic, strong) UIImageView *micIconView;
|
||||
@property(nonatomic, assign) BOOL isPressing;
|
||||
|
||||
@end
|
||||
|
||||
@implementation KBAiRecordButton
|
||||
|
||||
- (instancetype)initWithFrame:(CGRect)frame {
|
||||
self = [super initWithFrame:frame];
|
||||
if (self) {
|
||||
[self setup];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (instancetype)initWithCoder:(NSCoder *)coder {
|
||||
self = [super initWithCoder:coder];
|
||||
if (self) {
|
||||
[self setup];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)setup {
|
||||
_state = KBAiRecordButtonStateNormal;
|
||||
_normalTitle = @"按住说话";
|
||||
_recordingTitle = @"松开结束";
|
||||
_tintColor = [UIColor systemBlueColor];
|
||||
|
||||
// Background view
|
||||
self.backgroundView = [[UIView alloc] init];
|
||||
self.backgroundView.backgroundColor = [UIColor systemGray6Color];
|
||||
self.backgroundView.layer.cornerRadius = 25;
|
||||
self.backgroundView.layer.masksToBounds = YES;
|
||||
self.backgroundView.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self addSubview:self.backgroundView];
|
||||
|
||||
// Microphone icon
|
||||
self.micIconView = [[UIImageView alloc] init];
|
||||
self.micIconView.image = [UIImage systemImageNamed:@"mic.fill"];
|
||||
self.micIconView.tintColor = self.tintColor;
|
||||
self.micIconView.contentMode = UIViewContentModeScaleAspectFit;
|
||||
self.micIconView.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.backgroundView addSubview:self.micIconView];
|
||||
|
||||
// Title label
|
||||
self.titleLabel = [[UILabel alloc] init];
|
||||
self.titleLabel.text = self.normalTitle;
|
||||
self.titleLabel.font = [UIFont systemFontOfSize:16 weight:UIFontWeightMedium];
|
||||
self.titleLabel.textColor = [UIColor labelColor];
|
||||
self.titleLabel.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.backgroundView addSubview:self.titleLabel];
|
||||
|
||||
// Waveform view (shown while recording)
|
||||
self.waveformView = [[KBAiWaveformView alloc] init];
|
||||
self.waveformView.waveColor = self.tintColor;
|
||||
self.waveformView.alpha = 0;
|
||||
self.waveformView.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.backgroundView addSubview:self.waveformView];
|
||||
|
||||
// Layout constraints
|
||||
[NSLayoutConstraint activateConstraints:@[
|
||||
[self.backgroundView.topAnchor constraintEqualToAnchor:self.topAnchor],
|
||||
[self.backgroundView.bottomAnchor
|
||||
constraintEqualToAnchor:self.bottomAnchor],
|
||||
[self.backgroundView.leadingAnchor
|
||||
constraintEqualToAnchor:self.leadingAnchor],
|
||||
[self.backgroundView.trailingAnchor
|
||||
constraintEqualToAnchor:self.trailingAnchor],
|
||||
|
||||
[self.micIconView.leadingAnchor
|
||||
constraintEqualToAnchor:self.backgroundView.leadingAnchor
|
||||
constant:20],
|
||||
[self.micIconView.centerYAnchor
|
||||
constraintEqualToAnchor:self.backgroundView.centerYAnchor],
|
||||
[self.micIconView.widthAnchor constraintEqualToConstant:24],
|
||||
[self.micIconView.heightAnchor constraintEqualToConstant:24],
|
||||
|
||||
[self.titleLabel.leadingAnchor
|
||||
constraintEqualToAnchor:self.micIconView.trailingAnchor
|
||||
constant:12],
|
||||
[self.titleLabel.centerYAnchor
|
||||
constraintEqualToAnchor:self.backgroundView.centerYAnchor],
|
||||
|
||||
[self.waveformView.trailingAnchor
|
||||
constraintEqualToAnchor:self.backgroundView.trailingAnchor
|
||||
constant:-20],
|
||||
[self.waveformView.centerYAnchor
|
||||
constraintEqualToAnchor:self.backgroundView.centerYAnchor],
|
||||
[self.waveformView.widthAnchor constraintEqualToConstant:60],
|
||||
[self.waveformView.heightAnchor constraintEqualToConstant:30],
|
||||
]];
|
||||
|
||||
// Add the long-press gesture
|
||||
UILongPressGestureRecognizer *longPress =
|
||||
[[UILongPressGestureRecognizer alloc]
|
||||
initWithTarget:self
|
||||
action:@selector(handleLongPress:)];
|
||||
longPress.minimumPressDuration = 0.05;
|
||||
[self addGestureRecognizer:longPress];
|
||||
}
|
||||
|
||||
#pragma mark - Setters
|
||||
|
||||
- (void)setState:(KBAiRecordButtonState)state {
|
||||
if (_state == state)
|
||||
return;
|
||||
_state = state;
|
||||
|
||||
[self updateAppearance];
|
||||
}
|
||||
|
||||
- (void)setTintColor:(UIColor *)tintColor {
|
||||
_tintColor = tintColor;
|
||||
self.micIconView.tintColor = tintColor;
|
||||
self.waveformView.waveColor = tintColor;
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)updateVolumeRMS:(float)rms {
|
||||
[self.waveformView updateWithRMS:rms];
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (void)updateAppearance {
|
||||
switch (self.state) {
|
||||
case KBAiRecordButtonStateNormal:
|
||||
self.titleLabel.text = self.normalTitle;
|
||||
self.backgroundView.backgroundColor = [UIColor systemGray6Color];
|
||||
self.micIconView.alpha = 1;
|
||||
self.waveformView.alpha = 0;
|
||||
[self.waveformView stopAnimation];
|
||||
break;
|
||||
|
||||
case KBAiRecordButtonStateRecording:
|
||||
self.titleLabel.text = self.recordingTitle;
|
||||
self.backgroundView.backgroundColor =
|
||||
[self.tintColor colorWithAlphaComponent:0.15];
|
||||
self.micIconView.alpha = 1;
|
||||
self.waveformView.alpha = 1;
|
||||
[self.waveformView startIdleAnimation];
|
||||
break;
|
||||
|
||||
case KBAiRecordButtonStateDisabled:
|
||||
self.titleLabel.text = self.normalTitle;
|
||||
self.backgroundView.backgroundColor = [UIColor systemGray5Color];
|
||||
self.alpha = 0.5;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- (void)handleLongPress:(UILongPressGestureRecognizer *)gesture {
|
||||
if (self.state == KBAiRecordButtonStateDisabled) {
|
||||
return;
|
||||
}
|
||||
|
||||
CGPoint location = [gesture locationInView:self];
|
||||
BOOL isInside = CGRectContainsPoint(self.bounds, location);
|
||||
|
||||
switch (gesture.state) {
|
||||
case UIGestureRecognizerStateBegan:
|
||||
self.isPressing = YES;
|
||||
[self animateScale:0.95];
|
||||
self.state = KBAiRecordButtonStateRecording;
|
||||
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(recordButtonDidBeginPress:)]) {
|
||||
[self.delegate recordButtonDidBeginPress:self];
|
||||
}
|
||||
break;
|
||||
|
||||
case UIGestureRecognizerStateChanged:
|
||||
if (!isInside && self.isPressing) {
|
||||
// Finger slid outside
|
||||
[self animateScale:1.0];
|
||||
} else if (isInside && self.isPressing) {
|
||||
// Finger slid back inside
|
||||
[self animateScale:0.95];
|
||||
}
|
||||
break;
|
||||
|
||||
case UIGestureRecognizerStateEnded:
|
||||
if (self.isPressing) {
|
||||
self.isPressing = NO;
|
||||
[self animateScale:1.0];
|
||||
self.state = KBAiRecordButtonStateNormal;
|
||||
[self.waveformView reset];
|
||||
|
||||
if (isInside) {
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(recordButtonDidEndPress:)]) {
|
||||
[self.delegate recordButtonDidEndPress:self];
|
||||
}
|
||||
} else {
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(recordButtonDidCancelPress:)]) {
|
||||
[self.delegate recordButtonDidCancelPress:self];
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case UIGestureRecognizerStateCancelled:
|
||||
case UIGestureRecognizerStateFailed:
|
||||
if (self.isPressing) {
|
||||
self.isPressing = NO;
|
||||
[self animateScale:1.0];
|
||||
self.state = KBAiRecordButtonStateNormal;
|
||||
[self.waveformView reset];
|
||||
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(recordButtonDidCancelPress:)]) {
|
||||
[self.delegate recordButtonDidCancelPress:self];
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- (void)animateScale:(CGFloat)scale {
|
||||
[UIView animateWithDuration:0.15
|
||||
animations:^{
|
||||
self.backgroundView.transform =
|
||||
CGAffineTransformMakeScale(scale, scale);
|
||||
}];
|
||||
}
|
||||
|
||||
@end
|
||||
43
keyBoard/Class/AiTalk/V/KBAiWaveformView.h
Normal file
@@ -0,0 +1,43 @@
//
// KBAiWaveformView.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//

#import <UIKit/UIKit.h>

NS_ASSUME_NONNULL_BEGIN

/// Waveform animation view
/// Shows a live waveform animation driven by the volume RMS value
@interface KBAiWaveformView : UIView

/// Waveform color
@property(nonatomic, strong) UIColor *waveColor;

/// Number of bars
@property(nonatomic, assign) NSInteger barCount;

/// Bar width
@property(nonatomic, assign) CGFloat barWidth;

/// Bar spacing
@property(nonatomic, assign) CGFloat barSpacing;

/// Update the volume value
/// @param rms volume RMS value (0.0 - 1.0)
- (void)updateWithRMS:(float)rms;

/// Start the animation (idle oscillation)
- (void)startIdleAnimation;

/// Stop the animation
- (void)stopAnimation;

/// Reset the waveform
- (void)reset;

@end

NS_ASSUME_NONNULL_END
163
keyBoard/Class/AiTalk/V/KBAiWaveformView.m
Normal file
@@ -0,0 +1,163 @@
|
||||
//
|
||||
// KBAiWaveformView.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "KBAiWaveformView.h"
|
||||
|
||||
@interface KBAiWaveformView ()
|
||||
@property(nonatomic, strong) NSMutableArray<CAShapeLayer *> *barLayers;
|
||||
@property(nonatomic, strong) NSMutableArray<NSNumber *> *barHeights;
|
||||
@property(nonatomic, strong) CADisplayLink *displayLink;
|
||||
@property(nonatomic, assign) float currentRMS;
|
||||
@property(nonatomic, assign) float targetRMS;
|
||||
@property(nonatomic, assign) BOOL isAnimating;
|
||||
@end
|
||||
|
||||
@implementation KBAiWaveformView
|
||||
|
||||
- (instancetype)initWithFrame:(CGRect)frame {
|
||||
self = [super initWithFrame:frame];
|
||||
if (self) {
|
||||
[self setup];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (instancetype)initWithCoder:(NSCoder *)coder {
|
||||
self = [super initWithCoder:coder];
|
||||
if (self) {
|
||||
[self setup];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)setup {
|
||||
_waveColor = [UIColor systemBlueColor];
|
||||
_barCount = 5;
|
||||
_barWidth = 4;
|
||||
_barSpacing = 3;
|
||||
_barLayers = [[NSMutableArray alloc] init];
|
||||
_barHeights = [[NSMutableArray alloc] init];
|
||||
_currentRMS = 0;
|
||||
_targetRMS = 0;
|
||||
|
||||
self.backgroundColor = [UIColor clearColor];
|
||||
}
|
||||
|
||||
- (void)layoutSubviews {
|
||||
[super layoutSubviews];
|
||||
[self setupBars];
|
||||
}
|
||||
|
||||
- (void)setupBars {
|
||||
// Remove old layers
|
||||
for (CAShapeLayer *layer in self.barLayers) {
|
||||
[layer removeFromSuperlayer];
|
||||
}
|
||||
[self.barLayers removeAllObjects];
|
||||
[self.barHeights removeAllObjects];
|
||||
|
||||
// Compute the total width
|
||||
CGFloat totalWidth =
|
||||
self.barCount * self.barWidth + (self.barCount - 1) * self.barSpacing;
|
||||
CGFloat startX = (self.bounds.size.width - totalWidth) / 2;
|
||||
CGFloat maxHeight = self.bounds.size.height;
|
||||
CGFloat minHeight = maxHeight * 0.2;
|
||||
|
||||
for (NSInteger i = 0; i < self.barCount; i++) {
|
||||
CAShapeLayer *barLayer = [CAShapeLayer layer];
|
||||
barLayer.fillColor = self.waveColor.CGColor;
|
||||
barLayer.cornerRadius = self.barWidth / 2;
|
||||
|
||||
CGFloat x = startX + i * (self.barWidth + self.barSpacing);
|
||||
CGFloat height = minHeight;
|
||||
CGFloat y = (maxHeight - height) / 2;
|
||||
|
||||
barLayer.frame = CGRectMake(x, y, self.barWidth, height);
|
||||
barLayer.backgroundColor = self.waveColor.CGColor;
|
||||
|
||||
[self.layer addSublayer:barLayer];
|
||||
[self.barLayers addObject:barLayer];
|
||||
[self.barHeights addObject:@(height)];
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)updateWithRMS:(float)rms {
|
||||
self.targetRMS = MIN(MAX(rms, 0), 1);
|
||||
}
|
||||
|
||||
- (void)startIdleAnimation {
|
||||
if (self.isAnimating)
|
||||
return;
|
||||
|
||||
self.isAnimating = YES;
|
||||
self.displayLink =
|
||||
[CADisplayLink displayLinkWithTarget:self
|
||||
selector:@selector(updateAnimation)];
|
||||
[self.displayLink addToRunLoop:[NSRunLoop mainRunLoop]
|
||||
forMode:NSRunLoopCommonModes];
|
||||
}
|
||||
|
||||
- (void)stopAnimation {
|
||||
self.isAnimating = NO;
|
||||
[self.displayLink invalidate];
|
||||
self.displayLink = nil;
|
||||
}
|
||||
|
||||
- (void)reset {
|
||||
self.currentRMS = 0;
|
||||
self.targetRMS = 0;
|
||||
[self updateBarsWithRMS:0];
|
||||
}
|
||||
|
||||
#pragma mark - Animation
|
||||
|
||||
- (void)updateAnimation {
|
||||
// Smoothly approach the target RMS
|
||||
CGFloat smoothing = 0.3;
|
||||
self.currentRMS =
|
||||
self.currentRMS + (self.targetRMS - self.currentRMS) * smoothing;
|
||||
|
||||
[self updateBarsWithRMS:self.currentRMS];
|
||||
}
|
||||
|
||||
- (void)updateBarsWithRMS:(float)rms {
|
||||
CGFloat maxHeight = self.bounds.size.height;
|
||||
CGFloat minHeight = maxHeight * 0.2;
|
||||
CGFloat range = maxHeight - minHeight;
|
||||
|
||||
// Give each bar a slightly different height and phase
|
||||
NSTimeInterval time = CACurrentMediaTime();
|
||||
|
||||
for (NSInteger i = 0; i < self.barLayers.count; i++) {
|
||||
CAShapeLayer *layer = self.barLayers[i];
|
||||
|
||||
// Add a time-based wave effect
|
||||
CGFloat phase = (CGFloat)i / self.barLayers.count * M_PI * 2;
|
||||
CGFloat wave = sin(time * 3 + phase) * 0.3 + 0.7; // 0.4 - 1.0
|
||||
|
||||
// Compute the height
|
||||
CGFloat heightFactor = rms * wave;
|
||||
CGFloat height = minHeight + range * heightFactor;
|
||||
height = MAX(minHeight, MIN(maxHeight, height));
|
||||
|
||||
// Update the position
|
||||
CGFloat y = (maxHeight - height) / 2;
|
||||
|
||||
[CATransaction begin];
|
||||
[CATransaction setDisableActions:YES];
|
||||
layer.frame = CGRectMake(layer.frame.origin.x, y, self.barWidth, height);
|
||||
[CATransaction commit];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self stopAnimation];
|
||||
}
|
||||
|
||||
@end
|
||||
17
keyBoard/Class/AiTalk/VC/KBAiMainVC.h
Normal file
@@ -0,0 +1,17 @@
//
// KBAiMainVC.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//

#import <UIKit/UIKit.h>

NS_ASSUME_NONNULL_BEGIN

/// Main screen for the AI voice companion chat
@interface KBAiMainVC : UIViewController

@end

NS_ASSUME_NONNULL_END
291
keyBoard/Class/AiTalk/VC/KBAiMainVC.m
Normal file
@@ -0,0 +1,291 @@
|
||||
//
|
||||
// KBAiMainVC.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "KBAiMainVC.h"
|
||||
#import "ConversationOrchestrator.h"
|
||||
#import "KBAiChatView.h"
|
||||
#import "KBAiRecordButton.h"
|
||||
#import "KBAICommentView.h"
|
||||
#import "LSTPopView.h"
|
||||
|
||||
@interface KBAiMainVC () <KBAiRecordButtonDelegate>
|
||||
@property (nonatomic,weak) LSTPopView *popView;
|
||||
|
||||
// UI
|
||||
@property(nonatomic, strong) KBAiChatView *chatView;
|
||||
@property(nonatomic, strong) KBAiRecordButton *recordButton;
|
||||
@property(nonatomic, strong) UILabel *statusLabel;
|
||||
|
||||
// 核心模块
|
||||
@property(nonatomic, strong) ConversationOrchestrator *orchestrator;
|
||||
|
||||
@end
|
||||
|
||||
@implementation KBAiMainVC
|
||||
|
||||
#pragma mark - Lifecycle
|
||||
|
||||
- (void)viewDidLoad {
|
||||
[super viewDidLoad];
|
||||
|
||||
[self setupUI];
|
||||
[self setupOrchestrator];
|
||||
}
|
||||
|
||||
- (void)viewWillDisappear:(BOOL)animated {
|
||||
[super viewWillDisappear:animated];
|
||||
|
||||
// Stop the conversation when the page disappears
|
||||
[self.orchestrator stop];
|
||||
}
|
||||
|
||||
#pragma mark - UI Setup
|
||||
|
||||
- (void)setupUI {
|
||||
self.view.backgroundColor = [UIColor systemBackgroundColor];
|
||||
self.title = @"AI 助手";
|
||||
|
||||
// Safe area
|
||||
UILayoutGuide *safeArea = self.view.safeAreaLayoutGuide;
|
||||
|
||||
// Status label
|
||||
self.statusLabel = [[UILabel alloc] init];
|
||||
self.statusLabel.text = @"按住按钮开始对话";
|
||||
self.statusLabel.font = [UIFont systemFontOfSize:14];
|
||||
self.statusLabel.textColor = [UIColor secondaryLabelColor];
|
||||
self.statusLabel.textAlignment = NSTextAlignmentCenter;
|
||||
self.statusLabel.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.view addSubview:self.statusLabel];
|
||||
|
||||
// Chat view
|
||||
self.chatView = [[KBAiChatView alloc] init];
|
||||
self.chatView.backgroundColor = [UIColor systemBackgroundColor];
|
||||
self.chatView.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.view addSubview:self.chatView];
|
||||
|
||||
// Record button
|
||||
self.recordButton = [[KBAiRecordButton alloc] init];
|
||||
self.recordButton.delegate = self;
|
||||
self.recordButton.translatesAutoresizingMaskIntoConstraints = NO;
|
||||
[self.view addSubview:self.recordButton];
|
||||
|
||||
// Layout constraints
|
||||
[NSLayoutConstraint activateConstraints:@[
|
||||
// Status label
|
||||
[self.statusLabel.topAnchor constraintEqualToAnchor:safeArea.topAnchor
|
||||
constant:8],
|
||||
[self.statusLabel.leadingAnchor
|
||||
constraintEqualToAnchor:safeArea.leadingAnchor
|
||||
constant:16],
|
||||
[self.statusLabel.trailingAnchor
|
||||
constraintEqualToAnchor:safeArea.trailingAnchor
|
||||
constant:-16],
|
||||
|
||||
// Chat view
|
||||
[self.chatView.topAnchor
|
||||
constraintEqualToAnchor:self.statusLabel.bottomAnchor
|
||||
constant:8],
|
||||
[self.chatView.leadingAnchor
|
||||
constraintEqualToAnchor:safeArea.leadingAnchor],
|
||||
[self.chatView.trailingAnchor
|
||||
constraintEqualToAnchor:safeArea.trailingAnchor],
|
||||
[self.chatView.bottomAnchor
|
||||
constraintEqualToAnchor:self.recordButton.topAnchor
|
||||
constant:-16],
|
||||
|
||||
// Record button
|
||||
[self.recordButton.leadingAnchor
|
||||
constraintEqualToAnchor:safeArea.leadingAnchor
|
||||
constant:20],
|
||||
[self.recordButton.trailingAnchor
|
||||
constraintEqualToAnchor:safeArea.trailingAnchor
|
||||
constant:-20],
|
||||
[self.recordButton.bottomAnchor
|
||||
constraintEqualToAnchor:safeArea.bottomAnchor
|
||||
constant:-16],
|
||||
[self.recordButton.heightAnchor constraintEqualToConstant:50],
|
||||
]];
|
||||
}
|
||||
|
||||
#pragma mark - Orchestrator Setup
|
||||
|
||||
- (void)setupOrchestrator {
|
||||
self.orchestrator = [[ConversationOrchestrator alloc] init];
|
||||
|
||||
// Configure server URLs (TODO: replace with the real endpoints)
|
||||
// self.orchestrator.asrServerURL = @"wss://your-asr-server.com/ws/asr";
|
||||
// self.orchestrator.llmServerURL =
|
||||
// @"https://your-llm-server.com/api/chat/stream";
|
||||
// self.orchestrator.ttsServerURL = @"https://your-tts-server.com/api/tts";
|
||||
|
||||
__weak typeof(self) weakSelf = self;
|
||||
|
||||
// State-change callback
|
||||
self.orchestrator.onStateChange = ^(ConversationState state) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
[strongSelf updateStatusForState:state];
|
||||
};
|
||||
|
||||
// Live partial-recognition text callback
|
||||
self.orchestrator.onPartialText = ^(NSString *text) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
strongSelf.statusLabel.text = text.length > 0 ? text : @"正在识别...";
|
||||
};
|
||||
|
||||
// User final-text callback
|
||||
self.orchestrator.onUserFinalText = ^(NSString *text) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
if (text.length > 0) {
|
||||
[strongSelf.chatView addUserMessage:text];
|
||||
}
|
||||
};
|
||||
|
||||
// AI visible-text callback (typewriter effect)
|
||||
self.orchestrator.onAssistantVisibleText = ^(NSString *text) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
[strongSelf.chatView updateLastAssistantMessage:text];
|
||||
};
|
||||
|
||||
// AI full-reply callback
|
||||
self.orchestrator.onAssistantFullText = ^(NSString *text) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
[strongSelf.chatView updateLastAssistantMessage:text];
|
||||
[strongSelf.chatView markLastAssistantMessageComplete];
|
||||
};
|
||||
|
||||
// Volume-update callback
|
||||
self.orchestrator.onVolumeUpdate = ^(float rms) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
[strongSelf.recordButton updateVolumeRMS:rms];
|
||||
};
|
||||
|
||||
// AI started speaking
|
||||
self.orchestrator.onSpeakingStart = ^{
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
// Add an empty AI message as a placeholder
|
||||
[strongSelf.chatView addAssistantMessage:@""];
|
||||
};
|
||||
|
||||
// AI finished speaking
|
||||
self.orchestrator.onSpeakingEnd = ^{
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
[strongSelf.chatView markLastAssistantMessageComplete];
|
||||
};
|
||||
|
||||
// Error callback
|
||||
self.orchestrator.onError = ^(NSError *error) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
[strongSelf showError:error];
|
||||
};
|
||||
}
|
||||
|
||||
#pragma mark - Events
|
||||
- (void)showComment{
|
||||
CGFloat customViewHeight = KB_SCREEN_HEIGHT*(0.8);
|
||||
KBAICommentView *customView = [[KBAICommentView alloc] initWithFrame:CGRectMake(0, 0, KB_SCREEN_WIDTH, customViewHeight)];
|
||||
LSTPopView *popView = [LSTPopView initWithCustomView:customView
|
||||
parentView:self.view
|
||||
popStyle:LSTPopStyleSmoothFromBottom
|
||||
dismissStyle:LSTDismissStyleSmoothToBottom];
|
||||
self.popView = popView;
|
||||
popView.priority = 1000;
|
||||
popView.hemStyle = LSTHemStyleBottom;
|
||||
popView.dragStyle = LSTDragStyleY_Positive;
|
||||
popView.dragDistance = customViewHeight*0.5;
|
||||
popView.sweepStyle = LSTSweepStyleY_Positive;
|
||||
popView.swipeVelocity = 1600;
|
||||
popView.sweepDismissStyle = LSTSweepDismissStyleSmooth;
|
||||
|
||||
|
||||
[popView pop];
|
||||
}
|
||||
|
||||
#pragma mark - UI Updates
|
||||
|
||||
- (void)updateStatusForState:(ConversationState)state {
|
||||
switch (state) {
|
||||
case ConversationStateIdle:
|
||||
self.statusLabel.text = @"按住按钮开始对话";
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
break;
|
||||
|
||||
case ConversationStateListening:
|
||||
self.statusLabel.text = @"正在聆听...";
|
||||
self.recordButton.state = KBAiRecordButtonStateRecording;
|
||||
break;
|
||||
|
||||
case ConversationStateRecognizing:
|
||||
self.statusLabel.text = @"正在识别...";
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
break;
|
||||
|
||||
case ConversationStateThinking:
|
||||
self.statusLabel.text = @"AI 正在思考...";
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
break;
|
||||
|
||||
case ConversationStateSpeaking:
|
||||
self.statusLabel.text = @"AI 正在回复...";
|
||||
self.recordButton.state = KBAiRecordButtonStateNormal;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
- (void)showError:(NSError *)error {
|
||||
UIAlertController *alert =
|
||||
[UIAlertController alertControllerWithTitle:@"错误"
|
||||
message:error.localizedDescription
|
||||
preferredStyle:UIAlertControllerStyleAlert];
|
||||
[alert addAction:[UIAlertAction actionWithTitle:@"确定"
|
||||
style:UIAlertActionStyleDefault
|
||||
handler:nil]];
|
||||
[self presentViewController:alert animated:YES completion:nil];
|
||||
}
|
||||
|
||||
#pragma mark - KBAiRecordButtonDelegate
|
||||
|
||||
- (void)recordButtonDidBeginPress:(KBAiRecordButton *)button {
|
||||
[self.orchestrator userDidPressRecord];
|
||||
}
|
||||
|
||||
- (void)recordButtonDidEndPress:(KBAiRecordButton *)button {
|
||||
[self.orchestrator userDidReleaseRecord];
|
||||
}
|
||||
|
||||
- (void)recordButtonDidCancelPress:(KBAiRecordButton *)button {
|
||||
// Recording cancelled (still call release; ASR will return an empty or partial result)
|
||||
[self.orchestrator userDidReleaseRecord];
|
||||
}
|
||||
|
||||
@end
|
||||
51
keyBoard/Class/AiTalk/VM/ASRStreamClient.h
Normal file
@@ -0,0 +1,51 @@
//
// ASRStreamClient.h
// keyBoard
//
// Created by Mac on 2026/1/15.
//

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/// Delegate for the streaming ASR client
@protocol ASRStreamClientDelegate <NSObject>
@required
/// Received a live (partial) recognition result
- (void)asrClientDidReceivePartialText:(NSString *)text;
/// Received the final recognition result
- (void)asrClientDidReceiveFinalText:(NSString *)text;
/// Recognition failed
- (void)asrClientDidFail:(NSError *)error;
@end

/// Streaming ASR client
/// Implements streaming speech recognition over NSURLSessionWebSocketTask
@interface ASRStreamClient : NSObject

@property(nonatomic, weak) id<ASRStreamClientDelegate> delegate;

/// ASR server WebSocket URL
@property(nonatomic, copy) NSString *serverURL;

/// Whether the client is connected
@property(nonatomic, assign, readonly, getter=isConnected) BOOL connected;

/// Start a new recognition session
/// @param sessionId session ID
- (void)startWithSessionId:(NSString *)sessionId;

/// Send a PCM audio frame (20 ms / 640 bytes)
/// @param pcmFrame PCM data
- (void)sendAudioPCMFrame:(NSData *)pcmFrame;

/// End the current session and request the final result
- (void)finalize;

/// Cancel the session
- (void)cancel;

@end

NS_ASSUME_NONNULL_END
271
keyBoard/Class/AiTalk/VM/ASRStreamClient.m
Normal file
@@ -0,0 +1,271 @@
|
||||
//
|
||||
// ASRStreamClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "ASRStreamClient.h"
|
||||
#import "AudioCaptureManager.h"
|
||||
|
||||
@interface ASRStreamClient () <NSURLSessionWebSocketDelegate>
|
||||
|
||||
@property(nonatomic, strong) NSURLSession *urlSession;
|
||||
@property(nonatomic, strong) NSURLSessionWebSocketTask *webSocketTask;
|
||||
@property(nonatomic, copy) NSString *currentSessionId;
|
||||
@property(nonatomic, strong) dispatch_queue_t networkQueue;
|
||||
@property(nonatomic, assign) BOOL connected;
|
||||
|
||||
@end
|
||||
|
||||
@implementation ASRStreamClient
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.asr.network",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
// TODO: replace with the real ASR server URL
|
||||
_serverURL = @"wss://your-asr-server.com/ws/asr";
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self cancel];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)startWithSessionId:(NSString *)sessionId {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
[self cancelInternal];
|
||||
|
||||
self.currentSessionId = sessionId;
|
||||
|
||||
// Create the WebSocket connection
|
||||
NSURL *url = [NSURL URLWithString:self.serverURL];
|
||||
NSURLSessionConfiguration *config =
|
||||
[NSURLSessionConfiguration defaultSessionConfiguration];
|
||||
config.timeoutIntervalForRequest = 30;
|
||||
config.timeoutIntervalForResource = 300;
|
||||
|
||||
self.urlSession = [NSURLSession sessionWithConfiguration:config
|
||||
delegate:self
|
||||
delegateQueue:nil];
|
||||
|
||||
self.webSocketTask = [self.urlSession webSocketTaskWithURL:url];
|
||||
[self.webSocketTask resume];
|
||||
|
||||
// Send the start message
|
||||
NSDictionary *startMessage = @{
|
||||
@"type" : @"start",
|
||||
@"sessionId" : sessionId,
|
||||
@"format" : @"pcm_s16le",
|
||||
@"sampleRate" : @(kAudioSampleRate),
|
||||
@"channels" : @(kAudioChannels)
|
||||
};
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:startMessage
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *jsonString = [[NSString alloc] initWithData:jsonData
|
||||
encoding:NSUTF8StringEncoding];
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
|
||||
|
||||
[self.webSocketTask
|
||||
sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
[self reportError:error];
|
||||
} else {
|
||||
self.connected = YES;
|
||||
[self receiveMessage];
|
||||
NSLog(@"[ASRStreamClient] Started session: %@", sessionId);
|
||||
}
|
||||
}];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)sendAudioPCMFrame:(NSData *)pcmFrame {
|
||||
if (!self.connected || !self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithData:pcmFrame];
|
||||
[self.webSocketTask sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
NSLog(@"[ASRStreamClient] Failed to send audio frame: %@",
|
||||
error.localizedDescription);
|
||||
}
|
||||
}];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)finalize {
|
||||
if (!self.connected || !self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
NSDictionary *finalizeMessage =
|
||||
@{@"type" : @"finalize", @"sessionId" : self.currentSessionId ?: @""};
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:finalizeMessage
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *jsonString = [[NSString alloc] initWithData:jsonData
|
||||
encoding:NSUTF8StringEncoding];
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
|
||||
|
||||
[self.webSocketTask sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
[self reportError:error];
|
||||
} else {
|
||||
NSLog(@"[ASRStreamClient] Sent finalize for session: %@",
|
||||
self.currentSessionId);
|
||||
}
|
||||
}];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
[self cancelInternal];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (void)cancelInternal {
|
||||
self.connected = NO;
|
||||
|
||||
if (self.webSocketTask) {
|
||||
[self.webSocketTask cancel];
|
||||
self.webSocketTask = nil;
|
||||
}
|
||||
|
||||
if (self.urlSession) {
|
||||
[self.urlSession invalidateAndCancel];
|
||||
self.urlSession = nil;
|
||||
}
|
||||
|
||||
self.currentSessionId = nil;
|
||||
}
|
||||
|
||||
- (void)receiveMessage {
|
||||
if (!self.webSocketTask) {
|
||||
return;
|
||||
}
|
||||
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[self.webSocketTask receiveMessageWithCompletionHandler:^(
|
||||
NSURLSessionWebSocketMessage *_Nullable message,
|
||||
NSError *_Nullable error) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
if (error) {
|
||||
      // 检查是否是正常关闭(57 = ENOTCONN,socket 已被本端关闭;NSURLErrorCancelled = 主动取消)
      if (error.code != 57 && error.code != NSURLErrorCancelled) {
|
||||
[strongSelf reportError:error];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type == NSURLSessionWebSocketMessageTypeString) {
|
||||
[strongSelf handleTextMessage:message.string];
|
||||
}
|
||||
|
||||
// 继续接收下一条消息
|
||||
[strongSelf receiveMessage];
|
||||
}];
|
||||
}
|
||||
|
||||
- (void)handleTextMessage:(NSString *)text {
|
||||
NSData *data = [text dataUsingEncoding:NSUTF8StringEncoding];
|
||||
NSError *jsonError = nil;
|
||||
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data
|
||||
options:0
|
||||
error:&jsonError];
|
||||
|
||||
if (jsonError) {
|
||||
NSLog(@"[ASRStreamClient] Failed to parse message: %@", text);
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *type = json[@"type"];
|
||||
|
||||
if ([type isEqualToString:@"partial"]) {
|
||||
NSString *partialText = json[@"text"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(asrClientDidReceivePartialText:)]) {
|
||||
[self.delegate asrClientDidReceivePartialText:partialText];
|
||||
}
|
||||
});
|
||||
} else if ([type isEqualToString:@"final"]) {
|
||||
NSString *finalText = json[@"text"] ?: @"";
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(asrClientDidReceiveFinalText:)]) {
|
||||
[self.delegate asrClientDidReceiveFinalText:finalText];
|
||||
}
|
||||
});
|
||||
// 收到最终结果后关闭连接
|
||||
[self cancelInternal];
|
||||
} else if ([type isEqualToString:@"error"]) {
|
||||
NSInteger code = [json[@"code"] integerValue];
|
||||
NSString *message = json[@"message"] ?: @"Unknown error";
|
||||
NSError *error =
|
||||
[NSError errorWithDomain:@"ASRStreamClient"
|
||||
code:code
|
||||
userInfo:@{NSLocalizedDescriptionKey : message}];
|
||||
[self reportError:error];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector(asrClientDidFail:)]) {
|
||||
[self.delegate asrClientDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - NSURLSessionWebSocketDelegate
|
||||
|
||||
- (void)URLSession:(NSURLSession *)session
|
||||
webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
|
||||
didOpenWithProtocol:(NSString *)protocol {
|
||||
NSLog(@"[ASRStreamClient] WebSocket connected with protocol: %@", protocol);
|
||||
}
|
||||
|
||||
- (void)URLSession:(NSURLSession *)session
|
||||
webSocketTask:(NSURLSessionWebSocketTask *)webSocketTask
|
||||
didCloseWithCode:(NSURLSessionWebSocketCloseCode)closeCode
|
||||
reason:(NSData *)reason {
|
||||
NSLog(@"[ASRStreamClient] WebSocket closed with code: %ld", (long)closeCode);
|
||||
self.connected = NO;
|
||||
}
|
||||
|
||||
@end
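For reference, the text frames that -handleTextMessage: above parses look like the following. The shapes are derived directly from the parsing code in this file; the concrete values are examples only, and the real server contract may carry extra fields.

{"type":"partial","text":"今天天气"}
{"type":"final","text":"今天天气怎么样"}
{"type":"error","code":4001,"message":"unsupported audio format"}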
|
||||
55
keyBoard/Class/AiTalk/VM/AudioCaptureManager.h
Normal file
@@ -0,0 +1,55 @@
|
||||
//
|
||||
// AudioCaptureManager.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 音频采集参数(固定值,便于端到端稳定)
|
||||
/// Sample Rate: 16000 Hz
|
||||
/// Channels: 1 (Mono)
|
||||
/// Format: PCM Int16 (pcm_s16le)
|
||||
/// Frame Duration: 20ms (320 samples, 640 bytes)
|
||||
extern const double kAudioSampleRate; // 16000.0
|
||||
extern const int kAudioChannels; // 1
|
||||
extern const NSUInteger kAudioFrameDuration; // 20 (ms)
|
||||
extern const NSUInteger kAudioFrameSamples; // 320 (16000 * 0.02)
|
||||
extern const NSUInteger kAudioFrameBytes; // 640 (320 * 2)
|
||||
|
||||
/// 音频采集管理器代理
|
||||
@protocol AudioCaptureManagerDelegate <NSObject>
|
||||
@required
|
||||
/// 输出 PCM 帧(20ms / 640 bytes)
|
||||
/// @param pcmFrame 640 字节的 PCM Int16 数据
|
||||
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame;
|
||||
|
||||
@optional
|
||||
/// 更新 RMS 值(用于波形显示)
|
||||
/// @param rms 当前音量的 RMS 值 (0.0 - 1.0)
|
||||
- (void)audioCaptureManagerDidUpdateRMS:(float)rms;
|
||||
@end
|
||||
|
||||
/// 音频采集管理器
|
||||
/// 使用 AVAudioEngine 采集麦克风音频,输出 20ms PCM 帧
|
||||
@interface AudioCaptureManager : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<AudioCaptureManagerDelegate> delegate;
|
||||
|
||||
/// 是否正在采集
|
||||
@property(nonatomic, assign, readonly, getter=isCapturing) BOOL capturing;
|
||||
|
||||
/// 开始采集
|
||||
/// @param error 错误信息
|
||||
/// @return 是否启动成功
|
||||
- (BOOL)startCapture:(NSError **)error;
|
||||
|
||||
/// 停止采集
|
||||
- (void)stopCapture;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
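A minimal start/stop sketch against this interface (illustrative; the delegate owner and UI handling are placeholders):

AudioCaptureManager *capture = [[AudioCaptureManager alloc] init];
capture.delegate = self;  // self conforms to AudioCaptureManagerDelegate

NSError *error = nil;
if (![capture startCapture:&error]) {
  NSLog(@"capture failed: %@", error.localizedDescription);
}
// Each -audioCaptureManagerDidOutputPCMFrame: callback delivers exactly
// kAudioFrameBytes (640) bytes of 16 kHz mono Int16 PCM, i.e. one 20 ms frame.
[capture stopCapture];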
|
||||
269
keyBoard/Class/AiTalk/VM/AudioCaptureManager.m
Normal file
@@ -0,0 +1,269 @@
|
||||
//
|
||||
// AudioCaptureManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "AudioCaptureManager.h"
|
||||
#import <AVFoundation/AVFoundation.h>
|
||||
|
||||
// 音频采集参数常量
|
||||
const double kAudioSampleRate = 16000.0;
|
||||
const int kAudioChannels = 1;
|
||||
const NSUInteger kAudioFrameDuration = 20; // ms
|
||||
const NSUInteger kAudioFrameSamples = 320; // 16000 * 0.02
|
||||
const NSUInteger kAudioFrameBytes = 640; // 320 * 2 (Int16)
|
||||
|
||||
@interface AudioCaptureManager ()
|
||||
|
||||
@property(nonatomic, strong) AVAudioEngine *audioEngine;
|
||||
@property(nonatomic, strong) dispatch_queue_t audioQueue;
|
||||
@property(nonatomic, assign) BOOL capturing;
|
||||
|
||||
// Ring buffer for accumulating samples to form 20ms frames
|
||||
@property(nonatomic, strong) NSMutableData *ringBuffer;
|
||||
@property(nonatomic, assign) NSUInteger ringBufferWriteIndex;
|
||||
|
||||
@end
|
||||
|
||||
@implementation AudioCaptureManager
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_audioEngine = [[AVAudioEngine alloc] init];
|
||||
_audioQueue = dispatch_queue_create("com.keyboard.aitalk.audiocapture",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
_ringBuffer = [[NSMutableData alloc]
|
||||
initWithLength:kAudioFrameBytes * 4]; // Buffer for multiple frames
|
||||
_ringBufferWriteIndex = 0;
|
||||
_capturing = NO;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self stopCapture];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (BOOL)startCapture:(NSError **)error {
|
||||
if (self.capturing) {
|
||||
return YES;
|
||||
}
|
||||
|
||||
AVAudioInputNode *inputNode = self.audioEngine.inputNode;
|
||||
|
||||
// 获取输入格式
|
||||
AVAudioFormat *inputFormat = [inputNode outputFormatForBus:0];
|
||||
|
||||
// 目标格式:16kHz, Mono, Int16
|
||||
AVAudioFormat *targetFormat =
|
||||
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatInt16
|
||||
sampleRate:kAudioSampleRate
|
||||
channels:kAudioChannels
|
||||
interleaved:YES];
|
||||
|
||||
// 创建格式转换器
|
||||
AVAudioConverter *converter =
|
||||
[[AVAudioConverter alloc] initFromFormat:inputFormat
|
||||
toFormat:targetFormat];
|
||||
if (!converter) {
|
||||
if (error) {
|
||||
*error = [NSError errorWithDomain:@"AudioCaptureManager"
|
||||
code:-1
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey :
|
||||
@"Failed to create audio converter"
|
||||
}];
|
||||
}
|
||||
return NO;
|
||||
}
|
||||
|
||||
// 计算合适的 buffer size(约 20ms 的输入采样数)
|
||||
AVAudioFrameCount bufferSize =
|
||||
(AVAudioFrameCount)(inputFormat.sampleRate * 0.02);
|
||||
|
||||
// 安装 tap
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[inputNode installTapOnBus:0
|
||||
bufferSize:bufferSize
|
||||
format:inputFormat
|
||||
block:^(AVAudioPCMBuffer *_Nonnull buffer,
|
||||
AVAudioTime *_Nonnull when) {
|
||||
[weakSelf processAudioBuffer:buffer
|
||||
withConverter:converter
|
||||
targetFormat:targetFormat];
|
||||
}];
|
||||
|
||||
// 启动引擎
|
||||
NSError *startError = nil;
|
||||
[self.audioEngine prepare];
|
||||
|
||||
if (![self.audioEngine startAndReturnError:&startError]) {
|
||||
[inputNode removeTapOnBus:0];
|
||||
if (error) {
|
||||
*error = startError;
|
||||
}
|
||||
NSLog(@"[AudioCaptureManager] Failed to start engine: %@",
|
||||
startError.localizedDescription);
|
||||
return NO;
|
||||
}
|
||||
|
||||
self.capturing = YES;
|
||||
self.ringBufferWriteIndex = 0;
|
||||
|
||||
NSLog(@"[AudioCaptureManager] Started capturing at %.0f Hz",
|
||||
inputFormat.sampleRate);
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (void)stopCapture {
|
||||
if (!self.capturing) {
|
||||
return;
|
||||
}
|
||||
|
||||
[self.audioEngine.inputNode removeTapOnBus:0];
|
||||
[self.audioEngine stop];
|
||||
|
||||
self.capturing = NO;
|
||||
self.ringBufferWriteIndex = 0;
|
||||
|
||||
NSLog(@"[AudioCaptureManager] Stopped capturing");
|
||||
}
|
||||
|
||||
#pragma mark - Audio Processing
|
||||
|
||||
- (void)processAudioBuffer:(AVAudioPCMBuffer *)buffer
|
||||
withConverter:(AVAudioConverter *)converter
|
||||
targetFormat:(AVAudioFormat *)targetFormat {
|
||||
|
||||
if (!self.capturing) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 计算输出帧数
|
||||
AVAudioFrameCount outputFrameCapacity =
|
||||
(AVAudioFrameCount)(buffer.frameLength *
|
||||
(kAudioSampleRate / buffer.format.sampleRate)) +
|
||||
1;
|
||||
|
||||
// 创建输出 buffer
|
||||
AVAudioPCMBuffer *outputBuffer =
|
||||
[[AVAudioPCMBuffer alloc] initWithPCMFormat:targetFormat
|
||||
frameCapacity:outputFrameCapacity];
|
||||
|
||||
  // 格式转换(input block 只提供一次数据,否则 converter 可能重复消费同一个 buffer)
  NSError *conversionError = nil;
  __block BOOL didProvideBuffer = NO;
  AVAudioConverterInputBlock inputBlock = ^AVAudioBuffer *_Nullable(
      AVAudioPacketCount inNumberOfPackets,
      AVAudioConverterInputStatus *_Nonnull outStatus) {
    if (didProvideBuffer) {
      *outStatus = AVAudioConverterInputStatus_NoDataNow;
      return nil;
    }
    didProvideBuffer = YES;
    *outStatus = AVAudioConverterInputStatus_HaveData;
    return buffer;
  };
|
||||
|
||||
AVAudioConverterOutputStatus status =
|
||||
[converter convertToBuffer:outputBuffer
|
||||
error:&conversionError
|
||||
withInputFromBlock:inputBlock];
|
||||
|
||||
if (status == AVAudioConverterOutputStatus_Error) {
|
||||
NSLog(@"[AudioCaptureManager] Conversion error: %@",
|
||||
conversionError.localizedDescription);
|
||||
return;
|
||||
}
|
||||
|
||||
  // 获取 Int16 数据
  int16_t *samples = (int16_t *)outputBuffer.int16ChannelData[0];
  NSUInteger sampleCount = outputBuffer.frameLength;
  NSUInteger byteCount = sampleCount * sizeof(int16_t);

  // 计算 RMS
  [self calculateAndReportRMS:samples sampleCount:sampleCount];

  // 先拷贝数据再异步写入 ring buffer,避免 block 执行时 outputBuffer 已释放(悬垂指针)
  NSData *pcmCopy = [NSData dataWithBytes:samples length:byteCount];
  dispatch_async(self.audioQueue, ^{
    [self appendToRingBuffer:(int16_t *)pcmCopy.bytes byteCount:pcmCopy.length];
  });
|
||||
}
|
||||
|
||||
- (void)appendToRingBuffer:(int16_t *)samples byteCount:(NSUInteger)byteCount {
|
||||
// 将新数据追加到 ring buffer
|
||||
uint8_t *ringBufferBytes = (uint8_t *)self.ringBuffer.mutableBytes;
|
||||
NSUInteger ringBufferLength = self.ringBuffer.length;
|
||||
|
||||
NSUInteger bytesToCopy = byteCount;
|
||||
NSUInteger sourceOffset = 0;
|
||||
|
||||
while (bytesToCopy > 0) {
|
||||
NSUInteger spaceAvailable = ringBufferLength - self.ringBufferWriteIndex;
|
||||
NSUInteger copySize = MIN(bytesToCopy, spaceAvailable);
|
||||
|
||||
memcpy(ringBufferBytes + self.ringBufferWriteIndex,
|
||||
(uint8_t *)samples + sourceOffset, copySize);
|
||||
self.ringBufferWriteIndex += copySize;
|
||||
sourceOffset += copySize;
|
||||
bytesToCopy -= copySize;
|
||||
|
||||
// 检查是否有完整的 20ms 帧
|
||||
while (self.ringBufferWriteIndex >= kAudioFrameBytes) {
|
||||
// 提取一个完整帧
|
||||
NSData *frame = [NSData dataWithBytes:ringBufferBytes
|
||||
length:kAudioFrameBytes];
|
||||
|
||||
// 移动剩余数据到开头
|
||||
NSUInteger remaining = self.ringBufferWriteIndex - kAudioFrameBytes;
|
||||
if (remaining > 0) {
|
||||
memmove(ringBufferBytes, ringBufferBytes + kAudioFrameBytes, remaining);
|
||||
}
|
||||
self.ringBufferWriteIndex = remaining;
|
||||
|
||||
// 回调输出帧
|
||||
[self outputPCMFrame:frame];
|
||||
}
|
||||
|
||||
// 如果 ring buffer 已满,从头开始
|
||||
if (self.ringBufferWriteIndex >= ringBufferLength) {
|
||||
self.ringBufferWriteIndex = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- (void)outputPCMFrame:(NSData *)frame {
|
||||
if (!self.capturing) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(audioCaptureManagerDidOutputPCMFrame:)]) {
|
||||
[self.delegate audioCaptureManagerDidOutputPCMFrame:frame];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)calculateAndReportRMS:(int16_t *)samples
|
||||
sampleCount:(NSUInteger)sampleCount {
|
||||
if (sampleCount == 0)
|
||||
return;
|
||||
|
||||
// 计算 RMS
|
||||
double sum = 0.0;
|
||||
for (NSUInteger i = 0; i < sampleCount; i++) {
|
||||
double sample = (double)samples[i] / 32768.0; // Normalize to -1.0 ~ 1.0
|
||||
sum += sample * sample;
|
||||
}
|
||||
double rms = sqrt(sum / sampleCount);
|
||||
float rmsFloat = (float)MIN(rms * 2.0, 1.0); // Scale and clamp to 0.0 ~ 1.0
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(audioCaptureManagerDidUpdateRMS:)]) {
|
||||
[self.delegate audioCaptureManagerDidUpdateRMS:rmsFloat];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@end
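Worked example of the slicing above: a tap callback that delivers 480 converted samples (30 ms at 16 kHz) appends 960 bytes to the ring buffer; one 640-byte frame is emitted immediately and the remaining 320 bytes (10 ms) are carried over to the next callback. If the hardware runs at 48 kHz, the converter downsamples first, so the 20 ms output cadence is independent of whatever buffer size the tap happens to deliver.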
|
||||
66
keyBoard/Class/AiTalk/VM/AudioSessionManager.h
Normal file
@@ -0,0 +1,66 @@
|
||||
//
|
||||
// AudioSessionManager.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <AVFoundation/AVFoundation.h>
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 音频会话中断类型
|
||||
typedef NS_ENUM(NSInteger, KBAudioSessionInterruptionType) {
|
||||
KBAudioSessionInterruptionTypeBegan, // 中断开始(来电等)
|
||||
KBAudioSessionInterruptionTypeEnded // 中断结束
|
||||
};
|
||||
|
||||
/// 音频会话管理器代理
|
||||
@protocol AudioSessionManagerDelegate <NSObject>
|
||||
@optional
|
||||
/// 音频会话被中断
|
||||
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type;
|
||||
/// 音频路由发生变化
|
||||
- (void)audioSessionManagerRouteDidChange;
|
||||
/// 麦克风权限状态变化
|
||||
- (void)audioSessionManagerMicrophonePermissionDenied;
|
||||
@end
|
||||
|
||||
/// 音频会话管理器
|
||||
/// 负责 AVAudioSession 配置、权限请求、中断处理
|
||||
@interface AudioSessionManager : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<AudioSessionManagerDelegate> delegate;
|
||||
|
||||
/// 单例
|
||||
+ (instancetype)sharedManager;
|
||||
|
||||
/// 请求麦克风权限
|
||||
/// @param completion 完成回调,granted 表示是否获得权限
|
||||
- (void)requestMicrophonePermission:(void (^)(BOOL granted))completion;
|
||||
|
||||
/// 检查麦克风权限状态
|
||||
- (BOOL)hasMicrophonePermission;
|
||||
|
||||
/// 配置音频会话为对话模式(录音+播放)
|
||||
/// @param error 错误信息
|
||||
/// @return 是否配置成功
|
||||
- (BOOL)configureForConversation:(NSError **)error;
|
||||
|
||||
/// 配置音频会话为仅播放模式
|
||||
/// @param error 错误信息
|
||||
/// @return 是否配置成功
|
||||
- (BOOL)configureForPlayback:(NSError **)error;
|
||||
|
||||
/// 激活音频会话
|
||||
/// @param error 错误信息
|
||||
/// @return 是否激活成功
|
||||
- (BOOL)activateSession:(NSError **)error;
|
||||
|
||||
/// 停用音频会话
|
||||
- (void)deactivateSession;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
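Typical call order against this interface before a recording round (illustrative sketch):

AudioSessionManager *session = [AudioSessionManager sharedManager];
session.delegate = self;

[session requestMicrophonePermission:^(BOOL granted) {
  if (!granted) { return; }  // delegate also receives ...MicrophonePermissionDenied
  NSError *error = nil;
  if ([session configureForConversation:&error] &&
      [session activateSession:&error]) {
    // safe to start AudioCaptureManager / AudioStreamPlayer here
  }
}];

// ...when the conversation round ends:
[session deactivateSession];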
|
||||
234
keyBoard/Class/AiTalk/VM/AudioSessionManager.m
Normal file
@@ -0,0 +1,234 @@
|
||||
//
|
||||
// AudioSessionManager.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "AudioSessionManager.h"
|
||||
|
||||
@interface AudioSessionManager ()
|
||||
@property(nonatomic, assign) BOOL isSessionActive;
|
||||
@end
|
||||
|
||||
@implementation AudioSessionManager
|
||||
|
||||
#pragma mark - Singleton
|
||||
|
||||
+ (instancetype)sharedManager {
|
||||
static AudioSessionManager *instance = nil;
|
||||
static dispatch_once_t onceToken;
|
||||
dispatch_once(&onceToken, ^{
|
||||
instance = [[AudioSessionManager alloc] init];
|
||||
});
|
||||
return instance;
|
||||
}
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_isSessionActive = NO;
|
||||
[self setupNotifications];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[[NSNotificationCenter defaultCenter] removeObserver:self];
|
||||
}
|
||||
|
||||
#pragma mark - Notifications
|
||||
|
||||
- (void)setupNotifications {
|
||||
// 监听音频会话中断通知
|
||||
[[NSNotificationCenter defaultCenter]
|
||||
addObserver:self
|
||||
selector:@selector(handleInterruption:)
|
||||
name:AVAudioSessionInterruptionNotification
|
||||
object:nil];
|
||||
|
||||
// 监听音频路由变化通知
|
||||
[[NSNotificationCenter defaultCenter]
|
||||
addObserver:self
|
||||
selector:@selector(handleRouteChange:)
|
||||
name:AVAudioSessionRouteChangeNotification
|
||||
object:nil];
|
||||
}
|
||||
|
||||
- (void)handleInterruption:(NSNotification *)notification {
|
||||
NSDictionary *info = notification.userInfo;
|
||||
AVAudioSessionInterruptionType type =
|
||||
[info[AVAudioSessionInterruptionTypeKey] unsignedIntegerValue];
|
||||
|
||||
if (type == AVAudioSessionInterruptionTypeBegan) {
|
||||
// 中断开始:来电、闹钟等
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(audioSessionManagerDidInterrupt:)]) {
|
||||
[self.delegate audioSessionManagerDidInterrupt:
|
||||
KBAudioSessionInterruptionTypeBegan];
|
||||
}
|
||||
});
|
||||
} else if (type == AVAudioSessionInterruptionTypeEnded) {
|
||||
// 中断结束
|
||||
AVAudioSessionInterruptionOptions options =
|
||||
[info[AVAudioSessionInterruptionOptionKey] unsignedIntegerValue];
|
||||
if (options & AVAudioSessionInterruptionOptionShouldResume) {
|
||||
// 可以恢复播放
|
||||
[self activateSession:nil];
|
||||
}
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(audioSessionManagerDidInterrupt:)]) {
|
||||
[self.delegate audioSessionManagerDidInterrupt:
|
||||
KBAudioSessionInterruptionTypeEnded];
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
- (void)handleRouteChange:(NSNotification *)notification {
|
||||
NSDictionary *info = notification.userInfo;
|
||||
AVAudioSessionRouteChangeReason reason =
|
||||
[info[AVAudioSessionRouteChangeReasonKey] unsignedIntegerValue];
|
||||
|
||||
switch (reason) {
|
||||
case AVAudioSessionRouteChangeReasonOldDeviceUnavailable:
|
||||
case AVAudioSessionRouteChangeReasonNewDeviceAvailable: {
|
||||
// 旧设备不可用(如耳机拔出)或新设备可用(如耳机插入)
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(audioSessionManagerRouteDidChange)]) {
|
||||
[self.delegate audioSessionManagerRouteDidChange];
|
||||
}
|
||||
});
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Microphone Permission
|
||||
|
||||
- (void)requestMicrophonePermission:(void (^)(BOOL))completion {
|
||||
AVAudioSession *session = [AVAudioSession sharedInstance];
|
||||
|
||||
[session requestRecordPermission:^(BOOL granted) {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (!granted) {
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(audioSessionManagerMicrophonePermissionDenied)]) {
|
||||
[self.delegate audioSessionManagerMicrophonePermissionDenied];
|
||||
}
|
||||
}
|
||||
if (completion) {
|
||||
completion(granted);
|
||||
}
|
||||
});
|
||||
}];
|
||||
}
|
||||
|
||||
- (BOOL)hasMicrophonePermission {
|
||||
AVAudioSession *session = [AVAudioSession sharedInstance];
|
||||
return session.recordPermission == AVAudioSessionRecordPermissionGranted;
|
||||
}
|
||||
|
||||
#pragma mark - Session Configuration
|
||||
|
||||
- (BOOL)configureForConversation:(NSError **)error {
|
||||
AVAudioSession *session = [AVAudioSession sharedInstance];
|
||||
|
||||
// 配置为录音+播放模式
|
||||
// Category: PlayAndRecord - 同时支持录音和播放
|
||||
// Mode: VoiceChat - 优化语音通话场景
|
||||
// Options:
|
||||
// - DefaultToSpeaker: 默认使用扬声器
|
||||
// - AllowBluetooth: 允许蓝牙设备
|
||||
NSError *categoryError = nil;
|
||||
BOOL success =
|
||||
[session setCategory:AVAudioSessionCategoryPlayAndRecord
|
||||
mode:AVAudioSessionModeVoiceChat
|
||||
options:(AVAudioSessionCategoryOptionDefaultToSpeaker |
|
||||
AVAudioSessionCategoryOptionAllowBluetooth)
|
||||
error:&categoryError];
|
||||
|
||||
if (!success) {
|
||||
if (error) {
|
||||
*error = categoryError;
|
||||
}
|
||||
NSLog(@"[AudioSessionManager] Failed to configure session: %@",
|
||||
categoryError.localizedDescription);
|
||||
return NO;
|
||||
}
|
||||
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (BOOL)configureForPlayback:(NSError **)error {
|
||||
AVAudioSession *session = [AVAudioSession sharedInstance];
|
||||
|
||||
// 仅播放模式
|
||||
NSError *categoryError = nil;
|
||||
BOOL success =
|
||||
[session setCategory:AVAudioSessionCategoryPlayback
|
||||
mode:AVAudioSessionModeDefault
|
||||
options:AVAudioSessionCategoryOptionDefaultToSpeaker
|
||||
error:&categoryError];
|
||||
|
||||
if (!success) {
|
||||
if (error) {
|
||||
*error = categoryError;
|
||||
}
|
||||
NSLog(@"[AudioSessionManager] Failed to configure playback: %@",
|
||||
categoryError.localizedDescription);
|
||||
return NO;
|
||||
}
|
||||
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (BOOL)activateSession:(NSError **)error {
|
||||
if (self.isSessionActive) {
|
||||
return YES;
|
||||
}
|
||||
|
||||
AVAudioSession *session = [AVAudioSession sharedInstance];
|
||||
NSError *activationError = nil;
|
||||
BOOL success = [session setActive:YES error:&activationError];
|
||||
|
||||
if (!success) {
|
||||
if (error) {
|
||||
*error = activationError;
|
||||
}
|
||||
NSLog(@"[AudioSessionManager] Failed to activate session: %@",
|
||||
activationError.localizedDescription);
|
||||
return NO;
|
||||
}
|
||||
|
||||
self.isSessionActive = YES;
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (void)deactivateSession {
|
||||
if (!self.isSessionActive) {
|
||||
return;
|
||||
}
|
||||
|
||||
AVAudioSession *session = [AVAudioSession sharedInstance];
|
||||
NSError *error = nil;
|
||||
|
||||
// 使用 NotifyOthersOnDeactivation 通知其他应用可以恢复播放
|
||||
[session setActive:NO
|
||||
withOptions:AVAudioSessionSetActiveOptionNotifyOthersOnDeactivation
|
||||
error:&error];
|
||||
|
||||
if (error) {
|
||||
NSLog(@"[AudioSessionManager] Failed to deactivate session: %@",
|
||||
error.localizedDescription);
|
||||
}
|
||||
|
||||
self.isSessionActive = NO;
|
||||
}
|
||||
|
||||
@end
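-configureForConversation: above only sets the category, mode and options. An optional extra step is to hint the session toward the 16 kHz / 20 ms pipeline used on the capture side. The following is a sketch of that optional tuning, not part of this commit, and the system treats preferred values as hints it may not honor:

// Optional latency tuning; preferred values are hints only.
AVAudioSession *session = [AVAudioSession sharedInstance];
NSError *error = nil;
if (![session setPreferredSampleRate:16000.0 error:&error]) {
  NSLog(@"preferred sample rate not applied: %@", error.localizedDescription);
}
if (![session setPreferredIOBufferDuration:0.02 error:&error]) {  // ~20 ms I/O buffers
  NSLog(@"preferred IO buffer duration not applied: %@", error.localizedDescription);
}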
|
||||
63
keyBoard/Class/AiTalk/VM/AudioStreamPlayer.h
Normal file
@@ -0,0 +1,63 @@
|
||||
//
|
||||
// AudioStreamPlayer.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 流式音频播放器代理
|
||||
@protocol AudioStreamPlayerDelegate <NSObject>
|
||||
@optional
|
||||
/// 开始播放片段
|
||||
- (void)audioStreamPlayerDidStartSegment:(NSString *)segmentId;
|
||||
/// 播放时间更新
|
||||
- (void)audioStreamPlayerDidUpdateTime:(NSTimeInterval)time
|
||||
segmentId:(NSString *)segmentId;
|
||||
/// 片段播放完成
|
||||
- (void)audioStreamPlayerDidFinishSegment:(NSString *)segmentId;
|
||||
@end
|
||||
|
||||
/// PCM 流式播放器
|
||||
/// 使用 AVAudioEngine + AVAudioPlayerNode 实现低延迟播放
|
||||
@interface AudioStreamPlayer : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<AudioStreamPlayerDelegate> delegate;
|
||||
|
||||
/// 是否正在播放
|
||||
@property(nonatomic, assign, readonly, getter=isPlaying) BOOL playing;
|
||||
|
||||
/// 启动播放器
|
||||
/// @param error 错误信息
|
||||
/// @return 是否启动成功
|
||||
- (BOOL)start:(NSError **)error;
|
||||
|
||||
/// 停止播放器
|
||||
- (void)stop;
|
||||
|
||||
/// 入队 PCM 数据块
|
||||
/// @param pcmData PCM Int16 数据
|
||||
/// @param sampleRate 采样率
|
||||
/// @param channels 通道数
|
||||
/// @param segmentId 片段 ID
|
||||
- (void)enqueuePCMChunk:(NSData *)pcmData
|
||||
sampleRate:(double)sampleRate
|
||||
channels:(int)channels
|
||||
segmentId:(NSString *)segmentId;
|
||||
|
||||
/// 获取片段的当前播放时间
|
||||
/// @param segmentId 片段 ID
|
||||
/// @return 当前时间(秒)
|
||||
- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId;
|
||||
|
||||
/// 获取片段的总时长
|
||||
/// @param segmentId 片段 ID
|
||||
/// @return 总时长(秒)
|
||||
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
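A minimal usage sketch for this player (illustrative; decodedPCMData stands in for whatever the TTS pipeline hands over as 16-bit PCM):

AudioStreamPlayer *player = [[AudioStreamPlayer alloc] init];
player.delegate = self;

NSError *error = nil;
if ([player start:&error]) {
  NSData *decodedPCMData = nil;  // placeholder: Int16 PCM chunk from the TTS pipeline
  // Chunks belonging to the same sentence share a segmentId.
  [player enqueuePCMChunk:decodedPCMData sampleRate:16000 channels:1 segmentId:@"seg_0"];
}
// Subtitle code can poll -playbackTimeForSegment: / -durationForSegment:
// or rely on the delegate time updates.
[player stop];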
|
||||
246
keyBoard/Class/AiTalk/VM/AudioStreamPlayer.m
Normal file
@@ -0,0 +1,246 @@
|
||||
//
|
||||
// AudioStreamPlayer.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "AudioStreamPlayer.h"
|
||||
#import <AVFoundation/AVFoundation.h>
|
||||
|
||||
@interface AudioStreamPlayer ()
|
||||
|
||||
@property(nonatomic, strong) AVAudioEngine *audioEngine;
|
||||
@property(nonatomic, strong) AVAudioPlayerNode *playerNode;
|
||||
@property(nonatomic, strong) AVAudioFormat *playbackFormat;
|
||||
|
||||
// 片段跟踪
|
||||
@property(nonatomic, copy) NSString *currentSegmentId;
|
||||
@property(nonatomic, strong)
|
||||
NSMutableDictionary<NSString *, NSNumber *> *segmentDurations;
|
||||
@property(nonatomic, strong)
|
||||
NSMutableDictionary<NSString *, NSNumber *> *segmentStartTimes;
|
||||
@property(nonatomic, assign) NSUInteger scheduledSamples;
|
||||
@property(nonatomic, assign) NSUInteger playedSamples;
|
||||
|
||||
// 状态
|
||||
@property(nonatomic, assign) BOOL playing;
|
||||
@property(nonatomic, strong) dispatch_queue_t playerQueue;
|
||||
@property(nonatomic, strong) NSTimer *progressTimer;
|
||||
|
||||
@end
|
||||
|
||||
@implementation AudioStreamPlayer
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_audioEngine = [[AVAudioEngine alloc] init];
|
||||
_playerNode = [[AVAudioPlayerNode alloc] init];
|
||||
_segmentDurations = [[NSMutableDictionary alloc] init];
|
||||
_segmentStartTimes = [[NSMutableDictionary alloc] init];
|
||||
_playerQueue = dispatch_queue_create("com.keyboard.aitalk.streamplayer",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
|
||||
// 默认播放格式:16kHz, Mono, Float32
|
||||
_playbackFormat =
|
||||
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32
|
||||
sampleRate:16000
|
||||
channels:1
|
||||
interleaved:NO];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self stop];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (BOOL)start:(NSError **)error {
|
||||
if (self.playing) {
|
||||
return YES;
|
||||
}
|
||||
|
||||
// 连接节点
|
||||
[self.audioEngine attachNode:self.playerNode];
|
||||
[self.audioEngine connect:self.playerNode
|
||||
to:self.audioEngine.mainMixerNode
|
||||
format:self.playbackFormat];
|
||||
|
||||
// 启动引擎
|
||||
NSError *startError = nil;
|
||||
[self.audioEngine prepare];
|
||||
|
||||
if (![self.audioEngine startAndReturnError:&startError]) {
|
||||
if (error) {
|
||||
*error = startError;
|
||||
}
|
||||
NSLog(@"[AudioStreamPlayer] Failed to start engine: %@",
|
||||
startError.localizedDescription);
|
||||
return NO;
|
||||
}
|
||||
|
||||
[self.playerNode play];
|
||||
self.playing = YES;
|
||||
|
||||
// 启动进度更新定时器
|
||||
[self startProgressTimer];
|
||||
|
||||
NSLog(@"[AudioStreamPlayer] Started");
|
||||
return YES;
|
||||
}
|
||||
|
||||
- (void)stop {
|
||||
dispatch_async(self.playerQueue, ^{
|
||||
[self stopProgressTimer];
|
||||
|
||||
[self.playerNode stop];
|
||||
[self.audioEngine stop];
|
||||
|
||||
self.playing = NO;
|
||||
self.currentSegmentId = nil;
|
||||
self.scheduledSamples = 0;
|
||||
self.playedSamples = 0;
|
||||
|
||||
[self.segmentDurations removeAllObjects];
|
||||
[self.segmentStartTimes removeAllObjects];
|
||||
|
||||
NSLog(@"[AudioStreamPlayer] Stopped");
|
||||
});
|
||||
}
|
||||
|
||||
- (void)enqueuePCMChunk:(NSData *)pcmData
|
||||
sampleRate:(double)sampleRate
|
||||
channels:(int)channels
|
||||
segmentId:(NSString *)segmentId {
|
||||
|
||||
if (!pcmData || pcmData.length == 0)
|
||||
return;
|
||||
|
||||
dispatch_async(self.playerQueue, ^{
|
||||
// 检查是否是新片段
|
||||
BOOL isNewSegment = ![segmentId isEqualToString:self.currentSegmentId];
|
||||
if (isNewSegment) {
|
||||
self.currentSegmentId = segmentId;
|
||||
self.scheduledSamples = 0;
|
||||
self.segmentStartTimes[segmentId] = @(CACurrentMediaTime());
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(audioStreamPlayerDidStartSegment:)]) {
|
||||
[self.delegate audioStreamPlayerDidStartSegment:segmentId];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// 转换 Int16 -> Float32
|
||||
NSUInteger sampleCount = pcmData.length / sizeof(int16_t);
|
||||
const int16_t *int16Samples = (const int16_t *)pcmData.bytes;
|
||||
|
||||
// 创建播放格式的 buffer
|
||||
AVAudioFormat *format =
|
||||
[[AVAudioFormat alloc] initWithCommonFormat:AVAudioPCMFormatFloat32
|
||||
sampleRate:sampleRate
|
||||
channels:channels
|
||||
interleaved:NO];
|
||||
|
||||
AVAudioPCMBuffer *buffer = [[AVAudioPCMBuffer alloc]
|
||||
initWithPCMFormat:format
|
||||
frameCapacity:(AVAudioFrameCount)sampleCount];
|
||||
buffer.frameLength = (AVAudioFrameCount)sampleCount;
|
||||
|
||||
float *floatChannel = buffer.floatChannelData[0];
|
||||
for (NSUInteger i = 0; i < sampleCount; i++) {
|
||||
floatChannel[i] = (float)int16Samples[i] / 32768.0f;
|
||||
}
|
||||
|
||||
// 调度播放
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[self.playerNode scheduleBuffer:buffer
|
||||
completionHandler:^{
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
dispatch_async(strongSelf.playerQueue, ^{
|
||||
strongSelf.playedSamples += sampleCount;
|
||||
});
|
||||
}];
|
||||
|
||||
self.scheduledSamples += sampleCount;
|
||||
|
||||
// 更新时长
|
||||
NSTimeInterval chunkDuration = (double)sampleCount / sampleRate;
|
||||
NSNumber *currentDuration = self.segmentDurations[segmentId];
|
||||
self.segmentDurations[segmentId] =
|
||||
@(currentDuration.doubleValue + chunkDuration);
|
||||
});
|
||||
}
|
||||
|
||||
- (NSTimeInterval)playbackTimeForSegment:(NSString *)segmentId {
|
||||
if (![segmentId isEqualToString:self.currentSegmentId]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 基于已播放的采样数估算时间
|
||||
return (double)self.playedSamples / self.playbackFormat.sampleRate;
|
||||
}
|
||||
|
||||
- (NSTimeInterval)durationForSegment:(NSString *)segmentId {
|
||||
NSNumber *duration = self.segmentDurations[segmentId];
|
||||
return duration ? duration.doubleValue : 0;
|
||||
}
|
||||
|
||||
#pragma mark - Progress Timer
|
||||
|
||||
- (void)startProgressTimer {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
self.progressTimer =
|
||||
[NSTimer scheduledTimerWithTimeInterval:1.0 / 30.0
|
||||
target:self
|
||||
selector:@selector(updateProgress)
|
||||
userInfo:nil
|
||||
repeats:YES];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)stopProgressTimer {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
[self.progressTimer invalidate];
|
||||
self.progressTimer = nil;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)updateProgress {
|
||||
if (!self.playing || !self.currentSegmentId) {
|
||||
return;
|
||||
}
|
||||
|
||||
NSTimeInterval currentTime =
|
||||
[self playbackTimeForSegment:self.currentSegmentId];
|
||||
NSString *segmentId = self.currentSegmentId;
|
||||
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(audioStreamPlayerDidUpdateTime:segmentId:)]) {
|
||||
[self.delegate audioStreamPlayerDidUpdateTime:currentTime
|
||||
segmentId:segmentId];
|
||||
}
|
||||
|
||||
// 检查是否播放完成
|
||||
NSTimeInterval duration = [self durationForSegment:segmentId];
|
||||
if (duration > 0 && currentTime >= duration - 0.1) {
|
||||
// 播放完成
|
||||
dispatch_async(self.playerQueue, ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(audioStreamPlayerDidFinishSegment:)]) {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
[self.delegate audioStreamPlayerDidFinishSegment:segmentId];
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@end
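Two notes on the implementation above. First, the player node is connected at a fixed 16 kHz mono Float32 format, while -enqueuePCMChunk:... builds buffers from whatever sampleRate/channels it is given; a chunk whose format differs from the connection format will not play correctly (and may raise an exception), so such chunks should be resampled before being enqueued. Second, the per-segment time/duration reporting is what a subtitle consumer (SubtitleSync in this architecture; its implementation is not in this part of the diff) can map onto visible characters. A minimal proportional mapping, written here as an assumed sketch only:

// Assumed sketch of progress -> visible-text mapping; the shipped SubtitleSync may differ.
static NSString *KBVisibleText(NSString *fullText,
                               NSTimeInterval time,
                               NSTimeInterval duration) {
  if (duration <= 0 || fullText.length == 0) {
    return @"";
  }
  double progress = MIN(MAX(time / duration, 0.0), 1.0);
  NSUInteger visibleLength = (NSUInteger)(progress * fullText.length + 0.5);
  visibleLength = MIN(visibleLength, fullText.length);
  // Snap to a composed character boundary so emoji / combining marks are not cut in half.
  NSRange safeRange = [fullText
      rangeOfComposedCharacterSequencesForRange:NSMakeRange(0, visibleLength)];
  return [fullText substringWithRange:safeRange];
}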
|
||||
88
keyBoard/Class/AiTalk/VM/ConversationOrchestrator.h
Normal file
@@ -0,0 +1,88 @@
|
||||
//
|
||||
// ConversationOrchestrator.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 对话状态
|
||||
typedef NS_ENUM(NSInteger, ConversationState) {
|
||||
ConversationStateIdle = 0, // 空闲
|
||||
ConversationStateListening, // 正在录音
|
||||
ConversationStateRecognizing, // 正在识别(等待 ASR 结果)
|
||||
ConversationStateThinking, // 正在思考(等待 LLM 回复)
|
||||
ConversationStateSpeaking // 正在播报 TTS
|
||||
};
|
||||
|
||||
/// 对话编排器
|
||||
/// 核心状态机,串联所有模块,处理打断逻辑
|
||||
@interface ConversationOrchestrator : NSObject
|
||||
|
||||
/// 当前状态
|
||||
@property(nonatomic, assign, readonly) ConversationState state;
|
||||
|
||||
/// 当前对话 ID
|
||||
@property(nonatomic, copy, readonly, nullable) NSString *conversationId;
|
||||
|
||||
#pragma mark - Callbacks
|
||||
|
||||
/// 用户最终识别文本回调
|
||||
@property(nonatomic, copy, nullable) void (^onUserFinalText)(NSString *text);
|
||||
|
||||
/// AI 可见文本回调(打字机效果)
|
||||
@property(nonatomic, copy, nullable) void (^onAssistantVisibleText)
|
||||
(NSString *text);
|
||||
|
||||
/// AI 完整回复文本回调
|
||||
@property(nonatomic, copy, nullable) void (^onAssistantFullText)(NSString *text);
|
||||
|
||||
/// 实时识别文本回调(部分结果)
|
||||
@property(nonatomic, copy, nullable) void (^onPartialText)(NSString *text);
|
||||
|
||||
/// 音量更新回调(用于波形 UI)
|
||||
@property(nonatomic, copy, nullable) void (^onVolumeUpdate)(float rms);
|
||||
|
||||
/// 状态变化回调
|
||||
@property(nonatomic, copy, nullable) void (^onStateChange)
|
||||
(ConversationState state);
|
||||
|
||||
/// 错误回调
|
||||
@property(nonatomic, copy, nullable) void (^onError)(NSError *error);
|
||||
|
||||
/// AI 开始说话回调
|
||||
@property(nonatomic, copy, nullable) void (^onSpeakingStart)(void);
|
||||
|
||||
/// AI 说话结束回调
|
||||
@property(nonatomic, copy, nullable) void (^onSpeakingEnd)(void);
|
||||
|
||||
#pragma mark - Configuration
|
||||
|
||||
/// ASR 服务器 URL
|
||||
@property(nonatomic, copy) NSString *asrServerURL;
|
||||
|
||||
/// LLM 服务器 URL
|
||||
@property(nonatomic, copy) NSString *llmServerURL;
|
||||
|
||||
/// TTS 服务器 URL
|
||||
@property(nonatomic, copy) NSString *ttsServerURL;
|
||||
|
||||
#pragma mark - User Actions
|
||||
|
||||
/// 用户按下录音按钮
|
||||
/// 如果当前正在播放,会自动打断
|
||||
- (void)userDidPressRecord;
|
||||
|
||||
/// 用户松开录音按钮
|
||||
- (void)userDidReleaseRecord;
|
||||
|
||||
/// 手动停止(退出页面等)
|
||||
- (void)stop;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
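Illustrative wiring from a host view controller (the TTS URL is a placeholder and the helper methods on the right are hypothetical UI hooks, not part of this commit):

self.orchestrator = [[ConversationOrchestrator alloc] init];
self.orchestrator.asrServerURL = @"wss://your-asr-server.com/ws/asr";
self.orchestrator.llmServerURL = @"https://your-llm-server.com/api/chat/stream";
self.orchestrator.ttsServerURL = @"https://your-tts-server.com/api/tts";  // placeholder

__weak typeof(self) weakSelf = self;
self.orchestrator.onPartialText = ^(NSString *text) { [weakSelf showLiveCaption:text]; };
self.orchestrator.onUserFinalText = ^(NSString *text) { [weakSelf appendUserBubble:text]; };
self.orchestrator.onAssistantVisibleText = ^(NSString *text) { [weakSelf updateAssistantBubble:text]; };
self.orchestrator.onStateChange = ^(ConversationState state) { [weakSelf updateRecordButtonForState:state]; };

// Press-and-hold button:
//   touch down        -> [self.orchestrator userDidPressRecord];   (barge-in is handled inside)
//   touch up / cancel -> [self.orchestrator userDidReleaseRecord];
// viewWillDisappear   -> [self.orchestrator stop];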
|
||||
527
keyBoard/Class/AiTalk/VM/ConversationOrchestrator.m
Normal file
@@ -0,0 +1,527 @@
|
||||
//
|
||||
// ConversationOrchestrator.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "ConversationOrchestrator.h"
|
||||
#import "ASRStreamClient.h"
|
||||
#import "AudioCaptureManager.h"
|
||||
#import "AudioSessionManager.h"
|
||||
#import "LLMStreamClient.h"
|
||||
#import "Segmenter.h"
|
||||
#import "SubtitleSync.h"
|
||||
#import "TTSPlaybackPipeline.h"
|
||||
#import "TTSServiceClient.h"
|
||||
|
||||
@interface ConversationOrchestrator () <
|
||||
AudioSessionManagerDelegate, AudioCaptureManagerDelegate,
|
||||
ASRStreamClientDelegate, LLMStreamClientDelegate, TTSServiceClientDelegate,
|
||||
TTSPlaybackPipelineDelegate>
|
||||
|
||||
// 模块
|
||||
@property(nonatomic, strong) AudioSessionManager *audioSession;
|
||||
@property(nonatomic, strong) AudioCaptureManager *audioCapture;
|
||||
@property(nonatomic, strong) ASRStreamClient *asrClient;
|
||||
@property(nonatomic, strong) LLMStreamClient *llmClient;
|
||||
@property(nonatomic, strong) Segmenter *segmenter;
|
||||
@property(nonatomic, strong) TTSServiceClient *ttsClient;
|
||||
@property(nonatomic, strong) TTSPlaybackPipeline *playbackPipeline;
|
||||
@property(nonatomic, strong) SubtitleSync *subtitleSync;
|
||||
|
||||
// 状态
|
||||
@property(nonatomic, assign) ConversationState state;
|
||||
@property(nonatomic, copy) NSString *conversationId;
|
||||
@property(nonatomic, copy) NSString *currentSessionId;
|
||||
|
||||
// 文本跟踪
|
||||
@property(nonatomic, strong) NSMutableString *fullAssistantText;
|
||||
@property(nonatomic, strong)
|
||||
NSMutableDictionary<NSString *, NSString *> *segmentTextMap;
|
||||
@property(nonatomic, assign) NSInteger segmentCounter;
|
||||
|
||||
// 队列
|
||||
@property(nonatomic, strong) dispatch_queue_t orchestratorQueue;
|
||||
|
||||
@end
|
||||
|
||||
@implementation ConversationOrchestrator
|
||||
|
||||
#pragma mark - Initialization
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_orchestratorQueue = dispatch_queue_create(
|
||||
"com.keyboard.aitalk.orchestrator", DISPATCH_QUEUE_SERIAL);
|
||||
_state = ConversationStateIdle;
|
||||
_conversationId = [[NSUUID UUID] UUIDString];
|
||||
|
||||
_fullAssistantText = [[NSMutableString alloc] init];
|
||||
_segmentTextMap = [[NSMutableDictionary alloc] init];
|
||||
_segmentCounter = 0;
|
||||
|
||||
[self setupModules];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)setupModules {
|
||||
// Audio Session
|
||||
self.audioSession = [AudioSessionManager sharedManager];
|
||||
self.audioSession.delegate = self;
|
||||
|
||||
// Audio Capture
|
||||
self.audioCapture = [[AudioCaptureManager alloc] init];
|
||||
self.audioCapture.delegate = self;
|
||||
|
||||
// ASR Client
|
||||
self.asrClient = [[ASRStreamClient alloc] init];
|
||||
self.asrClient.delegate = self;
|
||||
|
||||
// LLM Client
|
||||
self.llmClient = [[LLMStreamClient alloc] init];
|
||||
self.llmClient.delegate = self;
|
||||
|
||||
// Segmenter
|
||||
self.segmenter = [[Segmenter alloc] init];
|
||||
|
||||
// TTS Client
|
||||
self.ttsClient = [[TTSServiceClient alloc] init];
|
||||
self.ttsClient.delegate = self;
|
||||
|
||||
// Playback Pipeline
|
||||
self.playbackPipeline = [[TTSPlaybackPipeline alloc] init];
|
||||
self.playbackPipeline.delegate = self;
|
||||
|
||||
// Subtitle Sync
|
||||
self.subtitleSync = [[SubtitleSync alloc] init];
|
||||
}
|
||||
|
||||
#pragma mark - Configuration Setters
|
||||
|
||||
- (void)setAsrServerURL:(NSString *)asrServerURL {
|
||||
_asrServerURL = [asrServerURL copy];
|
||||
self.asrClient.serverURL = asrServerURL;
|
||||
}
|
||||
|
||||
- (void)setLlmServerURL:(NSString *)llmServerURL {
|
||||
_llmServerURL = [llmServerURL copy];
|
||||
self.llmClient.serverURL = llmServerURL;
|
||||
}
|
||||
|
||||
- (void)setTtsServerURL:(NSString *)ttsServerURL {
|
||||
_ttsServerURL = [ttsServerURL copy];
|
||||
self.ttsClient.serverURL = ttsServerURL;
|
||||
}
|
||||
|
||||
#pragma mark - User Actions
|
||||
|
||||
- (void)userDidPressRecord {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
NSLog(@"[Orchestrator] userDidPressRecord, current state: %ld",
|
||||
(long)self.state);
|
||||
|
||||
// 如果正在播放或思考,执行打断
|
||||
if (self.state == ConversationStateSpeaking ||
|
||||
self.state == ConversationStateThinking) {
|
||||
[self performBargein];
|
||||
}
|
||||
|
||||
// 检查麦克风权限
|
||||
if (![self.audioSession hasMicrophonePermission]) {
|
||||
[self.audioSession requestMicrophonePermission:^(BOOL granted) {
|
||||
if (granted) {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self startRecording];
|
||||
});
|
||||
}
|
||||
}];
|
||||
return;
|
||||
}
|
||||
|
||||
[self startRecording];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)userDidReleaseRecord {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
NSLog(@"[Orchestrator] userDidReleaseRecord, current state: %ld",
|
||||
(long)self.state);
|
||||
|
||||
if (self.state != ConversationStateListening) {
|
||||
return;
|
||||
}
|
||||
|
||||
// 停止采集
|
||||
[self.audioCapture stopCapture];
|
||||
|
||||
// 请求 ASR 最终结果
|
||||
[self.asrClient finalize];
|
||||
|
||||
// 更新状态
|
||||
[self updateState:ConversationStateRecognizing];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)stop {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self cancelAll];
|
||||
[self updateState:ConversationStateIdle];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - Private: Recording
|
||||
|
||||
- (void)startRecording {
|
||||
// 配置音频会话
|
||||
NSError *error = nil;
|
||||
if (![self.audioSession configureForConversation:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
if (![self.audioSession activateSession:&error]) {
|
||||
[self reportError:error];
|
||||
return;
|
||||
}
|
||||
|
||||
// 生成新的会话 ID
|
||||
self.currentSessionId = [[NSUUID UUID] UUIDString];
|
||||
|
||||
// 启动 ASR
|
||||
[self.asrClient startWithSessionId:self.currentSessionId];
|
||||
|
||||
// 启动音频采集
|
||||
if (![self.audioCapture startCapture:&error]) {
|
||||
[self reportError:error];
|
||||
[self.asrClient cancel];
|
||||
return;
|
||||
}
|
||||
|
||||
// 更新状态
|
||||
[self updateState:ConversationStateListening];
|
||||
}
|
||||
|
||||
#pragma mark - Private: Barge-in (打断)
|
||||
|
||||
- (void)performBargein {
|
||||
NSLog(@"[Orchestrator] Performing barge-in");
|
||||
|
||||
// 取消所有正在进行的请求
|
||||
[self.ttsClient cancel];
|
||||
[self.llmClient cancel];
|
||||
[self.asrClient cancel];
|
||||
|
||||
// 停止播放
|
||||
[self.playbackPipeline stop];
|
||||
|
||||
// 清空状态
|
||||
[self.segmenter reset];
|
||||
[self.segmentTextMap removeAllObjects];
|
||||
[self.fullAssistantText setString:@""];
|
||||
self.segmentCounter = 0;
|
||||
}
|
||||
|
||||
- (void)cancelAll {
|
||||
[self.audioCapture stopCapture];
|
||||
[self.asrClient cancel];
|
||||
[self.llmClient cancel];
|
||||
[self.ttsClient cancel];
|
||||
[self.playbackPipeline stop];
|
||||
[self.segmenter reset];
|
||||
[self.audioSession deactivateSession];
|
||||
}
|
||||
|
||||
#pragma mark - Private: State Management
|
||||
|
||||
- (void)updateState:(ConversationState)newState {
|
||||
if (self.state == newState)
|
||||
return;
|
||||
|
||||
ConversationState oldState = self.state;
|
||||
self.state = newState;
|
||||
|
||||
NSLog(@"[Orchestrator] State: %ld -> %ld", (long)oldState, (long)newState);
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onStateChange) {
|
||||
self.onStateChange(newState);
|
||||
}
|
||||
|
||||
// 特殊状态回调
|
||||
if (newState == ConversationStateSpeaking &&
|
||||
oldState != ConversationStateSpeaking) {
|
||||
if (self.onSpeakingStart) {
|
||||
self.onSpeakingStart();
|
||||
}
|
||||
}
|
||||
|
||||
if (oldState == ConversationStateSpeaking &&
|
||||
newState != ConversationStateSpeaking) {
|
||||
if (self.onSpeakingEnd) {
|
||||
self.onSpeakingEnd();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
NSLog(@"[Orchestrator] Error: %@", error.localizedDescription);
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onError) {
|
||||
self.onError(error);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioCaptureManagerDelegate
|
||||
|
||||
- (void)audioCaptureManagerDidOutputPCMFrame:(NSData *)pcmFrame {
|
||||
// 发送到 ASR
|
||||
[self.asrClient sendAudioPCMFrame:pcmFrame];
|
||||
}
|
||||
|
||||
- (void)audioCaptureManagerDidUpdateRMS:(float)rms {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onVolumeUpdate) {
|
||||
self.onVolumeUpdate(rms);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioSessionManagerDelegate
|
||||
|
||||
- (void)audioSessionManagerDidInterrupt:(KBAudioSessionInterruptionType)type {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
if (type == KBAudioSessionInterruptionTypeBegan) {
|
||||
// 中断开始:停止采集和播放
|
||||
[self cancelAll];
|
||||
[self updateState:ConversationStateIdle];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)audioSessionManagerMicrophonePermissionDenied {
|
||||
NSError *error =
|
||||
[NSError errorWithDomain:@"ConversationOrchestrator"
|
||||
code:-1
|
||||
userInfo:@{
|
||||
NSLocalizedDescriptionKey : @"请在设置中开启麦克风权限"
|
||||
}];
|
||||
[self reportError:error];
|
||||
}
|
||||
|
||||
#pragma mark - ASRStreamClientDelegate
|
||||
|
||||
- (void)asrClientDidReceivePartialText:(NSString *)text {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onPartialText) {
|
||||
self.onPartialText(text);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)asrClientDidReceiveFinalText:(NSString *)text {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
NSLog(@"[Orchestrator] ASR final text: %@", text);
|
||||
|
||||
// 回调用户文本
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onUserFinalText) {
|
||||
self.onUserFinalText(text);
|
||||
}
|
||||
});
|
||||
|
||||
// 如果文本为空,回到空闲
|
||||
if (text.length == 0) {
|
||||
[self updateState:ConversationStateIdle];
|
||||
return;
|
||||
}
|
||||
|
||||
// 更新状态并开始 LLM 请求
|
||||
[self updateState:ConversationStateThinking];
|
||||
|
||||
// 重置文本跟踪
|
||||
[self.fullAssistantText setString:@""];
|
||||
[self.segmentTextMap removeAllObjects];
|
||||
self.segmentCounter = 0;
|
||||
[self.segmenter reset];
|
||||
|
||||
// 启动播放管线
|
||||
NSError *error = nil;
|
||||
if (![self.playbackPipeline start:&error]) {
|
||||
NSLog(@"[Orchestrator] Failed to start playback pipeline: %@",
|
||||
error.localizedDescription);
|
||||
}
|
||||
|
||||
// 发送 LLM 请求
|
||||
[self.llmClient sendUserText:text conversationId:self.conversationId];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)asrClientDidFail:(NSError *)error {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self reportError:error];
|
||||
[self updateState:ConversationStateIdle];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - LLMStreamClientDelegate
|
||||
|
||||
- (void)llmClientDidReceiveToken:(NSString *)token {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
// 追加到完整文本
|
||||
[self.fullAssistantText appendString:token];
|
||||
|
||||
// 追加到分段器
|
||||
[self.segmenter appendToken:token];
|
||||
|
||||
// 检查是否有可触发 TTS 的片段
|
||||
NSArray<NSString *> *segments = [self.segmenter popReadySegments];
|
||||
for (NSString *segmentText in segments) {
|
||||
[self requestTTSForSegment:segmentText];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)llmClientDidComplete {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
NSLog(@"[Orchestrator] LLM complete");
|
||||
|
||||
// 处理剩余片段
|
||||
NSString *remaining = [self.segmenter flushRemainingSegment];
|
||||
if (remaining && remaining.length > 0) {
|
||||
[self requestTTSForSegment:remaining];
|
||||
}
|
||||
|
||||
// 回调完整文本
|
||||
NSString *fullText = [self.fullAssistantText copy];
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onAssistantFullText) {
|
||||
self.onAssistantFullText(fullText);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
- (void)llmClientDidFail:(NSError *)error {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self reportError:error];
|
||||
[self updateState:ConversationStateIdle];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - Private: TTS Request
|
||||
|
||||
- (void)requestTTSForSegment:(NSString *)segmentText {
|
||||
NSString *segmentId =
|
||||
[NSString stringWithFormat:@"seg_%ld", (long)self.segmentCounter++];
|
||||
|
||||
// 记录片段文本
|
||||
self.segmentTextMap[segmentId] = segmentText;
|
||||
|
||||
NSLog(@"[Orchestrator] Requesting TTS for segment %@: %@", segmentId,
|
||||
segmentText);
|
||||
|
||||
// 请求 TTS
|
||||
[self.ttsClient requestTTSForText:segmentText segmentId:segmentId];
|
||||
}
|
||||
|
||||
#pragma mark - TTSServiceClientDelegate
|
||||
|
||||
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self.playbackPipeline enqueueURL:url segmentId:segmentId];
|
||||
|
||||
// 如果还在 Thinking,切换到 Speaking
|
||||
if (self.state == ConversationStateThinking) {
|
||||
[self updateState:ConversationStateSpeaking];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
|
||||
payloadType:(TTSPayloadType)type
|
||||
segmentId:(NSString *)segmentId {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self.playbackPipeline enqueueChunk:chunk
|
||||
payloadType:type
|
||||
segmentId:segmentId];
|
||||
|
||||
// 如果还在 Thinking,切换到 Speaking
|
||||
if (self.state == ConversationStateThinking) {
|
||||
[self updateState:ConversationStateSpeaking];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)ttsClientDidFinishSegment:(NSString *)segmentId {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self.playbackPipeline markSegmentComplete:segmentId];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)ttsClientDidFail:(NSError *)error {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self reportError:error];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - TTSPlaybackPipelineDelegate
|
||||
|
||||
- (void)pipelineDidStartSegment:(NSString *)segmentId
|
||||
duration:(NSTimeInterval)duration {
|
||||
NSLog(@"[Orchestrator] Started playing segment: %@", segmentId);
|
||||
}
|
||||
|
||||
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
|
||||
segmentId:(NSString *)segmentId {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
// 获取片段文本
|
||||
NSString *segmentText = self.segmentTextMap[segmentId];
|
||||
if (!segmentText)
|
||||
return;
|
||||
|
||||
// 计算可见文本
|
||||
NSTimeInterval duration =
|
||||
[self.playbackPipeline durationForSegment:segmentId];
|
||||
NSString *visibleText =
|
||||
[self.subtitleSync visibleTextForFullText:segmentText
|
||||
currentTime:time
|
||||
duration:duration];
|
||||
|
||||
// TODO: 这里应该累加之前片段的文本,实现完整的打字机效果
|
||||
// 简化实现:只显示当前片段
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if (self.onAssistantVisibleText) {
|
||||
self.onAssistantVisibleText(visibleText);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
- (void)pipelineDidFinishSegment:(NSString *)segmentId {
|
||||
NSLog(@"[Orchestrator] Finished playing segment: %@", segmentId);
|
||||
}
|
||||
|
||||
- (void)pipelineDidFinishAllSegments {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
NSLog(@"[Orchestrator] All segments finished");
|
||||
|
||||
// 回到空闲状态
|
||||
[self updateState:ConversationStateIdle];
|
||||
[self.audioSession deactivateSession];
|
||||
});
|
||||
}
|
||||
|
||||
- (void)pipelineDidFail:(NSError *)error {
|
||||
dispatch_async(self.orchestratorQueue, ^{
|
||||
[self reportError:error];
|
||||
[self updateState:ConversationStateIdle];
|
||||
});
|
||||
}
|
||||
|
||||
@end
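The orchestrator above drives a Segmenter through -appendToken:, -popReadySegments, -flushRemainingSegment and -reset. The Segmenter implementation itself is not in this part of the diff; the following is only an assumed, minimal punctuation-based sketch matching the method names used here:

// Assumed minimal Segmenter: cut a segment whenever sentence-ending punctuation arrives.
// The shipped implementation may also use length thresholds or smarter rules.
@interface Segmenter : NSObject
- (void)appendToken:(NSString *)token;
- (NSArray<NSString *> *)popReadySegments;
- (NSString *)flushRemainingSegment;
- (void)reset;
@end

@implementation Segmenter {
  NSMutableString *_pending;
  NSMutableArray<NSString *> *_ready;
}

- (instancetype)init {
  if ((self = [super init])) {
    _pending = [NSMutableString string];
    _ready = [NSMutableArray array];
  }
  return self;
}

- (void)appendToken:(NSString *)token {
  [_pending appendString:token];
  NSCharacterSet *stops =
      [NSCharacterSet characterSetWithCharactersInString:@"。!?!?;;\n"];
  NSRange r = [_pending rangeOfCharacterFromSet:stops options:NSBackwardsSearch];
  if (r.location != NSNotFound) {
    NSUInteger cut = r.location + r.length;
    NSString *segment = [[_pending substringToIndex:cut]
        stringByTrimmingCharactersInSet:[NSCharacterSet
                                            whitespaceAndNewlineCharacterSet]];
    if (segment.length > 0) {
      [_ready addObject:segment];
    }
    [_pending deleteCharactersInRange:NSMakeRange(0, cut)];
  }
}

- (NSArray<NSString *> *)popReadySegments {
  NSArray<NSString *> *out = [_ready copy];
  [_ready removeAllObjects];
  return out;
}

- (NSString *)flushRemainingSegment {
  NSString *rest = [_pending
      stringByTrimmingCharactersInSet:[NSCharacterSet
                                          whitespaceAndNewlineCharacterSet]];
  [_pending setString:@""];
  return rest;
}

- (void)reset {
  [_pending setString:@""];
  [_ready removeAllObjects];
}
@end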
|
||||
48
keyBoard/Class/AiTalk/VM/LLMStreamClient.h
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// LLMStreamClient.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// LLM 流式生成客户端代理
|
||||
@protocol LLMStreamClientDelegate <NSObject>
|
||||
@required
|
||||
/// 收到新的 token
|
||||
- (void)llmClientDidReceiveToken:(NSString *)token;
|
||||
/// 生成完成
|
||||
- (void)llmClientDidComplete;
|
||||
/// 生成失败
|
||||
- (void)llmClientDidFail:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// LLM 流式生成客户端
|
||||
/// 支持 SSE(Server-Sent Events)或 WebSocket 接收 token 流
|
||||
@interface LLMStreamClient : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<LLMStreamClientDelegate> delegate;
|
||||
|
||||
/// LLM 服务器 URL
|
||||
@property(nonatomic, copy) NSString *serverURL;
|
||||
|
||||
/// API Key(如需要)
|
||||
@property(nonatomic, copy, nullable) NSString *apiKey;
|
||||
|
||||
/// 是否正在生成
|
||||
@property(nonatomic, assign, readonly, getter=isGenerating) BOOL generating;
|
||||
|
||||
/// 发送用户文本请求 LLM 回复
|
||||
/// @param text 用户输入的文本
|
||||
/// @param conversationId 对话 ID
|
||||
- (void)sendUserText:(NSString *)text conversationId:(NSString *)conversationId;
|
||||
|
||||
/// 取消当前请求
|
||||
- (void)cancel;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
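As far as the SSE parsing later in this diff shows, the stream this client consumes is standard Server-Sent Events: events separated by a blank line, each payload line prefixed with "data: ". The JSON fields inside the payload are not visible in this excerpt, so the field names below are assumed examples only:

data: {"token": "今"}

data: {"token": "天天气不错"}

data: {"done": true}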
|
||||
244
keyBoard/Class/AiTalk/VM/LLMStreamClient.m
Normal file
@@ -0,0 +1,244 @@
|
||||
//
|
||||
// LLMStreamClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "LLMStreamClient.h"
|
||||
|
||||
@interface LLMStreamClient () <NSURLSessionDataDelegate>
|
||||
|
||||
@property(nonatomic, strong) NSURLSession *urlSession;
|
||||
@property(nonatomic, strong) NSURLSessionDataTask *dataTask;
|
||||
@property(nonatomic, strong) dispatch_queue_t networkQueue;
|
||||
@property(nonatomic, assign) BOOL generating;
|
||||
@property(nonatomic, strong) NSMutableString *buffer; // SSE 数据缓冲
|
||||
|
||||
@end
|
||||
|
||||
@implementation LLMStreamClient
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.llm.network",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
_buffer = [[NSMutableString alloc] init];
|
||||
// TODO: 替换为实际的 LLM 服务器地址
|
||||
_serverURL = @"https://your-llm-server.com/api/chat/stream";
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self cancel];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)sendUserText:(NSString *)text
|
||||
conversationId:(NSString *)conversationId {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
[self cancelInternal];
|
||||
|
||||
self.generating = YES;
|
||||
[self.buffer setString:@""];
|
||||
|
||||
// 创建请求
|
||||
NSURL *url = [NSURL URLWithString:self.serverURL];
|
||||
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
|
||||
request.HTTPMethod = @"POST";
|
||||
[request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"];
|
||||
[request setValue:@"text/event-stream" forHTTPHeaderField:@"Accept"];
|
||||
|
||||
if (self.apiKey) {
|
||||
[request setValue:[NSString stringWithFormat:@"Bearer %@", self.apiKey]
|
||||
forHTTPHeaderField:@"Authorization"];
|
||||
}
|
||||
|
||||
// 请求体
|
||||
NSDictionary *body = @{
|
||||
@"message" : text,
|
||||
@"conversationId" : conversationId,
|
||||
@"stream" : @YES
|
||||
};
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
request.HTTPBody = jsonData;
|
||||
|
||||
// 创建会话
|
||||
NSURLSessionConfiguration *config =
|
||||
[NSURLSessionConfiguration defaultSessionConfiguration];
|
||||
config.timeoutIntervalForRequest = 60;
|
||||
config.timeoutIntervalForResource = 300;
|
||||
|
||||
self.urlSession = [NSURLSession sessionWithConfiguration:config
|
||||
delegate:self
|
||||
delegateQueue:nil];
|
||||
|
||||
self.dataTask = [self.urlSession dataTaskWithRequest:request];
|
||||
[self.dataTask resume];
|
||||
|
||||
NSLog(@"[LLMStreamClient] Started request for conversation: %@",
|
||||
conversationId);
|
||||
});
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
[self cancelInternal];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (void)cancelInternal {
|
||||
self.generating = NO;
|
||||
|
||||
if (self.dataTask) {
|
||||
[self.dataTask cancel];
|
||||
self.dataTask = nil;
|
||||
}
|
||||
|
||||
if (self.urlSession) {
|
||||
[self.urlSession invalidateAndCancel];
|
||||
self.urlSession = nil;
|
||||
}
|
||||
|
||||
[self.buffer setString:@""];
|
||||
}
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
self.generating = NO;
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector(llmClientDidFail:)]) {
|
||||
[self.delegate llmClientDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportComplete {
|
||||
self.generating = NO;
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector(llmClientDidComplete)]) {
|
||||
[self.delegate llmClientDidComplete];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)reportToken:(NSString *)token {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(llmClientDidReceiveToken:)]) {
|
||||
[self.delegate llmClientDidReceiveToken:token];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - SSE Parsing
|
||||
|
||||
- (void)parseSSEData:(NSData *)data {
|
||||
NSString *string = [[NSString alloc] initWithData:data
|
||||
encoding:NSUTF8StringEncoding];
|
||||
if (!string)
|
||||
return;
|
||||
|
||||
[self.buffer appendString:string];
|
||||
|
||||
// SSE 格式:每个事件以 \n\n 分隔
|
||||
NSArray *events = [self.buffer componentsSeparatedByString:@"\n\n"];
|
||||
|
||||
// 保留最后一个可能不完整的事件
|
||||
if (events.count > 1) {
|
||||
[self.buffer setString:events.lastObject];
|
||||
|
||||
for (NSUInteger i = 0; i < events.count - 1; i++) {
|
||||
[self handleSSEEvent:events[i]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- (void)handleSSEEvent:(NSString *)event {
|
||||
if (event.length == 0)
|
||||
return;
|
||||
|
||||
// 解析 SSE 事件
|
||||
// 格式: data: {...}
|
||||
NSArray *lines = [event componentsSeparatedByString:@"\n"];
|
||||
|
||||
for (NSString *line in lines) {
|
||||
if ([line hasPrefix:@"data: "]) {
|
||||
NSString *dataString = [line substringFromIndex:6];
|
||||
|
||||
// 检查是否是结束标志
|
||||
if ([dataString isEqualToString:@"[DONE]"]) {
|
||||
[self reportComplete];
|
||||
return;
|
||||
}
|
||||
|
||||
// 解析 JSON
|
||||
NSData *jsonData = [dataString dataUsingEncoding:NSUTF8StringEncoding];
|
||||
NSError *jsonError = nil;
|
||||
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:jsonData
|
||||
options:0
|
||||
error:&jsonError];
|
||||
|
||||
if (jsonError) {
|
||||
NSLog(@"[LLMStreamClient] Failed to parse SSE data: %@", dataString);
|
||||
continue;
|
||||
}
|
||||
|
||||
// 提取 token(根据实际 API 格式调整)
|
||||
// 常见格式: {"token": "..."} 或 {"choices": [{"delta": {"content":
|
||||
// "..."}}]}
|
||||
NSString *token = json[@"token"];
|
||||
if (!token) {
|
||||
// OpenAI 格式
|
||||
NSArray *choices = json[@"choices"];
|
||||
if (choices.count > 0) {
|
||||
NSDictionary *delta = choices[0][@"delta"];
|
||||
token = delta[@"content"];
|
||||
}
|
||||
}
|
||||
|
||||
if (token && token.length > 0) {
|
||||
[self reportToken:token];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - NSURLSessionDataDelegate
|
||||
|
||||
- (void)URLSession:(NSURLSession *)session
|
||||
dataTask:(NSURLSessionDataTask *)dataTask
|
||||
didReceiveData:(NSData *)data {
|
||||
[self parseSSEData:data];
|
||||
}
|
||||
|
||||
- (void)URLSession:(NSURLSession *)session
|
||||
task:(NSURLSessionTask *)task
|
||||
didCompleteWithError:(NSError *)error {
|
||||
if (error) {
|
||||
if (error.code != NSURLErrorCancelled) {
|
||||
[self reportError:error];
|
||||
}
|
||||
} else {
|
||||
// 处理缓冲区中剩余的数据
|
||||
if (self.buffer.length > 0) {
|
||||
[self handleSSEEvent:self.buffer];
|
||||
[self.buffer setString:@""];
|
||||
}
|
||||
[self reportComplete];
|
||||
}
|
||||
}
|
||||
|
||||
@end
|
||||
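For reference, parseSSEData / handleSSEEvent above expect a stream shaped roughly like the following (events separated by a blank line; the exact JSON fields depend on the actual backend, and both the simple "token" field and the OpenAI-style choices/delta field are handled):

data: {"token": "今天"}

data: {"choices": [{"delta": {"content": "天气不错"}}]}

data: [DONE]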
keyBoard/Class/AiTalk/VM/Segmenter.h (new file, 37 lines)
|
||||
//
|
||||
// Segmenter.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 句子切分器
|
||||
/// 将 LLM 输出的 token 流切分成可触发 TTS 的句子片段
|
||||
@interface Segmenter : NSObject
|
||||
|
||||
/// 累积字符数阈值(超过此值强制切分)
|
||||
/// 默认:30
|
||||
@property(nonatomic, assign) NSUInteger maxCharacterThreshold;
|
||||
|
||||
/// 追加 token
|
||||
/// @param token LLM 输出的 token
|
||||
- (void)appendToken:(NSString *)token;
|
||||
|
||||
/// 获取并移除已准备好的片段
|
||||
/// @return 可立即进行 TTS 的片段数组
|
||||
- (NSArray<NSString *> *)popReadySegments;
|
||||
|
||||
/// 获取剩余的未完成片段(用于最后 flush)
|
||||
/// @return 剩余片段,可能为空
|
||||
- (nullable NSString *)flushRemainingSegment;
|
||||
|
||||
/// 重置状态
|
||||
- (void)reset;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
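A sketch of how the LLM token callback might drive the Segmenter and trigger per-sentence TTS; self.segmenter and self.ttsClient are assumed properties on the orchestrator:

- (void)llmClientDidReceiveToken:(NSString *)token {
  [self.segmenter appendToken:token];
  for (NSString *sentence in [self.segmenter popReadySegments]) {
    [self.ttsClient requestTTSForText:sentence
                            segmentId:[[NSUUID UUID] UUIDString]];
  }
}

- (void)llmClientDidComplete {
  NSString *rest = [self.segmenter flushRemainingSegment];
  if (rest.length > 0) {
    [self.ttsClient requestTTSForText:rest
                            segmentId:[[NSUUID UUID] UUIDString]];
  }
}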
keyBoard/Class/AiTalk/VM/Segmenter.m (new file, 148 lines)
|
||||
//
|
||||
// Segmenter.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "Segmenter.h"
|
||||
|
||||
@interface Segmenter ()
|
||||
|
||||
@property(nonatomic, strong) NSMutableString *buffer;
|
||||
@property(nonatomic, strong) NSMutableArray<NSString *> *readySegments;
|
||||
|
||||
@end
|
||||
|
||||
@implementation Segmenter
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_buffer = [[NSMutableString alloc] init];
|
||||
_readySegments = [[NSMutableArray alloc] init];
|
||||
_maxCharacterThreshold = 30;
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)appendToken:(NSString *)token {
|
||||
if (!token || token.length == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
[self.buffer appendString:token];
|
||||
|
||||
// 检查是否需要切分
|
||||
[self checkAndSplit];
|
||||
}
|
||||
|
||||
- (NSArray<NSString *> *)popReadySegments {
|
||||
NSArray *segments = [self.readySegments copy];
|
||||
[self.readySegments removeAllObjects];
|
||||
return segments;
|
||||
}
|
||||
|
||||
- (NSString *)flushRemainingSegment {
|
||||
NSString *remaining = [self.buffer copy];
|
||||
[self.buffer setString:@""];
|
||||
|
||||
// 去除首尾空白
|
||||
remaining = [remaining
|
||||
stringByTrimmingCharactersInSet:[NSCharacterSet
|
||||
whitespaceAndNewlineCharacterSet]];
|
||||
|
||||
return remaining.length > 0 ? remaining : nil;
|
||||
}
|
||||
|
||||
- (void)reset {
|
||||
[self.buffer setString:@""];
|
||||
[self.readySegments removeAllObjects];
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (void)checkAndSplit {
|
||||
// 句子结束标点
|
||||
NSCharacterSet *sentenceEnders =
|
||||
[NSCharacterSet characterSetWithCharactersInString:@"。!?\n"];
|
||||
|
||||
while (YES) {
|
||||
NSString *currentBuffer = self.buffer;
|
||||
|
||||
// 查找第一个句子结束标点
|
||||
NSRange range = [currentBuffer rangeOfCharacterFromSet:sentenceEnders];
|
||||
|
||||
if (range.location != NSNotFound) {
|
||||
// 找到结束标点,切分
|
||||
NSUInteger endIndex = range.location + 1;
|
||||
NSString *segment = [currentBuffer substringToIndex:endIndex];
|
||||
segment = [segment stringByTrimmingCharactersInSet:
|
||||
[NSCharacterSet whitespaceAndNewlineCharacterSet]];
|
||||
|
||||
if (segment.length > 0) {
|
||||
[self.readySegments addObject:segment];
|
||||
}
|
||||
|
||||
// 移除已切分的部分
|
||||
[self.buffer deleteCharactersInRange:NSMakeRange(0, endIndex)];
|
||||
} else if (currentBuffer.length >= self.maxCharacterThreshold) {
|
||||
// 未找到标点,但超过阈值,强制切分
|
||||
// 尝试在空格或逗号处切分
|
||||
NSRange breakRange = [self findBestBreakPoint:currentBuffer];
|
||||
|
||||
if (breakRange.location != NSNotFound) {
|
||||
NSString *segment =
|
||||
[currentBuffer substringToIndex:breakRange.location + 1];
|
||||
segment =
|
||||
[segment stringByTrimmingCharactersInSet:
|
||||
[NSCharacterSet whitespaceAndNewlineCharacterSet]];
|
||||
|
||||
if (segment.length > 0) {
|
||||
[self.readySegments addObject:segment];
|
||||
}
|
||||
|
||||
[self.buffer
|
||||
deleteCharactersInRange:NSMakeRange(0, breakRange.location + 1)];
|
||||
} else {
|
||||
// 无法找到合适的断点,直接切分
|
||||
NSString *segment =
|
||||
[currentBuffer substringToIndex:self.maxCharacterThreshold];
|
||||
segment =
|
||||
[segment stringByTrimmingCharactersInSet:
|
||||
[NSCharacterSet whitespaceAndNewlineCharacterSet]];
|
||||
|
||||
if (segment.length > 0) {
|
||||
[self.readySegments addObject:segment];
|
||||
}
|
||||
|
||||
[self.buffer
|
||||
deleteCharactersInRange:NSMakeRange(0, self.maxCharacterThreshold)];
|
||||
}
|
||||
} else {
|
||||
// 未达到切分条件
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
- (NSRange)findBestBreakPoint:(NSString *)text {
|
||||
// 优先在逗号、分号等处断开
|
||||
NSCharacterSet *breakChars =
|
||||
[NSCharacterSet characterSetWithCharactersInString:@",,、;;:: "];
|
||||
|
||||
// 从后往前查找,尽可能多包含内容
|
||||
for (NSInteger i = text.length - 1; i >= self.maxCharacterThreshold / 2;
|
||||
i--) {
|
||||
unichar c = [text characterAtIndex:i];
|
||||
if ([breakChars characterIsMember:c]) {
|
||||
return NSMakeRange(i, 1);
|
||||
}
|
||||
}
|
||||
|
||||
return NSMakeRange(NSNotFound, 0);
|
||||
}
|
||||
|
||||
@end
|
||||
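With the default maxCharacterThreshold of 30, the splitting behaves as follows (illustrative): appending "今天天气不错。" immediately yields the segment 「今天天气不错。」 because 。 is a sentence ender; a long clause with no ender is force-split once the buffer reaches 30 characters, preferring the last comma / 、 / ; / : / space found in the second half of the buffer (findBestBreakPoint), and only cutting at exactly 30 characters when no such break point exists.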
keyBoard/Class/AiTalk/VM/SubtitleSync.h (new file, 36 lines)
|
||||
//
|
||||
// SubtitleSync.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 字幕同步器
|
||||
/// 根据播放进度映射文字显示,实现打字机效果
|
||||
@interface SubtitleSync : NSObject
|
||||
|
||||
/// 获取当前应显示的文本
|
||||
/// @param fullText 完整文本
|
||||
/// @param currentTime 当前播放时间(秒)
|
||||
/// @param duration 总时长(秒)
|
||||
/// @return 应显示的部分文本(打字机效果)
|
||||
- (NSString *)visibleTextForFullText:(NSString *)fullText
|
||||
currentTime:(NSTimeInterval)currentTime
|
||||
duration:(NSTimeInterval)duration;
|
||||
|
||||
/// 获取可见字符数
|
||||
/// @param fullText 完整文本
|
||||
/// @param currentTime 当前播放时间(秒)
|
||||
/// @param duration 总时长(秒)
|
||||
/// @return 应显示的字符数
|
||||
- (NSUInteger)visibleCountForFullText:(NSString *)fullText
|
||||
currentTime:(NSTimeInterval)currentTime
|
||||
duration:(NSTimeInterval)duration;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
keyBoard/Class/AiTalk/VM/SubtitleSync.m (new file, 66 lines)
|
||||
//
|
||||
// SubtitleSync.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "SubtitleSync.h"
|
||||
|
||||
@implementation SubtitleSync
|
||||
|
||||
- (NSString *)visibleTextForFullText:(NSString *)fullText
|
||||
currentTime:(NSTimeInterval)currentTime
|
||||
duration:(NSTimeInterval)duration {
|
||||
|
||||
if (!fullText || fullText.length == 0) {
|
||||
return @"";
|
||||
}
|
||||
|
||||
NSUInteger visibleCount = [self visibleCountForFullText:fullText
|
||||
currentTime:currentTime
|
||||
duration:duration];
|
||||
|
||||
if (visibleCount >= fullText.length) {
|
||||
return fullText;
|
||||
}
|
||||
|
||||
return [fullText substringToIndex:visibleCount];
|
||||
}
|
||||
|
||||
- (NSUInteger)visibleCountForFullText:(NSString *)fullText
|
||||
currentTime:(NSTimeInterval)currentTime
|
||||
duration:(NSTimeInterval)duration {
|
||||
|
||||
if (!fullText || fullText.length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 边界情况处理
|
||||
if (duration <= 0) {
|
||||
// 如果没有时长信息,直接返回全部
|
||||
return fullText.length;
|
||||
}
|
||||
|
||||
if (currentTime <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (currentTime >= duration) {
|
||||
return fullText.length;
|
||||
}
|
||||
|
||||
// 计算进度比例
|
||||
double progress = currentTime / duration;
|
||||
|
||||
// 计算可见字符数
|
||||
// 使用略微超前的策略,确保文字不会落后于语音
|
||||
double adjustedProgress = MIN(progress * 1.05, 1.0);
|
||||
|
||||
NSUInteger visibleCount =
|
||||
(NSUInteger)round(fullText.length * adjustedProgress);
|
||||
|
||||
return MIN(visibleCount, fullText.length);
|
||||
}
|
||||
|
||||
@end
|
||||
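A quick worked example of the mapping: with a 20-character reply whose audio lasts 4.0 s, at currentTime = 2.0 s the progress is 0.5, the adjusted progress is 0.525, and visibleCountForFullText returns round(20 × 0.525) = 11 characters; because of the 1.05 lead factor the full text becomes visible slightly before the audio ends (adjusted progress reaches 1.0 at roughly 3.8 s).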
keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.h (new file, 79 lines)
|
||||
//
|
||||
// TTSPlaybackPipeline.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "TTSServiceClient.h"
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// 播放管线代理
|
||||
@protocol TTSPlaybackPipelineDelegate <NSObject>
|
||||
@optional
|
||||
/// 开始播放片段
|
||||
- (void)pipelineDidStartSegment:(NSString *)segmentId
|
||||
duration:(NSTimeInterval)duration;
|
||||
/// 播放时间更新
|
||||
- (void)pipelineDidUpdatePlaybackTime:(NSTimeInterval)time
|
||||
segmentId:(NSString *)segmentId;
|
||||
/// 片段播放完成
|
||||
- (void)pipelineDidFinishSegment:(NSString *)segmentId;
|
||||
/// 所有片段播放完成
|
||||
- (void)pipelineDidFinishAllSegments;
|
||||
/// 播放出错
|
||||
- (void)pipelineDidFail:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// TTS 播放管线
|
||||
/// 根据 payloadType 路由到对应播放器
|
||||
@interface TTSPlaybackPipeline : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<TTSPlaybackPipelineDelegate> delegate;
|
||||
|
||||
/// 是否正在播放
|
||||
@property(nonatomic, assign, readonly, getter=isPlaying) BOOL playing;
|
||||
|
||||
/// 当前播放的片段 ID
|
||||
@property(nonatomic, copy, readonly, nullable) NSString *currentSegmentId;
|
||||
|
||||
/// 启动管线
|
||||
/// @param error 错误信息
|
||||
/// @return 是否启动成功
|
||||
- (BOOL)start:(NSError **)error;
|
||||
|
||||
/// 停止管线(立即停止,用于打断)
|
||||
- (void)stop;
|
||||
|
||||
/// 入队 URL 播放
|
||||
/// @param url 音频 URL
|
||||
/// @param segmentId 片段 ID
|
||||
- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId;
|
||||
|
||||
/// 入队音频数据块
|
||||
/// @param chunk 音频数据
|
||||
/// @param type 数据类型
|
||||
/// @param segmentId 片段 ID
|
||||
- (void)enqueueChunk:(NSData *)chunk
|
||||
payloadType:(TTSPayloadType)type
|
||||
segmentId:(NSString *)segmentId;
|
||||
|
||||
/// 标记片段数据完成(用于流式模式)
|
||||
/// @param segmentId 片段 ID
|
||||
- (void)markSegmentComplete:(NSString *)segmentId;
|
||||
|
||||
/// 获取片段的当前播放时间
|
||||
/// @param segmentId 片段 ID
|
||||
/// @return 当前时间(秒),如果未在播放则返回 0
|
||||
- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId;
|
||||
|
||||
/// 获取片段的总时长
|
||||
/// @param segmentId 片段 ID
|
||||
/// @return 总时长(秒)
|
||||
- (NSTimeInterval)durationForSegment:(NSString *)segmentId;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
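A sketch of how the TTSServiceClient delegate callbacks (declared in TTSServiceClient.h) can be routed into the pipeline; self.pipeline is an assumed property on the orchestrator, and [self.pipeline start:] is assumed to have been called once beforehand:

- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId {
  [self.pipeline enqueueURL:url segmentId:segmentId];                       // Mode A
}

- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
                          payloadType:(TTSPayloadType)type
                            segmentId:(NSString *)segmentId {
  [self.pipeline enqueueChunk:chunk payloadType:type segmentId:segmentId];  // Mode B/C/D
}

- (void)ttsClientDidFinishSegment:(NSString *)segmentId {
  [self.pipeline markSegmentComplete:segmentId];
}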
keyBoard/Class/AiTalk/VM/TTSPlaybackPipeline.m (new file, 343 lines)
|
||||
//
|
||||
// TTSPlaybackPipeline.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "TTSPlaybackPipeline.h"
|
||||
#import "AudioStreamPlayer.h"
|
||||
#import <AVFoundation/AVFoundation.h>
|
||||
|
||||
@interface TTSPlaybackPipeline () <AudioStreamPlayerDelegate>
|
||||
|
||||
// 播放器
|
||||
@property(nonatomic, strong) AVPlayer *urlPlayer;
|
||||
@property(nonatomic, strong) AudioStreamPlayer *streamPlayer;
|
||||
|
||||
// 片段队列
|
||||
@property(nonatomic, strong) NSMutableArray<NSDictionary *> *segmentQueue;
|
||||
@property(nonatomic, strong)
|
||||
NSMutableDictionary<NSString *, NSNumber *> *segmentDurations;
|
||||
|
||||
// 状态
|
||||
@property(nonatomic, assign, getter=isPlaying) BOOL playing;
@property(nonatomic, copy, nullable) NSString *currentSegmentId;
|
||||
@property(nonatomic, strong) id playerTimeObserver;
|
||||
|
||||
// 队列
|
||||
@property(nonatomic, strong) dispatch_queue_t playbackQueue;
|
||||
|
||||
@end
|
||||
|
||||
@implementation TTSPlaybackPipeline
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_segmentQueue = [[NSMutableArray alloc] init];
|
||||
_segmentDurations = [[NSMutableDictionary alloc] init];
|
||||
_playbackQueue = dispatch_queue_create("com.keyboard.aitalk.playback",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)dealloc {
  // Clean up synchronously: [self stop] dispatches a block that captures self, which is unsafe in dealloc.
  if (_playerTimeObserver) { [_urlPlayer removeTimeObserver:_playerTimeObserver]; }
  [_urlPlayer pause];
  [_streamPlayer stop];
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (BOOL)start:(NSError **)error {
|
||||
// 初始化 stream player
|
||||
if (!self.streamPlayer) {
|
||||
self.streamPlayer = [[AudioStreamPlayer alloc] init];
|
||||
self.streamPlayer.delegate = self;
|
||||
}
|
||||
|
||||
return [self.streamPlayer start:error];
|
||||
}
|
||||
|
||||
- (void)stop {
|
||||
dispatch_async(self.playbackQueue, ^{
|
||||
// 停止 URL 播放
|
||||
if (self.urlPlayer) {
|
||||
[self.urlPlayer pause];
|
||||
if (self.playerTimeObserver) {
|
||||
[self.urlPlayer removeTimeObserver:self.playerTimeObserver];
|
||||
self.playerTimeObserver = nil;
|
||||
}
|
||||
self.urlPlayer = nil;
|
||||
}
|
||||
|
||||
// 停止流式播放
|
||||
[self.streamPlayer stop];
|
||||
|
||||
// 清空队列
|
||||
[self.segmentQueue removeAllObjects];
|
||||
[self.segmentDurations removeAllObjects];
|
||||
|
||||
self.playing = NO;
|
||||
self.currentSegmentId = nil;
|
||||
});
|
||||
}
|
||||
|
||||
- (void)enqueueURL:(NSURL *)url segmentId:(NSString *)segmentId {
|
||||
if (!url || !segmentId)
|
||||
return;
|
||||
|
||||
dispatch_async(self.playbackQueue, ^{
|
||||
NSDictionary *segment = @{
|
||||
@"type" : @(TTSPayloadTypeURL),
|
||||
@"url" : url,
|
||||
@"segmentId" : segmentId
|
||||
};
|
||||
[self.segmentQueue addObject:segment];
|
||||
|
||||
// 如果当前没有在播放,开始播放
|
||||
if (!self.playing) {
|
||||
[self playNextSegment];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)enqueueChunk:(NSData *)chunk
|
||||
payloadType:(TTSPayloadType)type
|
||||
segmentId:(NSString *)segmentId {
|
||||
if (!chunk || !segmentId)
|
||||
return;
|
||||
|
||||
dispatch_async(self.playbackQueue, ^{
|
||||
switch (type) {
|
||||
case TTSPayloadTypePCMChunk:
|
||||
// 直接喂给 stream player
|
||||
[self.streamPlayer enqueuePCMChunk:chunk
|
||||
sampleRate:16000
|
||||
channels:1
|
||||
segmentId:segmentId];
|
||||
|
||||
if (!self.playing) {
|
||||
self.playing = YES;
|
||||
self.currentSegmentId = segmentId;
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(pipelineDidStartSegment:duration:)]) {
|
||||
[self.delegate pipelineDidStartSegment:segmentId duration:0];
|
||||
}
|
||||
});
|
||||
}
|
||||
break;
|
||||
|
||||
case TTSPayloadTypeAACChunk:
|
||||
// TODO: AAC 解码 -> PCM -> streamPlayer
|
||||
NSLog(@"[TTSPlaybackPipeline] AAC chunk decoding not implemented yet");
|
||||
break;
|
||||
|
||||
case TTSPayloadTypeOpusChunk:
|
||||
// TODO: Opus 解码 -> PCM -> streamPlayer
|
||||
NSLog(@"[TTSPlaybackPipeline] Opus chunk decoding not implemented yet");
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)markSegmentComplete:(NSString *)segmentId {
|
||||
// Stream player 会自动处理播放完成
|
||||
}
|
||||
|
||||
- (NSTimeInterval)currentTimeForSegment:(NSString *)segmentId {
|
||||
if (![segmentId isEqualToString:self.currentSegmentId]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (self.urlPlayer) {
|
||||
return CMTimeGetSeconds(self.urlPlayer.currentTime);
|
||||
}
|
||||
|
||||
return [self.streamPlayer playbackTimeForSegment:segmentId];
|
||||
}
|
||||
|
||||
- (NSTimeInterval)durationForSegment:(NSString *)segmentId {
|
||||
NSNumber *duration = self.segmentDurations[segmentId];
|
||||
if (duration) {
|
||||
return duration.doubleValue;
|
||||
}
|
||||
|
||||
if (self.urlPlayer && [segmentId isEqualToString:self.currentSegmentId]) {
|
||||
    CMTime itemDuration = self.urlPlayer.currentItem.duration;
    if (CMTIME_IS_VALID(itemDuration)) {
      return CMTimeGetSeconds(itemDuration);
|
||||
}
|
||||
}
|
||||
|
||||
return [self.streamPlayer durationForSegment:segmentId];
|
||||
}
|
||||
|
||||
#pragma mark - Private Methods
|
||||
|
||||
- (void)playNextSegment {
|
||||
if (self.segmentQueue.count == 0) {
|
||||
self.playing = NO;
|
||||
self.currentSegmentId = nil;
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(pipelineDidFinishAllSegments)]) {
|
||||
[self.delegate pipelineDidFinishAllSegments];
|
||||
}
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
NSDictionary *segment = self.segmentQueue.firstObject;
|
||||
[self.segmentQueue removeObjectAtIndex:0];
|
||||
|
||||
TTSPayloadType type = [segment[@"type"] integerValue];
|
||||
NSString *segmentId = segment[@"segmentId"];
|
||||
|
||||
self.playing = YES;
|
||||
self.currentSegmentId = segmentId;
|
||||
|
||||
if (type == TTSPayloadTypeURL) {
|
||||
NSURL *url = segment[@"url"];
|
||||
[self playURL:url segmentId:segmentId];
|
||||
}
|
||||
}
|
||||
|
||||
- (void)playURL:(NSURL *)url segmentId:(NSString *)segmentId {
|
||||
AVPlayerItem *item = [AVPlayerItem playerItemWithURL:url];
|
||||
|
||||
if (!self.urlPlayer) {
|
||||
self.urlPlayer = [AVPlayer playerWithPlayerItem:item];
|
||||
} else {
|
||||
[self.urlPlayer replaceCurrentItemWithPlayerItem:item];
|
||||
}
|
||||
|
||||
// 监听播放完成
|
||||
[[NSNotificationCenter defaultCenter]
|
||||
addObserver:self
|
||||
selector:@selector(playerItemDidFinish:)
|
||||
name:AVPlayerItemDidPlayToEndTimeNotification
|
||||
object:item];
|
||||
|
||||
// 添加时间观察器
|
||||
__weak typeof(self) weakSelf = self;
|
||||
self.playerTimeObserver = [self.urlPlayer
|
||||
addPeriodicTimeObserverForInterval:CMTimeMake(1, 30)
|
||||
queue:dispatch_get_main_queue()
|
||||
usingBlock:^(CMTime time) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
NSTimeInterval currentTime =
|
||||
CMTimeGetSeconds(time);
|
||||
if ([strongSelf.delegate
|
||||
respondsToSelector:@selector
|
||||
(pipelineDidUpdatePlaybackTime:
|
||||
segmentId:)]) {
|
||||
[strongSelf.delegate
|
||||
pipelineDidUpdatePlaybackTime:currentTime
|
||||
segmentId:segmentId];
|
||||
}
|
||||
}];
|
||||
|
||||
// 等待资源加载后获取时长并开始播放
|
||||
[item.asset
|
||||
loadValuesAsynchronouslyForKeys:@[ @"duration" ]
|
||||
completionHandler:^{
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
NSTimeInterval duration =
|
||||
CMTimeGetSeconds(item.duration);
|
||||
if (!isnan(duration)) {
|
||||
self.segmentDurations[segmentId] = @(duration);
|
||||
}
|
||||
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(pipelineDidStartSegment:
|
||||
duration:)]) {
|
||||
[self.delegate pipelineDidStartSegment:segmentId
|
||||
duration:duration];
|
||||
}
|
||||
|
||||
[self.urlPlayer play];
|
||||
});
|
||||
}];
|
||||
}
|
||||
|
||||
- (void)playerItemDidFinish:(NSNotification *)notification {
|
||||
[[NSNotificationCenter defaultCenter]
|
||||
removeObserver:self
|
||||
name:AVPlayerItemDidPlayToEndTimeNotification
|
||||
object:notification.object];
|
||||
|
||||
if (self.playerTimeObserver) {
|
||||
[self.urlPlayer removeTimeObserver:self.playerTimeObserver];
|
||||
self.playerTimeObserver = nil;
|
||||
}
|
||||
|
||||
NSString *finishedSegmentId = self.currentSegmentId;
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(pipelineDidFinishSegment:)]) {
|
||||
[self.delegate pipelineDidFinishSegment:finishedSegmentId];
|
||||
}
|
||||
});
|
||||
|
||||
dispatch_async(self.playbackQueue, ^{
|
||||
[self playNextSegment];
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - AudioStreamPlayerDelegate
|
||||
|
||||
- (void)audioStreamPlayerDidStartSegment:(NSString *)segmentId {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(pipelineDidStartSegment:duration:)]) {
|
||||
[self.delegate pipelineDidStartSegment:segmentId duration:0];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)audioStreamPlayerDidUpdateTime:(NSTimeInterval)time
|
||||
segmentId:(NSString *)segmentId {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector
|
||||
(pipelineDidUpdatePlaybackTime:segmentId:)]) {
|
||||
[self.delegate pipelineDidUpdatePlaybackTime:time segmentId:segmentId];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)audioStreamPlayerDidFinishSegment:(NSString *)segmentId {
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(pipelineDidFinishSegment:)]) {
|
||||
[self.delegate pipelineDidFinishSegment:segmentId];
|
||||
}
|
||||
});
|
||||
|
||||
dispatch_async(self.playbackQueue, ^{
|
||||
// 检查是否还有更多片段
|
||||
if (self.segmentQueue.count == 0) {
|
||||
self.playing = NO;
|
||||
self.currentSegmentId = nil;
|
||||
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate
|
||||
respondsToSelector:@selector(pipelineDidFinishAllSegments)]) {
|
||||
[self.delegate pipelineDidFinishAllSegments];
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@end
|
||||
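A sketch of the barge-in path built on top of this pipeline: when the user presses to talk while the AI is still speaking, the orchestrator stops playback and cancels the in-flight LLM/TTS work before starting a new capture round (property names are illustrative):

- (void)handleBargeIn {
  [self.pipeline stop];      // immediately silence the current and queued segments
  [self.llmClient cancel];   // stop receiving further tokens
  [self.ttsClient cancel];   // drop in-flight TTS requests
  // ...then start a new recording / ASR round as usual
}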
keyBoard/Class/AiTalk/VM/TTSServiceClient.h (new file, 60 lines)
|
||||
//
|
||||
// TTSServiceClient.h
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import <Foundation/Foundation.h>
|
||||
|
||||
NS_ASSUME_NONNULL_BEGIN
|
||||
|
||||
/// TTS 返回数据类型
|
||||
typedef NS_ENUM(NSInteger, TTSPayloadType) {
|
||||
TTSPayloadTypeURL = 0, // 模式 A:返回 m4a/MP3 URL
|
||||
TTSPayloadTypePCMChunk, // 模式 D:返回 PCM chunk
|
||||
TTSPayloadTypeAACChunk, // 模式 B:返回 AAC chunk
|
||||
TTSPayloadTypeOpusChunk // 模式 C:返回 Opus chunk
|
||||
};
|
||||
|
||||
/// TTS 服务客户端代理
|
||||
@protocol TTSServiceClientDelegate <NSObject>
|
||||
@optional
|
||||
/// 收到音频 URL(模式 A)
|
||||
- (void)ttsClientDidReceiveURL:(NSURL *)url segmentId:(NSString *)segmentId;
|
||||
/// 收到音频数据块(模式 B/C/D)
|
||||
- (void)ttsClientDidReceiveAudioChunk:(NSData *)chunk
|
||||
payloadType:(TTSPayloadType)type
|
||||
segmentId:(NSString *)segmentId;
|
||||
/// 片段完成
|
||||
- (void)ttsClientDidFinishSegment:(NSString *)segmentId;
|
||||
/// 请求失败
|
||||
- (void)ttsClientDidFail:(NSError *)error;
|
||||
@end
|
||||
|
||||
/// TTS 服务客户端
|
||||
/// 统一网络层接口,支持多种 TTS 返回形态
|
||||
@interface TTSServiceClient : NSObject
|
||||
|
||||
@property(nonatomic, weak) id<TTSServiceClientDelegate> delegate;
|
||||
|
||||
/// TTS 服务器 URL
|
||||
@property(nonatomic, copy) NSString *serverURL;
|
||||
|
||||
/// 当前期望的返回类型(由服务端配置决定)
|
||||
@property(nonatomic, assign) TTSPayloadType expectedPayloadType;
|
||||
|
||||
/// 是否正在请求
|
||||
@property(nonatomic, assign, readonly, getter=isRequesting) BOOL requesting;
|
||||
|
||||
/// 请求 TTS 合成
|
||||
/// @param text 要合成的文本
|
||||
/// @param segmentId 片段 ID(用于标识和排序)
|
||||
- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId;
|
||||
|
||||
/// 取消所有请求
|
||||
- (void)cancel;
|
||||
|
||||
@end
|
||||
|
||||
NS_ASSUME_NONNULL_END
|
||||
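A minimal usage sketch; the payload type must match whatever the TTS server is actually configured to return:

TTSServiceClient *tts = [[TTSServiceClient alloc] init];
tts.delegate = self;
tts.expectedPayloadType = TTSPayloadTypePCMChunk;   // server streams raw PCM chunks (Mode D)
[tts requestTTSForText:@"今天天气不错。"
             segmentId:[[NSUUID UUID] UUIDString]];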
keyBoard/Class/AiTalk/VM/TTSServiceClient.m (new file, 298 lines)
|
||||
//
|
||||
// TTSServiceClient.m
|
||||
// keyBoard
|
||||
//
|
||||
// Created by Mac on 2026/1/15.
|
||||
//
|
||||
|
||||
#import "TTSServiceClient.h"
|
||||
|
||||
@interface TTSServiceClient () <NSURLSessionDataDelegate,
|
||||
NSURLSessionWebSocketDelegate>
|
||||
|
||||
@property(nonatomic, strong) NSURLSession *urlSession;
|
||||
@property(nonatomic, strong)
|
||||
NSMutableDictionary<NSString *, NSURLSessionTask *> *activeTasks;
|
||||
@property(nonatomic, strong) dispatch_queue_t networkQueue;
|
||||
@property(nonatomic, assign, getter=isRequesting) BOOL requesting;
|
||||
|
||||
@end
|
||||
|
||||
@implementation TTSServiceClient
|
||||
|
||||
- (instancetype)init {
|
||||
self = [super init];
|
||||
if (self) {
|
||||
_networkQueue = dispatch_queue_create("com.keyboard.aitalk.tts.network",
|
||||
DISPATCH_QUEUE_SERIAL);
|
||||
_activeTasks = [[NSMutableDictionary alloc] init];
|
||||
_expectedPayloadType = TTSPayloadTypeURL; // 默认 URL 模式
|
||||
// TODO: 替换为实际的 TTS 服务器地址
|
||||
_serverURL = @"https://your-tts-server.com/api/tts";
|
||||
|
||||
[self setupSession];
|
||||
}
|
||||
return self;
|
||||
}
|
||||
|
||||
- (void)setupSession {
|
||||
NSURLSessionConfiguration *config =
|
||||
[NSURLSessionConfiguration defaultSessionConfiguration];
|
||||
config.timeoutIntervalForRequest = 30;
|
||||
config.timeoutIntervalForResource = 120;
|
||||
|
||||
self.urlSession = [NSURLSession sessionWithConfiguration:config
|
||||
delegate:self
|
||||
delegateQueue:nil];
|
||||
}
|
||||
|
||||
- (void)dealloc {
|
||||
[self cancel];
|
||||
}
|
||||
|
||||
#pragma mark - Public Methods
|
||||
|
||||
- (void)requestTTSForText:(NSString *)text segmentId:(NSString *)segmentId {
|
||||
if (!text || text.length == 0 || !segmentId) {
|
||||
return;
|
||||
}
|
||||
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
self.requesting = YES;
|
||||
|
||||
switch (self.expectedPayloadType) {
|
||||
case TTSPayloadTypeURL:
|
||||
[self requestURLMode:text segmentId:segmentId];
|
||||
break;
|
||||
case TTSPayloadTypePCMChunk:
|
||||
case TTSPayloadTypeAACChunk:
|
||||
case TTSPayloadTypeOpusChunk:
|
||||
[self requestStreamMode:text segmentId:segmentId];
|
||||
break;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
- (void)cancel {
|
||||
dispatch_async(self.networkQueue, ^{
|
||||
for (NSURLSessionTask *task in self.activeTasks.allValues) {
|
||||
[task cancel];
|
||||
}
|
||||
[self.activeTasks removeAllObjects];
|
||||
self.requesting = NO;
|
||||
});
|
||||
}
|
||||
|
||||
#pragma mark - URL Mode (Mode A)
|
||||
|
||||
- (void)requestURLMode:(NSString *)text segmentId:(NSString *)segmentId {
|
||||
NSURL *url = [NSURL URLWithString:self.serverURL];
|
||||
NSMutableURLRequest *request = [NSMutableURLRequest requestWithURL:url];
|
||||
request.HTTPMethod = @"POST";
|
||||
[request setValue:@"application/json" forHTTPHeaderField:@"Content-Type"];
|
||||
|
||||
NSDictionary *body = @{
|
||||
@"text" : text,
|
||||
@"segmentId" : segmentId,
|
||||
@"format" : @"mp3" // 或 m4a
|
||||
};
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:body
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
request.HTTPBody = jsonData;
|
||||
|
||||
__weak typeof(self) weakSelf = self;
|
||||
NSURLSessionDataTask *task = [self.urlSession
|
||||
dataTaskWithRequest:request
|
||||
completionHandler:^(NSData *_Nullable data,
|
||||
NSURLResponse *_Nullable response,
|
||||
NSError *_Nullable error) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
dispatch_async(strongSelf.networkQueue, ^{
|
||||
[strongSelf.activeTasks removeObjectForKey:segmentId];
|
||||
|
||||
if (error) {
|
||||
if (error.code != NSURLErrorCancelled) {
|
||||
[strongSelf reportError:error];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// 解析响应
|
||||
NSError *parseError = nil;
|
||||
NSDictionary *json =
|
||||
[NSJSONSerialization JSONObjectWithData:data
|
||||
options:0
|
||||
error:&parseError];
|
||||
if (parseError) {
|
||||
[strongSelf reportError:parseError];
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *audioURLString = json[@"audioUrl"];
|
||||
if (audioURLString) {
|
||||
NSURL *audioURL = [NSURL URLWithString:audioURLString];
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([strongSelf.delegate respondsToSelector:@selector
|
||||
(ttsClientDidReceiveURL:segmentId:)]) {
|
||||
[strongSelf.delegate ttsClientDidReceiveURL:audioURL
|
||||
segmentId:segmentId];
|
||||
}
|
||||
if ([strongSelf.delegate respondsToSelector:@selector
|
||||
(ttsClientDidFinishSegment:)]) {
|
||||
[strongSelf.delegate ttsClientDidFinishSegment:segmentId];
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}];
|
||||
|
||||
self.activeTasks[segmentId] = task;
|
||||
[task resume];
|
||||
|
||||
NSLog(@"[TTSServiceClient] URL mode request for segment: %@", segmentId);
|
||||
}
|
||||
|
||||
#pragma mark - Stream Mode (Mode B/C/D)
|
||||
|
||||
- (void)requestStreamMode:(NSString *)text segmentId:(NSString *)segmentId {
|
||||
// WebSocket 连接用于流式接收
|
||||
NSString *wsURL =
|
||||
[self.serverURL stringByReplacingOccurrencesOfString:@"https://"
|
||||
withString:@"wss://"];
|
||||
wsURL = [wsURL stringByReplacingOccurrencesOfString:@"http://"
|
||||
withString:@"ws://"];
|
||||
wsURL = [wsURL stringByAppendingString:@"/stream"];
|
||||
|
||||
NSURL *url = [NSURL URLWithString:wsURL];
|
||||
NSURLSessionWebSocketTask *wsTask =
|
||||
[self.urlSession webSocketTaskWithURL:url];
|
||||
|
||||
self.activeTasks[segmentId] = wsTask;
|
||||
[wsTask resume];
|
||||
|
||||
// 发送请求
|
||||
NSDictionary *requestDict = @{
|
||||
@"text" : text,
|
||||
@"segmentId" : segmentId,
|
||||
@"format" : [self formatStringForPayloadType:self.expectedPayloadType]
|
||||
};
|
||||
|
||||
NSError *jsonError = nil;
|
||||
NSData *jsonData = [NSJSONSerialization dataWithJSONObject:requestDict
|
||||
options:0
|
||||
error:&jsonError];
|
||||
if (jsonError) {
|
||||
[self reportError:jsonError];
|
||||
return;
|
||||
}
|
||||
|
||||
NSString *jsonString = [[NSString alloc] initWithData:jsonData
|
||||
encoding:NSUTF8StringEncoding];
|
||||
NSURLSessionWebSocketMessage *message =
|
||||
[[NSURLSessionWebSocketMessage alloc] initWithString:jsonString];
|
||||
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[wsTask sendMessage:message
|
||||
completionHandler:^(NSError *_Nullable error) {
|
||||
if (error) {
|
||||
[weakSelf reportError:error];
|
||||
} else {
|
||||
[weakSelf receiveStreamMessage:wsTask segmentId:segmentId];
|
||||
}
|
||||
}];
|
||||
|
||||
NSLog(@"[TTSServiceClient] Stream mode request for segment: %@", segmentId);
|
||||
}
|
||||
|
||||
- (void)receiveStreamMessage:(NSURLSessionWebSocketTask *)wsTask
|
||||
segmentId:(NSString *)segmentId {
|
||||
__weak typeof(self) weakSelf = self;
|
||||
[wsTask receiveMessageWithCompletionHandler:^(
|
||||
NSURLSessionWebSocketMessage *_Nullable message,
|
||||
NSError *_Nullable error) {
|
||||
__strong typeof(weakSelf) strongSelf = weakSelf;
|
||||
if (!strongSelf)
|
||||
return;
|
||||
|
||||
if (error) {
|
||||
      // 57 = ENOTCONN (POSIX): the socket is already closed, e.g. after the task was cancelled
      if (error.code != NSURLErrorCancelled && error.code != 57) {
|
||||
[strongSelf reportError:error];
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type == NSURLSessionWebSocketMessageTypeData) {
|
||||
// 音频数据块
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([strongSelf.delegate respondsToSelector:@selector
|
||||
(ttsClientDidReceiveAudioChunk:
|
||||
payloadType:segmentId:)]) {
|
||||
[strongSelf.delegate
|
||||
ttsClientDidReceiveAudioChunk:message.data
|
||||
payloadType:strongSelf.expectedPayloadType
|
||||
segmentId:segmentId];
|
||||
}
|
||||
});
|
||||
|
||||
// 继续接收
|
||||
[strongSelf receiveStreamMessage:wsTask segmentId:segmentId];
|
||||
} else if (message.type == NSURLSessionWebSocketMessageTypeString) {
|
||||
// 控制消息
|
||||
NSData *data = [message.string dataUsingEncoding:NSUTF8StringEncoding];
|
||||
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data
|
||||
options:0
|
||||
error:nil];
|
||||
|
||||
if ([json[@"type"] isEqualToString:@"done"]) {
|
||||
dispatch_async(strongSelf.networkQueue, ^{
|
||||
[strongSelf.activeTasks removeObjectForKey:segmentId];
|
||||
});
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([strongSelf.delegate
|
||||
respondsToSelector:@selector(ttsClientDidFinishSegment:)]) {
|
||||
[strongSelf.delegate ttsClientDidFinishSegment:segmentId];
|
||||
}
|
||||
});
|
||||
} else {
|
||||
// 继续接收
|
||||
[strongSelf receiveStreamMessage:wsTask segmentId:segmentId];
|
||||
}
|
||||
}
|
||||
}];
|
||||
}
|
||||
|
||||
- (NSString *)formatStringForPayloadType:(TTSPayloadType)type {
|
||||
switch (type) {
|
||||
case TTSPayloadTypePCMChunk:
|
||||
return @"pcm";
|
||||
case TTSPayloadTypeAACChunk:
|
||||
return @"aac";
|
||||
case TTSPayloadTypeOpusChunk:
|
||||
return @"opus";
|
||||
default:
|
||||
return @"mp3";
|
||||
}
|
||||
}
|
||||
|
||||
#pragma mark - Error Reporting
|
||||
|
||||
- (void)reportError:(NSError *)error {
|
||||
self.requesting = NO;
|
||||
dispatch_async(dispatch_get_main_queue(), ^{
|
||||
if ([self.delegate respondsToSelector:@selector(ttsClientDidFail:)]) {
|
||||
[self.delegate ttsClientDidFail:error];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@end
|
||||
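For reference, the stream-mode exchange this client currently assumes looks like the following (the server contract is still to be confirmed; binary frames carry raw audio chunks, which the playback pipeline currently feeds to AudioStreamPlayer as 16 kHz mono PCM):

→ text:   {"text": "今天天气不错。", "segmentId": "seg-1", "format": "pcm"}
← binary: <audio chunk> (repeated)
← text:   {"type": "done"}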