esp32语音大模型接入实现语音聊天
实现大模型接入玩法,制作自己的AI助手小人。步骤依次为:一、连接WiFi✔;二、接收音频✔;三、将数据打包成JSON格式✔;四、接入模型API实现STT✔;五、将返回的数据打包成JSON格式✔;六、接入模型API实现TTT✔;七、串口输出文本✔;八、再次上传,接入MiniMax语音实现TTS✔;九、编码收集,I2S播放✔
ESP32s3系列ai语音聊天
注释:本博客记录作者搜寻资料学习成果,使用请自行删减。
感谢让我学到很多的博主:https://vor2345.blog.csdn.net/article/details/136975580
在今年九月份我在网上看到了自己的ai聊天小助手,说做就做!搜索了很多资料
选择物料: esp32s3(需要大容量ram)作者用的是xiao-sense系列,max98357,麦克风,喇叭
话不多说开写
用的是VScode,下载platformIO,然后新建工程

思路简介
实现大模型接入玩法
制作自己的AI助手小人
首先通过简单
一:连接wifi✔
二:接收音频✔
三:打包数据成JSON格式✔
四:接入模型API实现STT✔
五:打包传输下来的文件转成JSON格式✔
六:接入模型API实现TTT✔
七:串口输出文本✔
八:再次上传接入MiniMax语音实现TTS✔
九:编码收集,I2S播放✔
*/
用到的库
arduinojson库
audioi2s库
#include <Arduino.h>
#include "base64.h"
#include "WiFi.h"
#include "HTTPClient.h"
#include "cJSON.h"
#define ARDUINOJSON_SLOT_ID_SIZE 4
#define ARDUINOJSON_STRING_LENGTH_SIZE 4
#include <ArduinoJson.h>
#include "Audio.h"
#include <SPIFFS.h>
// Audio player object (declared in Audio.h); drives MP3 playback over I2S.
Audio audio;
// HTTP client reused for the text-to-speech request.
HTTPClient HTTP_TTS;
// NOTE(review): `key` is not referenced anywhere in the visible code — presumably a push-button pin; confirm.
#define key 3
// Analog input pin read by the sampling ISR (microphone).
#define ADC 2
// LED switched on while a recording is in progress.
#define ledPin BUILTIN_LED
// MAX98357 speaker amplifier pins
#define I2S_DOUT 6 // DIN connection
#define I2S_BCLK 5 // Bit clock
#define I2S_LRC 4 // Left Right Clock
下面填写自己的注册信息百度有免费的token可以申请
创建应用,我们用短句识别就可以了

将自己的信息填入下面
// 2. Fill in your own Baidu speech-technology application credentials: https://console.bce.baidu.com/ai/?fromai=1#/ai/speech/app/list
String CUID = "1155xxxxxx"; // AppID
String CLIENT_ID = "uxlxxxxxxxxxxxxxxxxxxxxxxx"; // API Key
String CLIENT_SECRET = "6Qxxxxxxxxxxxxxxxxxxxxxxxx"; // Secret key
String token; // Access token obtained with the credentials above (filled in by setup())
下面是tts要用到的api信息
我们使用的是MiniMax,它的响应比较快
上他的官网注册一下
下面分别是TTT和TTS的接口
// MiniMax model overview: https://www.minimaxi.com/news/%E9%80%9A%E7%94%A8%E5%A4%A7%E6%A8%A1%E5%9E%8Babab65%E7%B3%BB%E5%88%97
// MiniMax parameters
const char *AIKey = ""; // Chat (text-to-text) API key; create one under "API keys" in the console
String apiUrl = "https://api.minimax.chat/v1/text/chatcompletion_v2";
String TTS_group_id = ""; // GroupID shown in the account information page
// TTS API key; a separate key is recommended, though reusing the chat key should also work.
// (The original line was missing the closing quote, which left the string literal unterminated.)
String TTS_API_Key = "";
//const char *tts_url = "https://api.minimax.chat/v1/t2a_pro?GroupId="+TTS_group_id;
一些参数定义
HTTPClient http_client; // HTTP client used for the speech-recognition request
hw_timer_t *timer = NULL; // Hardware timer that paces the ADC sampling ISR
const int recordTimeSeconds = 3; // Nominal recording time, in seconds
// Number of 16-bit samples captured per recording.
// NOTE(review): the timer in setup() fires every 125 us (8 kHz), so 16000 * 3
// samples take about 6 s to capture rather than 3 s — confirm which is intended.
const int adc_data_len = 16000 * recordTimeSeconds;
// Size of the JSON request buffer: raw PCM bytes * 1.4 leaves room for the
// base64 expansion (4/3) plus the fixed JSON fields.
const int data_json_len = adc_data_len * 2 * 1.4;
uint16_t *adc_data; // Sample buffer, allocated in setup()
char *data_json; // JSON request buffer, allocated in setup()
String aduio_json; // Raw JSON body returned by the TTS service
// Both flags are written inside the timer ISR and polled from loop(); they are
// volatile so the compiler always re-reads them from memory.
volatile uint8_t adc_start_flag = 0; // Set to 1 to start sampling
volatile uint8_t adc_complete_flag = 0; // Set to 1 by the ISR when the buffer is full
uint32_t num = 0; // Next write index into adc_data (used only by the ISR)
portMUX_TYPE timerMux = portMUX_INITIALIZER_UNLOCKED;
String payload; // Raw speech-to-text response body
String input; // Recognised text passed to the chat model
String answer, aduiourl; // Chat reply, and the hex-encoded audio returned by TTS
int audio_time; // Audio length reported by the TTS service
int audio_end = 0; // 1 while playback is in progress; cleared by audio_eof_mp3()
函数申明
// Function declarations
String gainToken(); // Requests the Baidu access token
void IRAM_ATTR onTimer(); // Timer ISR: stores one ADC sample per tick
void sendToSTT(); // Posts data_json to the speech-recognition service
void assembleJson(String token); // Builds the speech-recognition request in data_json
String getGPTAnswer(String inputText); // Sends text to the chat model and returns its reply
String decodejson(String payload_message); // Extracts the "result" text from the recognition response
String getvAnswer(String ouputText) ; // Sends text to the TTS service and returns the audio field
void writeHexToMP3(String hexString, const char* filePath); // Decodes hex text into a file on SPIFFS
// Wi-Fi network name and password
const char* ssid = "veryveryfast";
const char* password = "veryveryfast123";
// Wi-Fi connection
// Starts the connection with the configured ssid/password and blocks until
// the station reports WL_CONNECTED, printing one dot per 300 ms poll.
void WiFi_Connect()
{
  WiFi.begin(ssid, password);
  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    // Still connecting: wait and show progress.
    delay(300);
    Serial.print(".");
  }
}
//初始化
void setup()
{
Serial.begin(115200); // open the serial port at 115200 bps;
Serial.println(ESP.getFreeHeap());
delay(100);
pinMode(ADC, ANALOG);//麦克风接口
Serial.print("Connecting.. ");
WiFi_Connect();
Serial.println("WiFi connected");
Serial.println("IP address: ");
Serial.println(WiFi.localIP());
delay(100);
//上面是WiFi连接
audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);//接口定义
audio.setVolume(21); // 默认0...21声音大小
delay(1000);
token = gainToken();//得到密钥
timer = timerBegin(0, 80, true);//40M分频
timerAlarmWrite(timer, 125, true);//125微秒
timerAttachInterrupt(timer, &onTimer, true);
timerAlarmEnable(timer);
timerStop(timer); // 先暂停
audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
audio.setVolume(12); // 0...21
adc_data = (uint16_t *)ps_malloc(adc_data_len * sizeof(uint16_t));
if (!adc_data) {
Serial.println("Failed to allocate memory for adc_data");
}
data_json = (char *)ps_malloc(data_json_len * sizeof(char)); // 根据需要调整大小
if (!data_json) {
Serial.println("Failed to allocate memory for data_json");
}
aduio_json = (char *)ps_malloc(data_json_len * sizeof(char)); // 根据需要调整大小
if (!data_json) {
Serial.println("Failed to allocate memory for data_json");
}
if (!SPIFFS.begin(true)) {
Serial.println("SPIFFS Mount Failed");
return;
}
}
//主循环
uint32_t time1, time2; // scratch variables for timing measurements (currently unused)
// Main loop
// When the character '1' arrives on the serial port, runs one full round:
// record -> speech-to-text -> chat model -> text-to-speech -> playback.
void loop()
{
  if (Serial.available() <= 0) {
    return;
  }
  if (Serial.read() != '1') {
    return;
  }

  // --- Record ---
  Serial.printf("Start recognition\r\n\r\n");
  digitalWrite(ledPin, HIGH);
  adc_start_flag = 1;
  timerStart(timer);
  while (!adc_complete_flag) // wait for the ISR to fill the buffer
  {
    ets_delay_us(10);
  }
  Serial.println(ESP.getFreeHeap());
  timerStop(timer);
  adc_complete_flag = 0; // clear the flag for the next round
  Serial.printf("ok\r\n");
  digitalWrite(ledPin, LOW);

  // --- Speech to text ---
  assembleJson(token);
  sendToSTT();
  Serial.println(input);
  Serial.println(ESP.getFreeHeap());
  if (input.length() == 0) {
    // Nothing was recognised; do not query the chat model with empty text.
    Serial.println("No speech recognised");
    return;
  }

  // --- Chat model ---
  answer = getGPTAnswer(input);
  Serial.println(answer);
  Serial.println(ESP.getPsramSize()); // total PSRAM
  Serial.println(ESP.getFreePsram()); // free PSRAM
  if (answer == "<error>") {
    // getGPTAnswer() failed; do not send the error marker to TTS.
    return;
  }

  // --- Text to speech and playback ---
  aduiourl = getvAnswer(answer);
  if (aduiourl != "error") {
    writeHexToMP3(aduiourl, "/audio.mp3");
    audio_end = 1;
    // Also stop when the player is not running, so a playback that failed to
    // start cannot hang the loop waiting for an end-of-file callback.
    while (audio_end && audio.isRunning())
    {
      audio.loop();
    }
    audio_end = 0;
    audio.stopSong();
  }
}
下面是函数集
//-----------------------------------------------------------
// Posts the prepared JSON request (data_json) to the Baidu speech-recognition
// endpoint. On HTTP 200 the body is stored in `payload` and the recognised
// text in `input`; on any failure `input` is left empty.
void sendToSTT() {
  // Clear the previous transcript so a failed request cannot be mistaken for
  // a fresh recognition result.
  input = "";

  http_client.begin("http://vop.baidu.com/server_api");
  http_client.addHeader("Content-Type", "application/json");
  int httpCode = http_client.POST(data_json);
  if (httpCode == HTTP_CODE_OK) {
    payload = http_client.getString();
    Serial.println(payload);
    input = decodejson(payload);
  } else if (httpCode > 0) {
    // The server answered, but not with 200: report the status code.
    Serial.printf("[HTTP] POST returned status %d\n", httpCode);
  } else {
    Serial.printf("[HTTP] POST failed, error: %s\n", http_client.errorToString(httpCode).c_str());
  }
  http_client.end();
}
//---------------------------------------------------------------
// Packs the captured ADC samples into the JSON request for the Baidu
// speech-recognition API and stores it in data_json.
// The fixed fields (format, rate, dev_pid, channel, cuid) are Baidu API
// parameters; adjust them according to the official documentation.
//
// token: access token placed in the "token" field.
//
// On failure (buffers missing, base64 encoding failed, or the request would
// not fit in data_json) an error is printed and data_json is left empty.
void assembleJson(String token)
{
  if (data_json == NULL || adc_data == NULL) {
    Serial.println("assembleJson: buffers are not allocated");
    return;
  }
  data_json[0] = '\0';

  const size_t pcm_bytes = adc_data_len * sizeof(uint16_t);
  const String speech = base64::encode(reinterpret_cast<uint8_t *>(adc_data), pcm_bytes);
  if (speech.length() == 0) {
    Serial.println("assembleJson: base64 encoding failed");
    return;
  }

  // Write every field up to the opening quote of "speech" in one bounded call.
  const int head = snprintf(
      data_json, data_json_len,
      "{\"format\":\"pcm\",\"rate\":8000,\"dev_pid\":1537,\"channel\":1,"
      "\"cuid\":\"57722200\",\"token\":\"%s\",\"len\":%d,\"speech\":\"",
      token.c_str(), adc_data_len * 2);

  static const char tail[] = "\"}"; // closes the speech string and the object
  if (head < 0 ||
      (size_t)head + speech.length() + sizeof(tail) > (size_t)data_json_len) {
    data_json[0] = '\0';
    Serial.println("assembleJson: request does not fit in data_json");
    return;
  }

  // Append the payload at a known offset instead of rescanning the buffer
  // with strcat; sizeof(tail) includes the terminating NUL.
  memcpy(data_json + head, speech.c_str(), speech.length());
  memcpy(data_json + head + speech.length(), tail, sizeof(tail));
}
//中断函数
//中断读取adc数据
在setup里设置了定时器周期(125微秒),对应8000Hz的采样率
// Timer interrupt handler: while sampling is enabled, stores one ADC reading
// per tick into adc_data; when the buffer is full it raises adc_complete_flag,
// clears adc_start_flag and rewinds the write index.
void IRAM_ATTR onTimer() {
  portENTER_CRITICAL_ISR(&timerMux);
  const bool sampling = (adc_start_flag == 1);
  if (sampling) {
    adc_data[num] = analogRead(ADC);
    num += 1;
    const bool bufferFull = !(num < adc_data_len);
    if (bufferFull) {
      // Signal completion to loop() and get ready for the next recording.
      adc_complete_flag = 1;
      adc_start_flag = 0;
      num = 0;
    }
  }
  portEXIT_CRITICAL_ISR(&timerMux);
}
//------------------------------------------------------------------------------
// Requests an access token from the Baidu OAuth endpoint using CLIENT_ID and
// CLIENT_SECRET.
// If this step fails while debugging, try hard-coding the full URL (with the
// client_id and client_secret values pasted in) instead of building it.
//
// Returns the "access_token" field of the response, or an empty String when
// the request fails, the response is not valid JSON, or the field is absent.
String gainToken() {
  HTTPClient http;
  String accessToken; // named differently from the global `token` to avoid shadowing
  const String url = String("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=") + CLIENT_ID + String("&client_secret=") + CLIENT_SECRET;
  //String url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=CLIENT_ID&client_secret=CLIENT_SECRET";
  http.begin(url);
  const int httpCode = http.GET();
  Serial.println("is sure to ping");
  if (httpCode == HTTP_CODE_OK) {
    const String payload_token = http.getString();
    DynamicJsonDocument doc(2048);
    const DeserializationError err = deserializeJson(doc, payload_token);
    if (err) {
      Serial.print("Token response is not valid JSON: ");
      Serial.println(err.c_str());
    } else {
      // `| ""` yields an empty string when the field is missing, instead of
      // the literal text "null".
      accessToken = doc["access_token"] | "";
      if (accessToken.length() == 0) {
        Serial.println("Token response has no access_token");
      }
      Serial.println(accessToken);
    }
  } else if (httpCode > 0) {
    Serial.printf("Token request returned status %d\n", httpCode);
  } else {
    Serial.println("Error on HTTP request for token");
  }
  http.end();
  return accessToken;
}
// Extracts the recognised text from a speech-recognition response.
// The "result" member is converted to a String, then its first two and last
// two characters are removed — presumably the `["` and `"]` that wrap a
// one-element JSON array; confirm against the API response format.
//
// payload_message: JSON body returned by the recognition service.
// Returns the trimmed text.
String decodejson(String payload_message)
{
  DynamicJsonDocument parsed(1024);
  deserializeJson(parsed, payload_message);

  String text;
  text = parsed["result"].as<String>();

  // Drop the two leading characters, then the two trailing ones.
  text.remove(0, 2);
  const unsigned int tailStart = text.length() - 2;
  text.remove(tailStart, 2);
  return text;
}
//-------------------------------------------------------------------------------------
/*
 * Sends the recognised text to the MiniMax chat-completion API and returns the
 * model's reply.
 *
 * The request parameters can be adjusted; for a multi-turn conversation the
 * messages list would have to carry the previous exchanges as well.
 *
 * inputText: user text to send as the "user" message.
 * Returns the reply text, or "<error>" when the HTTP request fails, the
 * response cannot be parsed, or it contains no reply.
 */
String getGPTAnswer(String inputText) {
  HTTPClient http_client_Minmax;
  http_client_Minmax.setTimeout(10000);
  http_client_Minmax.begin(apiUrl);
  http_client_Minmax.addHeader("Content-Type", "application/json");
  const String token_key = String("Bearer ") + AIKey;
  http_client_Minmax.addHeader("Authorization", token_key);

  // Build the body with the JSON library so quotes, backslashes or control
  // characters in the recognised text are escaped instead of breaking the
  // request.
  DynamicJsonDocument request(2048);
  request["model"] = "abab6.5s-chat";
  JsonArray messages = request.createNestedArray("messages");
  JsonObject systemMsg = messages.createNestedObject();
  systemMsg["role"] = "system";
  systemMsg["content"] = "你的名字是小梁,你是我的女朋友,要求下面的回答严格控制在256字符以内。";
  JsonObject userMsg = messages.createNestedObject();
  userMsg["role"] = "user";
  userMsg["content"] = inputText;
  String payload;
  serializeJson(request, payload);

  const int httpResponseCode = http_client_Minmax.POST(payload);
  if (httpResponseCode != 200) {
    http_client_Minmax.end();
    Serial.printf("Error %i \n", httpResponseCode);
    return "<error>";
  }

  const String response = http_client_Minmax.getString();
  http_client_Minmax.end();

  // Parse the JSON response
  DynamicJsonDocument jsonDoc(4096);
  const DeserializationError err = deserializeJson(jsonDoc, response);
  if (err) {
    Serial.print("Chat response is not valid JSON: ");
    Serial.println(err.c_str());
    return "<error>";
  }
  // `| ""` gives an empty string when the field is missing, rather than the
  // literal text "null".
  const char *content = jsonDoc["choices"][0]["message"]["content"] | "";
  if (content[0] == '\0') {
    Serial.println("Chat response contains no reply");
    return "<error>";
  }
  return String(content);
}
//-----------------------------------------------------------------
// Text-to-speech request.
// Sends the text to the MiniMax T2A v2 endpoint and returns the hex-encoded
// audio from the "data.audio" field of the response. The audio length
// reported by the service is stored in the global audio_time.
//
// ouputText: text to synthesise.
// Returns the hex-encoded audio, or "error" when the HTTP request fails, the
// response cannot be parsed, or it contains no audio.
String getvAnswer(String ouputText) {
  const String tts_url = String("https://api.minimax.chat/v1/t2a_v2?GroupId=") + TTS_group_id;
  const String api_bearer = "Bearer " + TTS_API_Key; // note the space after "Bearer"
  HTTP_TTS.begin(tts_url);
  HTTP_TTS.addHeader("Authorization", api_bearer);
  HTTP_TTS.addHeader("Content-Type", "application/json");

  // Request body (MiniMax T2A v2 parameters)
  DynamicJsonDocument doc(2048);
  doc["text"] = ouputText;
  doc["model"] = "speech-01-turbo-240228";
  JsonObject voice = doc.createNestedObject("voice_setting");
  voice["speed"] = 1;
  voice["vol"] = 10;
  voice["pitch"] = 0;
  voice["voice_id"] = "diadia_xuemei";
  JsonObject audioCfg = doc.createNestedObject("audio_setting");
  audioCfg["sample_rate"] = 32000;
  audioCfg["bitrate"] = 128000;
  // The API field is the integer "channel"; the original sent the string "1"
  // under the name "channelint".
  audioCfg["channel"] = 1;
  audioCfg["format"] = "mp3";

  String jsonString;
  serializeJson(doc, jsonString);

  const int httpResponseCode = HTTP_TTS.POST(jsonString);
  Serial.print("1");
  if (httpResponseCode != 200) {
    Serial.printf("tts %i \n", httpResponseCode);
    HTTP_TTS.end();
    return "error";
  }

  aduio_json = HTTP_TTS.getString();
  HTTP_TTS.end();
  Serial.println(ESP.getFreeHeap());

  JsonDocument jsonDoc; // allocates on the heap as needed
  const DeserializationError error = deserializeJson(jsonDoc, aduio_json);
  if (error)
  {
    Serial.print(F("deserializeJson() failed: "));
    Serial.println(error.f_str());
    return "error"; // do not continue with an unparsed document
  }

  const char *audioHex = jsonDoc["data"]["audio"] | "";
  if (audioHex[0] == '\0') {
    // No audio returned: print the service's own status message if present.
    const char *statusMsg = jsonDoc["base_resp"]["status_msg"] | "";
    Serial.print("tts returned no audio: ");
    Serial.println(statusMsg);
    return "error";
  }
  audio_time = jsonDoc["extra_info"]["audio_length"];
  return String(audioHex);
}
// Converts one hexadecimal digit to its value, or returns -1 if it is not one.
static int hexNibble(char c) {
  if (c >= '0' && c <= '9') return c - '0';
  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
  return -1;
}

// Decodes a hex-encoded MP3 into a file on SPIFFS and starts playing it.
//
// The original tried to start playback while still writing, guarded by
// `if (i==5)`; since `i` only takes even values that condition never held and
// playback never started. Playback now starts once the file is complete and
// closed, and uses filePath rather than a hard-coded name.
//
// hexString: audio bytes as hexadecimal text, two digits per byte (a trailing
//            unpaired digit is ignored).
// filePath:  destination file on SPIFFS.
void writeHexToMP3(String hexString, const char* filePath) {
  File file = SPIFFS.open(filePath, FILE_WRITE);
  if (!file) {
    Serial.println("Failed to open file for writing");
    return;
  }

  const char *hex = hexString.c_str();
  const size_t byteCount = hexString.length() / 2;
  uint8_t chunk[256]; // batch writes instead of one write and one String per byte
  size_t used = 0;
  bool valid = true;
  for (size_t i = 0; i < byteCount; ++i) {
    const int hi = hexNibble(hex[2 * i]);
    const int lo = hexNibble(hex[2 * i + 1]);
    if (hi < 0 || lo < 0) {
      valid = false;
      break;
    }
    chunk[used++] = (uint8_t)((hi << 4) | lo);
    if (used == sizeof(chunk)) {
      file.write(chunk, used);
      used = 0;
    }
  }
  if (used > 0) {
    file.write(chunk, used);
  }
  file.close();

  if (!valid) {
    Serial.println("Invalid hex digit in audio data");
    return;
  }
  Serial.println("MP3 file written successfully");

  if (!audio.connecttoFS(SPIFFS, filePath)) {
    Serial.println("Failed to start MP3 playback");
  }
}
// Audio callback functions (optional)
// Prints the given text on the serial port with the prefix "info ".
void audio_info(const char *info)
{
  Serial.print("info ");
  Serial.println(info);
}
// ID3 metadata: prints the given text with the prefix "id3data ".
void audio_id3data(const char *info)
{
  Serial.print("id3data ");
  Serial.println(info);
}
// End of file: prints the given text with the prefix "eof_mp3 " and clears
// audio_end, the flag loop() polls while waiting for playback to finish.
void audio_eof_mp3(const char *info)
{
  Serial.print("eof_mp3 ");
  Serial.println(info);
  audio_end = 0;
}
// Prints the given text with the prefix "station ".
void audio_showstation(const char *info)
{
  Serial.print("station ");
  Serial.println(info);
}
// Prints the given text with the prefix "streamtitle ".
void audio_showstreamtitle(const char *info)
{
  Serial.print("streamtitle ");
  Serial.println(info);
}
// Prints the given text with the prefix "bitrate ".
void audio_bitrate(const char *info)
{
  Serial.print("bitrate ");
  Serial.println(info);
}
// Duration in seconds: prints the given text with the prefix "commercial ".
void audio_commercial(const char *info)
{
  Serial.print("commercial ");
  Serial.println(info);
}
// Homepage URL: prints the given text with the prefix "icyurl ".
void audio_icyurl(const char *info)
{
  Serial.print("icyurl ");
  Serial.println(info);
}
// URL of the stream being played: prints the given text with the prefix "lasthost ".
void audio_lasthost(const char *info)
{
  Serial.print("lasthost ");
  Serial.println(info);
}
// Prints the given text with the prefix "eof_speech ".
void audio_eof_speech(const char *info)
{
  Serial.print("eof_speech ");
  Serial.println(info);
}
用到的API的文档
百度STT: https://ai.baidu.com/ai-doc/SPEECH/Jlbxdezuf
minmaxTTT:https://platform.minimaxi.com/document/ChatCompletion%20v2?key=66701d281d57f38758d581d0
minmaxTTS:https://platform.minimaxi.com/document/T2A%20V2?key=66719005a427f0c8a5701643
更多推荐



所有评论(0)