ESP32s3系列ai语音聊天

注释:本博客记录作者搜寻资料学习成果,使用请自行删减。

感谢让我学到很多的博主:https://vor2345.blog.csdn.net/article/details/136975580

今年九月份,我在网上看到有人做了自己的AI聊天小助手,说做就做!为此我搜索了很多资料。

选择物料: esp32s3(需要大容量ram)作者用的是xiao-sense系列,max98357,麦克风,喇叭

话不多说开写
用的是VScode,下载platformIO,然后新建工程

在这里插入图片描述

思路简介

实现大模型接入玩法
制作自己的AI助手小人
首先通过简单
一:连接wifi✔
二:接收音频✔
三:打包数据成JSON格式✔
四:接入模型API实现STT(语音转文字)✔
五:打包传输下来的文件转成JSON格式✔
六:接入模型API实现TTT(文字对话)✔
七:串口输出文本✔
八:再次上传,接入MiniMax语音接口实现TTS(文字转语音)✔
九:编码收集,I2S播放✔

用到的库

arduinojson库
audioi2s库

#include <Arduino.h>
#include "base64.h"
#include "WiFi.h"
#include "HTTPClient.h"
#include "cJSON.h"
#define ARDUINOJSON_SLOT_ID_SIZE 4
#define ARDUINOJSON_STRING_LENGTH_SIZE 4
#include <ArduinoJson.h>
#include "Audio.h"
#include <SPIFFS.h>

// Audio player object; setup() configures its I2S pins and volume.
Audio audio;
// HTTP client used by getvAnswer() for the text-to-speech request.
HTTPClient HTTP_TTS;
// Pin number reserved for a push button (not referenced by the code shown here).
#define key 3
// Microphone analog input pin, sampled with analogRead() in onTimer().
#define ADC 2
// Indicator LED; loop() drives it HIGH while recording and LOW afterwards.
#define ledPin BUILTIN_LED
// MAX98357 speaker amplifier pins
#define I2S_DOUT 6  // DIN connection
#define I2S_BCLK 5  // Bit clock
#define I2S_LRC 4   // Left Right Clock

下面填写自己的注册信息,百度有免费的额度可以申请。
创建应用时,我们选择短语音识别就可以了。
在这里插入图片描述

在这里插入图片描述
将自己的信息填入下面

// 2. Fill in your own Baidu speech application credentials: https://console.bce.baidu.com/ai/?fromai=1#/ai/speech/app/list
String CUID = "1155xxxxxx"; //AppID
String CLIENT_ID = "uxlxxxxxxxxxxxxxxxxxxxxxxx"; //API Key
String CLIENT_SECRET = "6Qxxxxxxxxxxxxxxxxxxxxxxxx"; //Secret key
String token;//Access token obtained with the credentials above; setup() fills it via gainToken()

下面是TTT和TTS要用到的API信息。
我们使用的是MiniMax,它的响应比较快。
先到它的官网注册一个账号。
下面分别是TTT和TTS的接口:

//https://www.minimaxi.com/news/%E9%80%9A%E7%94%A8%E5%A4%A7%E6%A8%A1%E5%9E%8Babab65%E7%B3%BB%E5%88%97
// MiniMax parameters
const char *AIKey = ""; // Chat (TTT) API key: create one under the account's API keys
String  apiUrl =    "https://api.minimax.chat/v1/text/chatcompletion_v2";
String TTS_group_id = ""; // GroupId shown in the account information
// TTS API key: ideally a second key, although the chat key may also work.
// (The original line had an unterminated string literal and did not compile.)
String TTS_API_Key = "";
//const char *tts_url = "https://api.minimax.chat/v1/t2a_pro?GroupId="+TTS_group_id;

一些参数定义

// HTTP client used by sendToSTT().
HTTPClient http_client;
// Hardware timer that paces ADC sampling; created and armed in setup().
hw_timer_t *timer = NULL;
const int recordTimeSeconds = 3; // Nominal recording time, in seconds
// Number of samples captured per recording.
// NOTE(review): setup() arms the timer with a prescaler of 80 and an alarm of 125 ticks,
// which looks like 8000 samples/s; at that rate 16000 * recordTimeSeconds samples take
// twice recordTimeSeconds to capture — confirm which rate is intended.
const int adc_data_len = 16000 * recordTimeSeconds;
// Size of the request buffer: raw sample bytes times 1.4 (base64 expansion plus the JSON fields).
const int data_json_len = adc_data_len * 2 * 1.4;
uint16_t *adc_data;   // Sample buffer, allocated in setup()
char *data_json;      // Speech-recognition request body, allocated in setup()
String aduio_json;    // Raw text-to-speech response, filled by getvAnswer()
// The two flags below are written by the timer ISR and polled by loop(), so they
// must be volatile to keep the compiler from caching them across the polling loop.
volatile uint8_t adc_start_flag = 0;    // Set to 1 to start sampling
volatile uint8_t adc_complete_flag = 0; // Set to 1 by the ISR when the buffer is full
uint32_t num = 0;     // Next write index into adc_data (only touched by the ISR)
portMUX_TYPE timerMux = portMUX_INITIALIZER_UNLOCKED;
String payload; // Raw speech-recognition response
String input ;  // Recognised text passed to the chat model
String answer, aduiourl; // Chat reply, and the hex-encoded audio returned by getvAnswer()
int audio_time ;   // extra_info.audio_length of the last TTS response
int audio_end = 0; // 1 while playback is in progress; cleared by audio_eof_mp3()

函数声明

// Function declarations
// Requests a Baidu access token using CLIENT_ID / CLIENT_SECRET.
String gainToken();
// Timer ISR: stores one ADC sample per call while adc_start_flag is 1.
void IRAM_ATTR onTimer();
// POSTs data_json to the Baidu speech endpoint and stores the decoded text in `input`.
void sendToSTT();
// Builds the speech-recognition JSON request in data_json from adc_data.
void assembleJson(String token);
// Sends inputText to the MiniMax chat endpoint and returns the reply, or "<error>".
String getGPTAnswer(String inputText);
// Extracts the "result" text from a speech-recognition response.
String decodejson(String payload_message);
// Sends text to the MiniMax t2a_v2 endpoint and returns data.audio from the response, or "error".
String getvAnswer(String ouputText) ;
// Decodes a hex string and writes the bytes to a SPIFFS file.
void writeHexToMP3(String hexString, const char* filePath);
// WiFi SSID and password
const char* ssid = "veryveryfast";
const char* password = "veryveryfast123";
// Connect to the WiFi network named by `ssid` / `password`.
// Blocks until the station reports WL_CONNECTED, printing a dot every 300 ms.
void WiFi_Connect()
{
	WiFi.begin(ssid, password);
	for (;;)
	{
		if (WiFi.status() == WL_CONNECTED)
		{
			break; // connected: stop waiting
		}
		delay(300);
		Serial.print(".");
	}
}

//初始化
void setup()
{
  Serial.begin(115200); // open the serial port at 115200 bps;
  Serial.println(ESP.getFreeHeap());
	delay(100);
	pinMode(ADC, ANALOG);//麦克风接口
  Serial.print("Connecting.. ");
	WiFi_Connect();
	Serial.println("WiFi connected");
	Serial.println("IP address: ");
	Serial.println(WiFi.localIP());
  delay(100);
  //上面是WiFi连接
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);//接口定义
  audio.setVolume(21); // 默认0...21声音大小
  delay(1000);
  token = gainToken();//得到密钥

  timer = timerBegin(0, 80, true);//40M分频
  timerAlarmWrite(timer, 125, true);//125微秒
  timerAttachInterrupt(timer, &onTimer, true);
  timerAlarmEnable(timer);
  timerStop(timer);  // 先暂停
  audio.setPinout(I2S_BCLK, I2S_LRC, I2S_DOUT);
  audio.setVolume(12); // 0...21
  adc_data = (uint16_t *)ps_malloc(adc_data_len * sizeof(uint16_t));
  if (!adc_data) {
    Serial.println("Failed to allocate memory for adc_data");
  }

  data_json = (char *)ps_malloc(data_json_len * sizeof(char));  // 根据需要调整大小
  if (!data_json) {
    Serial.println("Failed to allocate memory for data_json");
  }
  aduio_json = (char *)ps_malloc(data_json_len * sizeof(char));  // 根据需要调整大小
  if (!data_json) {
    Serial.println("Failed to allocate memory for data_json");
  }
   if (!SPIFFS.begin(true)) {
        Serial.println("SPIFFS Mount Failed");
        return;
    }
}

// Main loop.
// Waits for the character '1' on the serial port, then runs one full round trip:
// record -> speech-to-text -> chat model -> text-to-speech -> playback.
uint32_t time1, time2;
void loop()
{
  if (Serial.available() <= 0) {
    return;
  }
  if (Serial.read() != '1') {
    return;
  }

  // --- Record ---
  Serial.printf("Start recognition\r\n\r\n");
  digitalWrite(ledPin, HIGH);
  adc_start_flag = 1;
  timerStart(timer);
  while (!adc_complete_flag) {  // the timer ISR raises this flag when the buffer is full
    ets_delay_us(10);
  }
  Serial.println(ESP.getFreeHeap());
  timerStop(timer);
  adc_complete_flag = 0;  // clear for the next recording
  Serial.printf("ok\r\n");
  digitalWrite(ledPin, LOW);

  // --- Speech to text ---
  assembleJson(token);
  input = "";  // make sure a failed request cannot reuse the previous utterance
  sendToSTT();
  Serial.println(input);
  if (input.length() == 0) {
    Serial.println("Speech recognition returned no text");
    return;
  }

  // --- Chat model ---
  Serial.println(ESP.getFreeHeap());
  answer = getGPTAnswer(input);
  Serial.println(answer);
  if (answer == "<error>" || answer.length() == 0) {
    return;  // do not send the error marker to the speech synthesiser
  }
  Serial.println(ESP.getPsramSize());  // total PSRAM
  Serial.println(ESP.getFreePsram());  // free PSRAM

  // --- Text to speech and playback ---
  aduiourl = getvAnswer(answer);
  if (aduiourl != "error" && aduiourl.length() > 0) {
    writeHexToMP3(aduiourl, "/audio.mp3");
    audio_end = 1;
    // audio_eof_mp3() clears audio_end at end of file; isRunning() additionally ends
    // the wait when playback never started, which previously spun forever.
    while (audio_end && audio.isRunning()) {
      audio.loop();
    }
    audio_end = 0;
    audio.stopSong();
  }
}

下面是函数集

//-----------------------------------------------------------
// Send the request built by assembleJson() to the Baidu speech-recognition endpoint.
// On HTTP 200 the raw response is stored in `payload` and the recognised text in `input`;
// on any failure `input` is left empty so callers cannot mistake stale text for a new result.
void sendToSTT() {
  input = "";
  if (data_json == NULL || data_json[0] == '\0') {
    Serial.println("[HTTP] no request body to send");
    return;
  }

  http_client.begin("http://vop.baidu.com/server_api");
  http_client.addHeader("Content-Type", "application/json");
  // Pass the buffer directly: the String overload would first copy the whole body.
  int httpCode = http_client.POST((uint8_t *)data_json, strlen(data_json));

  if (httpCode == HTTP_CODE_OK) {
    payload = http_client.getString();
    Serial.println(payload);
    input = decodejson(payload);
  } else if (httpCode > 0) {
    // The server answered, but not with 200; previously this was silently ignored.
    Serial.printf("[HTTP] POST returned status %d\n", httpCode);
  } else {
    Serial.printf("[HTTP] POST failed, error: %s\n", http_client.errorToString(httpCode).c_str());
  }
  http_client.end();
}

//---------------------------------------------------------------
// Pack the captured ADC samples into the JSON request expected by the Baidu speech API.
// The field values below are Baidu API parameters; adjust them according to the official docs.
//
// token: Baidu access token placed in the "token" field.
// Result: data_json holds the NUL-terminated request, or an empty string when the
//         buffers are missing or the request does not fit in data_json_len bytes.
void assembleJson(String token)
{
  if (data_json == NULL) {
    Serial.println("assembleJson: data_json is not allocated");
    return;
  }
  data_json[0] = '\0';
  if (adc_data == NULL) {
    Serial.println("assembleJson: adc_data is not allocated");
    return;
  }

  const size_t capacity = (size_t)data_json_len;
  // Everything up to and including the opening quote of the "speech" value.
  const int headerLen = snprintf(
      data_json, capacity,
      "{\"format\":\"pcm\",\"rate\":8000,\"dev_pid\":1537,\"channel\":1,"
      "\"cuid\":\"57722200\",\"token\":\"%s\",\"len\":%d,\"speech\":\"",
      token.c_str(), adc_data_len * 2);

  const String speech = base64::encode((uint8_t *)adc_data, adc_data_len * sizeof(uint16_t));
  static const char tail[] = "\"}";  // closes the speech string and the object

  // sizeof(tail) counts the terminating NUL, so this is the exact space required.
  if (headerLen < 0 || (size_t)headerLen + speech.length() + sizeof(tail) > capacity) {
    data_json[0] = '\0';
    Serial.println("assembleJson: request does not fit in data_json");
    return;
  }

  memcpy(data_json + headerLen, speech.c_str(), speech.length());
  memcpy(data_json + headerLen + speech.length(), tail, sizeof(tail));
}

//中断函数
//中断读取adc数据
采样频率在setup里通过定时器设置:预分频80、每125个计数触发一次中断,按80 MHz时钟计算即8000 Hz的采样率。
// Timer interrupt handler.
// While adc_start_flag is 1, stores one analogRead(ADC) sample per call into adc_data.
// Once adc_data_len samples have been stored it rewinds the write index, clears
// adc_start_flag and raises adc_complete_flag.
void IRAM_ATTR onTimer() {
  portENTER_CRITICAL_ISR(&timerMux);
  const bool capturing = (adc_start_flag == 1);
  if (capturing) {
    adc_data[num] = analogRead(ADC);
    num += 1;
    const bool bufferFull = !(num < adc_data_len);
    if (bufferFull) {
      num = 0;
      adc_start_flag = 0;
      adc_complete_flag = 1;
    }
  }
  portEXIT_CRITICAL_ISR(&timerMux);
}

//------------------------------------------------------------------------------
// Request a Baidu access token using CLIENT_ID and CLIENT_SECRET.
// If this step fails while debugging, try typing the full URL in by hand.
//
// Returns the "access_token" string from the response, or an empty String when the
// request fails, the server does not answer 200, or the field is missing.
String gainToken() {
  HTTPClient http;
  String accessToken;
  String url = String("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=")+CLIENT_ID+String("&client_secret=")+CLIENT_SECRET;
  //String url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=CLIENT_ID&client_secret=CLIENT_SECRET";
  // If the first form fails, use the second one with client_id and client_secret pasted in.
  http.begin(url);
  int httpCode = http.GET();
  Serial.println("is sure to ping");
  if (httpCode == HTTP_CODE_OK) {
    String payload_token = http.getString();
    DynamicJsonDocument doc(payload_token.length() + 1024);
    DeserializationError err = deserializeJson(doc, payload_token);
    if (err) {
      Serial.print(F("Token response could not be parsed: "));
      Serial.println(err.c_str());
    }
    // Read the field as a C string: a missing field yields NULL rather than the
    // text "null", which the original would have used as a token.
    const char *value = doc["access_token"].as<const char *>();
    if (value != NULL) {
      accessToken = value;
    } else {
      Serial.println("Token response has no access_token");
    }
    Serial.println(accessToken);
  } else if (httpCode > 0) {
    Serial.printf("Token request returned HTTP status %d\n", httpCode);
  } else {
    Serial.println("Error on HTTP request for token");
  }
  http.end();
  return accessToken;
}

// Extract the recognised text from a Baidu speech-recognition response.
//
// payload_message: JSON response body; the text is expected as the first element
//                  of the "result" array.
// Returns that element, or an empty String when it is absent or the JSON is invalid.
// (The original serialised the whole array and trimmed two characters from each end,
// which left escape sequences in the text and broke with more than one candidate.)
String decodejson(String payload_message)
{
    DynamicJsonDocument doc(payload_message.length() + 1024);
    DeserializationError err = deserializeJson(doc, payload_message);
    if (err) {
        Serial.print(F("decodejson: "));
        Serial.println(err.c_str());
    }
    const char *text = doc["result"][0].as<const char *>();
    if (text == NULL) {
        return String();
    }
    return String(text);
}

//-------------------------------------------------------------------------------------
/*
The parameters sent to MiniMax can be changed freely. For a multi-turn conversation,
turn the message content into a variable and append the previous exchange to it.
*/
// Send inputText to the MiniMax chat-completion endpoint.
//
// Returns choices[0].message.content from the response, or "<error>" when the HTTP
// status is not 200 or the reply does not contain that field.
String getGPTAnswer(String inputText) {
  HTTPClient http_client_Minmax;
  http_client_Minmax.setTimeout(10000);
  http_client_Minmax.begin(apiUrl);
  http_client_Minmax.addHeader("Content-Type", "application/json");
  String token_key = String("Bearer ") + AIKey;
  http_client_Minmax.addHeader("Authorization", token_key);

  // Build the body with the JSON library so quotes or backslashes in the recognised
  // text are escaped instead of corrupting a hand-concatenated string.
  DynamicJsonDocument requestDoc(inputText.length() + 1024);
  requestDoc["model"] = "abab6.5s-chat";
  JsonArray messages = requestDoc.createNestedArray("messages");
  JsonObject systemMsg = messages.createNestedObject();
  systemMsg["role"] = "system";
  systemMsg["content"] = "你的名字是小梁,你是我的女朋友,要求下面的回答严格控制在256字符以内。";
  JsonObject userMsg = messages.createNestedObject();
  userMsg["role"] = "user";
  userMsg["content"] = inputText;
  String requestBody;
  serializeJson(requestDoc, requestBody);

  int httpResponseCode = http_client_Minmax.POST(requestBody);
  if (httpResponseCode != 200) {
    http_client_Minmax.end();
    Serial.printf("Error %i \n", httpResponseCode);
    return "<error>";
  }

  String response = http_client_Minmax.getString();
  http_client_Minmax.end();
  //Serial.println(response);     // debug output

  // Parse JSON response
  DynamicJsonDocument jsonDoc(response.length() + 2048);
  DeserializationError err = deserializeJson(jsonDoc, response);
  if (err) {
    Serial.print(F("Chat response could not be parsed: "));
    Serial.println(err.c_str());
  }
  const char *content = jsonDoc["choices"][0]["message"]["content"].as<const char *>();
  if (content == NULL) {
    // A missing field used to come back as the literal text "null".
    Serial.println("Chat response has no message content");
    return "<error>";
  }
  return String(content);
}

//-----------------------------------------------------------------
// Text-to-speech: send ouputText to the MiniMax t2a_v2 endpoint.
//
// Returns data.audio from the response (written to a file by writeHexToMP3()),
// or "error" when the HTTP status is not 200, the response cannot be parsed,
// base_resp.status_code is non-zero, or no audio is present.
// Side effects: stores the raw response in aduio_json and extra_info.audio_length
// in audio_time.
String getvAnswer(String ouputText) {
  String tts_url = String("https://api.minimax.chat/v1/t2a_v2?GroupId=") + TTS_group_id;
  String api_bearer = "Bearer " + TTS_API_Key; // note the space after "Bearer"
  HTTP_TTS.begin(tts_url);
  HTTP_TTS.addHeader("Authorization", api_bearer);
  HTTP_TTS.addHeader("Content-Type", "application/json");

  // Request body; the fields are MiniMax API parameters.
  JsonDocument doc;
  doc["text"] = ouputText;
  doc["model"] = "speech-01-turbo-240228";
  JsonObject voiceSetting = doc.createNestedObject("voice_setting");
  voiceSetting["speed"] = 1;
  voiceSetting["vol"] = 10;
  voiceSetting["pitch"] = 0;
  voiceSetting["voice_id"] = "diadia_xuemei";
  JsonObject audioSetting = doc.createNestedObject("audio_setting");
  audioSetting["sample_rate"] = 32000;
  audioSetting["bitrate"] = 128000;
  // The original sent the key "channelint" with the string "1"; the API field is
  // "channel" with an integer value.
  audioSetting["channel"] = 1;
  audioSetting["format"] = "mp3";

  String jsonString;
  serializeJson(doc, jsonString);
  int httpResponseCode = HTTP_TTS.POST(jsonString);
  Serial.print("1");
  if (httpResponseCode != 200) {
    Serial.printf("tts %i \n", httpResponseCode);
    HTTP_TTS.end();
    return "error";
  }

  aduio_json = HTTP_TTS.getString();
  HTTP_TTS.end();
  Serial.println(ESP.getFreeHeap());

  JsonDocument jsonDoc; // allocates on the heap
  DeserializationError error = deserializeJson(jsonDoc, aduio_json);
  if (error) {
    Serial.print(F("deserializeJson() failed: "));
    Serial.println(error.f_str());
    return "error";  // previously fell through and returned the text "null"
  }

  const long status_code = jsonDoc["base_resp"]["status_code"].as<long>();
  if (status_code != 0) {
    const char *status_msg = jsonDoc["base_resp"]["status_msg"] | "";
    Serial.printf("tts status %ld %s\n", status_code, status_msg);
    return "error";
  }

  audio_time = jsonDoc["extra_info"]["audio_length"];

  const char *hexAudio = jsonDoc["data"]["audio"].as<const char *>();
  if (hexAudio == NULL || hexAudio[0] == '\0') {
    Serial.println("tts response has no audio");
    return "error";
  }
  return String(hexAudio);
}
// Value of one hexadecimal digit, or -1 if `c` is not a hex digit.
static int hexDigitValue(char c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

// Decode a hex string into bytes, store them in a SPIFFS file and start playing it.
//
// hexString: audio data as pairs of hex digits; a trailing unpaired digit is ignored.
// filePath:  SPIFFS path of the file to (re)write and then play.
//
// The original tried to start playback inside the loop when i == 5, but i only takes
// even values, so playback was never started. Playback now begins once the complete
// file has been written and closed. Nothing is played if decoding or writing fails.
void writeHexToMP3(String hexString, const char* filePath) {
    File file = SPIFFS.open(filePath, FILE_WRITE);
    if (!file) {
        Serial.println("Failed to open file for writing");
        return;
    }

    const char *hex = hexString.c_str();
    const size_t byteCount = hexString.length() / 2;
    uint8_t chunk[256];  // write in chunks instead of one file.write() per byte
    size_t used = 0;
    bool ok = true;

    for (size_t i = 0; i < byteCount; ++i) {
        const int hi = hexDigitValue(hex[2 * i]);
        const int lo = hexDigitValue(hex[2 * i + 1]);
        if (hi < 0 || lo < 0) {
            ok = false;  // not hex data (for example an error string)
            break;
        }
        chunk[used++] = (uint8_t)((hi << 4) | lo);
        if (used == sizeof(chunk)) {
            if (file.write(chunk, used) != used) {
                ok = false;
                break;
            }
            used = 0;
        }
    }
    if (ok && used > 0) {
        ok = (file.write(chunk, used) == used);
    }
    file.close();

    if (!ok) {
        Serial.println("Failed to decode or store MP3 data");
        return;
    }
    Serial.println("MP3 file written successfully");
    audio.connecttoFS(SPIFFS, filePath);  //  128k mp3
}
// Audio library callbacks (all optional).
// Each one prints a fixed-width label followed by the text supplied by the library.

// Shared printer: label, then the callback text and a newline.
static void printAudioEvent(const char *label, const char *info) {
    Serial.print(label);
    Serial.println(info);
}

void audio_info(const char *info){
    printAudioEvent("info        ", info);
}
void audio_id3data(const char *info){  // ID3 metadata
    printAudioEvent("id3data     ", info);
}
void audio_eof_mp3(const char *info){  // end of file
    printAudioEvent("eof_mp3     ", info);
    audio_end = 0 ;  // lets the playback wait in loop() finish
}
void audio_showstation(const char *info){
    printAudioEvent("station     ", info);
}
void audio_showstreamtitle(const char *info){
    printAudioEvent("streamtitle ", info);
}
void audio_bitrate(const char *info){
    printAudioEvent("bitrate     ", info);
}
void audio_commercial(const char *info){  // duration in seconds
    printAudioEvent("commercial  ", info);
}
void audio_icyurl(const char *info){  // homepage URL
    printAudioEvent("icyurl      ", info);
}
void audio_lasthost(const char *info){  // URL of the stream being played
    printAudioEvent("lasthost    ", info);
}
void audio_eof_speech(const char *info){
    printAudioEvent("eof_speech  ", info);
}

用到的API的文档

百度STT: https://ai.baidu.com/ai-doc/SPEECH/Jlbxdezuf
minmaxTTT:https://platform.minimaxi.com/document/ChatCompletion%20v2?key=66701d281d57f38758d581d0
minmaxTTS:https://platform.minimaxi.com/document/T2A%20V2?key=66719005a427f0c8a5701643

Logo

欢迎加入西安开发者社区!我们致力于为西安地区的开发者提供学习、合作和成长的机会。参与我们的活动,与专家分享最新技术趋势,解决挑战,探索创新。加入我们,共同打造技术社区!

更多推荐