metahuman-stream是基于ER-NeRF模型的流式数字人,实现音视频同步对话。

metahuman-stream
xtts-streaming-server
srs

部署 srs

# rtmpserver
# SRS media server. Port map: 1935 RTMP push/pull, 1985 HTTP API,
# 8080 HTTP server/console, 8000/udp WebRTC media, 10080/udp SRT.
docker run -it -d \
-p 1935:1935 -p 1985:1985 -p 8080:8080 -p 8000:8000/udp -p 10080:10080/udp \
--name srs \
registry.cn-hangzhou.aliyuncs.com/ossrs/srs:5

部署 xtts-streaming-server

下载XTTS-v2模型到宿主机

# Coqui XTTS streaming TTS server, exposed on host port 9000 (container port 80).
# COQUI_TOS_AGREED=1 accepts the Coqui model license non-interactively.
# The XTTS-v2 model downloaded to the host is mounted into /app/tts_models.
docker run --gpus=all -d -p 9000:80 \
-e COQUI_TOS_AGREED=1 \
-v /opt/data/model/XTTS-v2:/app/tts_models \
--name him-xtts-streaming-server \
ghcr.io/coqui-ai/xtts-streaming-server:latest

部署 metahuman-stream

参考项目的Dockerfile

启动容器

# Start a CUDA 11.6 + cuDNN 8 base container for metahuman-stream.
# --network=host shares the host network namespace, so the SRS and XTTS
# services started above are reachable without extra port mappings.
sudo docker run --gpus all -it -d \
--network=host \
--name nerfstream \
nvcr.io/nvidia/cuda:11.6.1-cudnn8-devel-ubuntu20.04

# Open an interactive shell inside the running container.
sudo docker exec -ti nerfstream bash

安装工具和anaconda

# Install base build/dev tooling. DEBIAN_FRONTEND=noninteractive prevents
# apt from stopping on configuration prompts inside the container.
apt-get update -yq --fix-missing \
 && DEBIAN_FRONTEND=noninteractive apt-get install -yq --no-install-recommends \
    pkg-config \
    wget \
    cmake \
    curl \
    git \
    vim

# Fetch the latest Miniconda installer for x86_64 Linux.
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh

# -b: batch mode (no prompts), -u: update an existing install, -p: install prefix.
sh Miniconda3-latest-Linux-x86_64.sh -b -u -p ~/miniconda3
~/miniconda3/bin/conda init

# Reload the shell config so 'conda' is available in the current session.
source ~/.bashrc

创建conda环境&安装依赖

# Create an isolated conda env for the project.
# -y: answer "yes" to the confirmation prompt so the command does not
# block when this sequence is pasted or run as a script.
conda create -y -n nerfstream python=3.10

conda activate nerfstream

# Use the Aliyun PyPI mirror for faster package downloads inside China.
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/

# PyTorch 1.12.1 with the CUDA 11.3 toolkit, matching the project's requirements.
conda install -y pytorch==1.12.1 torchvision==0.13.1 cudatoolkit=11.3 -c pytorch

# pytorch3d ships no prebuilt wheel for this stack; it is built from source.
pip install "git+https://github.com/facebookresearch/pytorch3d.git"

pip install tensorflow-gpu==2.8.0

# TensorFlow drags in a newer protobuf that conflicts with other deps;
# pin protobuf to 3.20.1. -y: 'pip uninstall' otherwise waits on an
# interactive "Proceed (y/n)?" prompt and hangs non-interactive runs.
pip uninstall -y protobuf
pip install protobuf==3.20.1

conda install -y ffmpeg

# portaudio headers are required to build/run pyaudio.
apt-get install -y portaudio19-dev python-all-dev python3-all-dev
conda install -y pyaudio

克隆源码&安装项目依赖

# Clone the main project and install its Python dependencies.
cd /
git clone https://github.com/lipku/metahuman-stream.git
cd metahuman-stream
pip install -r requirements.txt

# Build and install the RTMP push-stream Python bindings used by the project.
cd /
git clone https://github.com/lipku/python_rtmpstream.git
cd python_rtmpstream
git submodule update --init
pip install wheel
cd python
# Edit the conda directory path in CMakeLists.txt to match your install
# (e.g. ~/miniconda3) before building.
vim CMakeLists.txt
pip install .

配置LLM

以Qwen为例,修改app.py

def llm_response(message):
    """Send a chat message to the Qwen LLM and return its reply.

    Args:
        message: User utterance forwarded to the model.

    Returns:
        The model's response (also printed for logging).
    """
    from llm.LLM import LLM
    # Initializing Qwen-1.8B loads the full model weights; doing it on every
    # call is very slow. Cache the instance on the function and reuse it.
    if getattr(llm_response, "_llm", None) is None:
        llm_response._llm = LLM().init_model('Qwen', model_path= 'Qwen/Qwen-1_8B-Chat')
    response = llm_response._llm.chat(message)
    print(response)
    return response

运行

# Use the HF mirror so model downloads succeed without direct huggingface.co access.
export HF_ENDPOINT=https://hf-mirror.com
python app.py										# default settings
python app.py --tts xtts --ref_file data/ref.wav	# use xtts with a reference clip for voice cloning

效果

（此处为运行效果截图，图片在转载时丢失，请参见原文。）

Logo

权威|前沿|技术|干货|国内首个API全生命周期开发者社区

更多推荐