vLLM 部署
·
1.配置虚拟环境
# Create an isolated conda environment named "vllm" on Python 3.12, then switch into it.
conda create --name vllm --yes python=3.12
conda activate vllm
2.安装vllm
# Run pip through the interpreter so the active environment's pip is the one used and upgraded.
python -m pip install --upgrade pip
# Install vLLM, then print the installed package metadata to confirm the install.
python -m pip install vllm
python -m pip show vllm
3.下载模型
# Install the ModelScope CLI and fetch the model that step 4 serves.
# The model ID and target directory match the path passed to `vllm serve` in step 4.
pip install modelscope
modelscope download --model Qwen/Qwen3-0.6B --local_dir /root/.cache/modelscope/hub/models/Qwen/Qwen3-0.6B
4.部署 OpenAI 兼容的 API 服务
# Start the OpenAI-compatible server for the locally downloaded model.
# --api-key: clients must send this value as a Bearer token (example value; replace it).
# --served-model-name: the name clients pass in the "model" field of their requests.
vllm serve /root/.cache/modelscope/hub/models/Qwen/Qwen3-0.6B \
  --api-key abc123 \
  --served-model-name Qwen/Qwen3-0.6B \
  --max-model-len 2048 \
  --gpu-memory-utilization 0.8 \
  --port 7890
5.服务验证
# List the served models; a JSON listing confirms the server is up and the key is accepted.
curl http://127.0.0.1:7890/v1/models -H "Authorization: Bearer abc123"
# Send a small completion request to confirm generation works end to end.
curl http://127.0.0.1:7890/v1/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer abc123" \
  -d '{"model": "Qwen/Qwen3-0.6B", "prompt": "Hello!", "max_tokens": 50}'
6.使用 Python（openai SDK）调用对话服务
# Minimal chat client for the server started in step 4, using the openai Python SDK.
from openai import OpenAI

# Point the SDK at the local vLLM endpoint; api_key must match the server's --api-key.
client = OpenAI(
    base_url="http://127.0.0.1:7890/v1",
    api_key="abc123",
)

# model must equal the --served-model-name the server was started with.
completion = client.chat.completions.create(
    model="Qwen/Qwen3-0.6B",
    messages=[
        {"role": "user", "content": "Hello!"},
    ],
)
# Print the message object of the first returned choice.
print(completion.choices[0].message)
更多推荐


所有评论(0)