1. Download the model files

Download the files for each version of the LLM from Hugging Face and put them in a qwen folder. Each subfolder under it corresponds to one version of the LLM, and the model files are stored in the matching version's subfolder.
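
For example, a checkpoint can be fetched programmatically with huggingface_hub (the local_dir below just mirrors the folder layout described above and is only an illustration):

from huggingface_hub import snapshot_download

# Download the whole Qwen1.5-0.5B-Chat repository into ./qwen/Qwen1.5-0.5B-Chat
snapshot_download("Qwen/Qwen1.5-0.5B-Chat", local_dir="./qwen/Qwen1.5-0.5B-Chat")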

XXX-GPTQ-Int4 means the model has been quantized to Int4 precision. In my tests this slightly lowers generation speed but significantly reduces VRAM usage, which suits researchers with limited GPU resources (after all, these small Qwen1.5 models are all about a low VRAM footprint, haha).
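
Loading a quantized checkpoint uses the same from_pretrained call as the full-precision model; a minimal sketch, assuming the auto-gptq and optimum packages are installed:

from transformers import AutoModelForCausalLM

# The Int4 GPTQ weights are unpacked on the fly during inference
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-0.5B-Chat-GPTQ-Int4",
    torch_dtype="auto",
    device_map="auto"
)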

2. Set up the environment

I'm on Python 3.8 with CUDA (cudatoolkit) 11.8 (versions that are too old will throw errors at runtime).

pip install transformers==4.37.0
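
A quick way to confirm the environment matches what the code below expects (a minimal sketch; torch with CUDA support is assumed to be installed already):

import torch, transformers

# Expect a CUDA 11.8 build of torch and transformers 4.37.0
print(torch.__version__, torch.version.cuda, transformers.__version__)
print(torch.cuda.is_available())  # should be True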

3. Run locally in the terminal

from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" # the device to load the model onto

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen1.5-14B-Chat-AWQ",
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-14B-Chat-AWQ")

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=512
)
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
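
Note: loading the AWQ checkpoint above through transformers also needs the AWQ runtime; as far as I know it is installed with:

pip install autoawq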

4. Run as a web page

pip install gradio==4.19.2

For the web UI I used the popular Gradio framework, specifically the chatbot interface it officially recommends:

import gradio as gr
import time

def echo(message, history, system_prompt, tokens):
    response = f"System prompt: {system_prompt}\n Message: {message}."
    for i in range(min(len(response), int(tokens))):
        time.sleep(0.05)
        yield response[: i+1]

demo = gr.ChatInterface(echo, 
                        additional_inputs=[
                            gr.Textbox("You are helpful AI.", label="System Prompt"), 
                            gr.Slider(10, 100)
                        ]
                       )

if __name__ == "__main__":
    demo.queue().launch()

Public access: pass share=True to demo.queue().launch() and a random public URL is generated at launch (make sure your VPN is off).
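
In other words, the last line becomes:

demo.queue().launch(share=True)  # prints a temporary public *.gradio.live URL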

5. Add a DIY file-upload feature

pip install PyPDF2 python-docx

(The docx module imported below is provided by the python-docx package.)

import warnings, sys, os, time
import transformers, PyPDF2
import gradio as gr
from docx import Document
warnings.filterwarnings('ignore')
transformers.logging.set_verbosity_error()

def txt_to_str(file_path):
    """
    txt, html, json, py, md, java, c, cpp, h, js, css, xml, yml, yaml, sql, sh, bat
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        return f"读取文件时发生错误: {e}"

def pdf_to_str(file_path):
    """
    pdf
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            
            # # Check whether the PDF has any pages
            # if not len(pdf_reader.pages):
            #     return ""

            # Read every page and concatenate its text into one string
            content = ""
            for page_num in range(len(pdf_reader.pages)):
                page_obj = pdf_reader.pages[page_num]
                content += page_obj.extract_text()

            # Return the extracted text
            return content

    except Exception as e:
        return f"Error while reading the file: {e}"

def docx_to_str(file_path):
    """
    docx
    """
    try:
        # Open the docx file
        doc = Document(file_path)

        # Join all paragraph texts into one string
        content = '\n'.join([paragraph.text for paragraph in doc.paragraphs])

        # Return the extracted text
        return content

    except Exception as e:
        return f"Error while reading the docx file: {e}"

def file_to_str(file_path):
    # Dispatch on the file extension; all plain-text formats share one reader.
    file_types = ['.txt', '.html', '.json', '.py', '.md', '.java', '.c', '.cpp', '.h', '.js', '.css', '.xml', '.yml', '.yaml', '.sql', '.sh', '.bat'] + ['.pdf'] + ['.docx']
    if not file_path.endswith(tuple(file_types)):
        raise TypeError("Unsupported file type")
    if file_path.endswith('.pdf'):
        return pdf_to_str(file_path)
    elif file_path.endswith('.docx'):
        return docx_to_str(file_path)
    else:
        return txt_to_str(file_path)
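
A quick sanity check (example.pdf is just a hypothetical path for illustration):

# Print the first 100 extracted characters
print(file_to_str("./example.pdf")[:100])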

def list_to_str(histories):
    # Flatten Gradio's [(user, bot), ...] history pairs into one plain-text transcript
    histories_str = ""
    for user_prompt, response in histories:
        histories_str += f"I: {user_prompt}\nYou: {response}\n\n"
    return histories_str
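
For example, two turns of history become a readable transcript:

print(list_to_str([("hi", "hello"), ("what's 2+2?", "4")]))
# I: hi
# You: hello
#
# I: what's 2+2?
# You: 4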

def qwen(user_prompt="", histories=list(), system_prompt="", max_length=512, model_choice="Qwen1.5-0.5B-Chat"):
    if histories:
        user_prompt = user_prompt + "\n\nHere is our previous chat history:\n{}".format(list_to_str(histories))
    device = "cuda"  # the device to load the model onto

    model = transformers.AutoModelForCausalLM.from_pretrained(
        model_choice,
        torch_dtype="auto",
        device_map="auto"
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_choice)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text], return_tensors="pt").to(device)

    generated_ids = model.generate(
        model_inputs.input_ids,
        max_new_tokens=max_length
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    with open("./history/"+find_max("./history"), "a", encoding='utf-8') as f:
        f.write("User: " + user_prompt + "\nResponse: " + response + "\n\n")

    return response
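
Note that qwen() reloads the checkpoint from disk on every single message, which dominates latency. A minimal caching sketch (the _MODEL_CACHE dict and load_model helper are my own additions, not part of the original code):

_MODEL_CACHE = {}

def load_model(model_choice):
    # Load each checkpoint at most once, then reuse it across calls
    if model_choice not in _MODEL_CACHE:
        model = transformers.AutoModelForCausalLM.from_pretrained(
            model_choice, torch_dtype="auto", device_map="auto"
        )
        tokenizer = transformers.AutoTokenizer.from_pretrained(model_choice)
        _MODEL_CACHE[model_choice] = (model, tokenizer)
    return _MODEL_CACHE[model_choice]

qwen() could then start with model, tokenizer = load_model(model_choice) in place of its two from_pretrained calls.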

def echo(user_prompt, histories, system_prompt, max_length, model_choice, uploaded_file_path=None):
    if uploaded_file_path:
        file_content = file_to_str(uploaded_file_path)
        # print(f"已成功上传并读取了文件的部分内容:\n{file_content[:70]}...")
        user_prompt += "\n\n这是文件内容:\n" + file_content

    response = qwen(user_prompt, histories, system_prompt, max_length, model_choice)

    for i in range(len(response)):
        time.sleep(0.05)
        yield response[: i+1]

def find_max(folder_path):
    # List every entry in the folder
    all_entries = os.listdir(folder_path)
    # Collect the names of plain files (skip subdirectories)
    file_names = []
    for entry in all_entries:
        full_path = os.path.join(folder_path, entry)
        if os.path.isfile(full_path):
            file_names.append(entry)
    # History logs are named history_YYYY-MM-DD_HH-MM-SS.txt, so the
    # lexicographic maximum is also the most recent one
    return max(file_names)

def main():
    # Make sure the history folder exists, then start a fresh timestamped log
    os.makedirs("./history", exist_ok=True)
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    with open("./history/history_{}.txt".format(current_time), "w", encoding='utf-8') as f:
        f.write("")
    demo = gr.ChatInterface(echo, 
                            additional_inputs=[
                                gr.Textbox("You are a helpful assistant.", label="System Prompt"), 
                                gr.Slider(128, 2048, label="The max length of tokens", interactive=True),
                                # The radio values double as the local model folder names passed to from_pretrained
                                gr.Radio([f"Qwen1.5-{i}B-Chat" for i in [0.5, 1.8, 4]], label="Model Choice"),
                                gr.UploadButton(label="Upload a file", type="filepath", file_count="single", file_types=["file"], interactive=True, size="sm")
                            ]
                        )

    # demo.queue().launch()
    demo.queue().launch(share=True)

# pip install gevent==22.10.2
if __name__ == "__main__":
    main()