阿里通义千问Qwen1.5使用教程
下载各个版本LLM的文件,然后存入qwen文件夹,qwen文件夹下的每个子文件夹代表一个版本的LLM,模型文件等存储在对应版本的子文件夹下。XXX-GPTQ-Int4的意思是对模型进行了精度为Int4的量化,虽然实测降低了一点速度但是显著降低了占用的显存。python版本为3.8,cuda(cudatoolkit)版本为11.8(版本太低运行时会报错)。web网页端我使用了比较热门的gradio框架。
文章共3,938字 · 阅读需要大约14分钟
一键AI生成摘要,助你高效阅读
问答
·
0.下载文件
在Hugging Face下载各个版本LLM的文件,然后存入qwen文件夹:qwen文件夹下的每个子文件夹代表一个版本的LLM,模型文件等存储在对应版本的子文件夹下。
XXX-GPTQ-Int4的意思是对模型进行了精度为Int4的量化,虽然实测降低了一点生成速度但是显著降低了占用的显存,适合显卡资源不足的研究者(毕竟Qwen1.5这几个小模型就是主打一个显存低啊哈)
1.安装环境
博主python版本为3.8,cuda(cudatoolkit)版本为11.8(版本太低运行时会报错)
pip install transformers==4.37.0
2.本地终端运行
# Minimal terminal example: load a Qwen1.5 chat checkpoint, send one prompt
# through the chat template, and print the model's reply.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "Qwen/Qwen1.5-14B-Chat-AWQ"

# device_map="auto" leaves weight placement to the library and
# torch_dtype="auto" keeps the dtype stored in the checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype="auto",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

prompt = "Give me a short introduction to large language model."
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": prompt},
]

# Render the conversation as a single prompt string that ends with the
# assistant turn marker, so generation continues as the assistant.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

# Send the inputs to wherever the model actually lives instead of assuming
# "cuda"; with device_map="auto" the placement is not fixed in advance.
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Unpacking the encoding forwards the attention mask together with input_ids.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512,
)

# generate() returns prompt + completion; drop the prompt tokens of each row.
generated_ids = [
    output_ids[len(input_ids):]
    for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

# Without this the script computes an answer and exits silently.
print(response)
3.web网页端运行
pip install gradio==4.19.2
web网页端我使用了比较热门的gradio框架,即官方推的聊天机器人框架:
import gradio as gr
import time
def echo(message, history, system_prompt, tokens):
    """Stream a canned reply one character at a time.

    The reply simply repeats the system prompt and the user's message; it is
    a stand-in for a real model so the chat UI can be tried out.

    Args:
        message: The text the user just sent.
        history: Previous chat turns (unused here).
        system_prompt: Value of the "System Prompt" textbox.
        tokens: Maximum number of characters to reveal; converted with
            ``int()`` before use.

    Yields:
        Growing prefixes of the reply, one more character per step, with a
        0.05 s pause before each step.
    """
    response = f"System prompt: {system_prompt}\n Message: {message}."
    visible_limit = min(len(response), int(tokens))
    for end in range(1, visible_limit + 1):
        time.sleep(0.05)
        yield response[:end]
# Extra widgets shown under the chat box; their values are handed to echo()
# after (message, history), in this order: system_prompt, tokens.
_extra_inputs = [
    gr.Textbox("You are helpful AI.", label="System Prompt"),
    gr.Slider(10, 100),
]

demo = gr.ChatInterface(echo, additional_inputs=_extra_inputs)

if __name__ == "__main__":
    # queue() is enabled before launching because echo() is a generator
    # that streams partial replies.
    demo.queue().launch()
公网访问:在 demo.queue().launch() 中加上 share=True,即 demo.queue().launch(share=True),运行时即会生成一个随机公网地址(注意别开VPN)。
4.添加DIY文件上传功能
pip install PyPDF2 python-docx
import warnings, sys, os, time
import transformers, PyPDF2
import gradio as gr
from docx import Document
# Suppress every Python warning so the console output stays readable.
warnings.filterwarnings('ignore')
# Limit the transformers library's own logging to error-level messages.
transformers.logging.set_verbosity_error()
def txt_to_str(file_path):
    """Read a UTF-8 text file and return its contents as one string.

    Intended for plain-text formats: txt, html, json, py, md, java, c, cpp,
    h, js, css, xml, yml, yaml, sql, sh, bat.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents. If reading fails for any reason, no exception is
        raised; a message starting with "读取文件时发生错误: " is returned
        instead.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        # Deliberately best-effort: the caller gets a readable message
        # rather than a crash in the chat handler.
        return f"读取文件时发生错误: {e}"
def pdf_to_str(file_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file_path: Path of the PDF file.

    Returns:
        The text of all pages concatenated in page order (empty string for a
        PDF without pages). If anything fails, no exception is raised; a
        message starting with "读取文件时发生错误: " is returned instead.
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            # `or ""` guards against a page yielding no text at all, so one
            # such page cannot turn the whole document into an error message.
            return "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
    except Exception as e:
        # Deliberately best-effort: report the problem as text.
        return f"读取文件时发生错误: {e}"
def docx_to_str(file_path):
    """Return the paragraph text of a .docx file, one paragraph per line.

    Args:
        file_path: Path of the .docx file.

    Returns:
        The text of all paragraphs joined with newlines. If anything fails,
        no exception is raised; a message starting with
        "读取docx文件时发生错误: " is returned instead.
    """
    try:
        doc = Document(file_path)
        return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
    except Exception as e:
        # Deliberately best-effort: report the problem as text.
        return f"读取docx文件时发生错误: {e}"
def file_to_str(file_path):
    """Read a supported file and return its text, dispatching on extension.

    ``.pdf`` goes to ``pdf_to_str``, ``.docx`` to ``docx_to_str`` and the
    plain-text extensions listed below to ``txt_to_str``. The extension is
    matched case-insensitively, so ``REPORT.PDF`` is accepted as well.

    Args:
        file_path: Path of the file to read.

    Returns:
        Whatever the selected reader returns (the file's text, or that
        reader's error message).

    Raises:
        TypeError: If the extension is not one of the supported types.
    """
    text_suffixes = (
        '.txt', '.html', '.json', '.py', '.md', '.java', '.c', '.cpp', '.h',
        '.js', '.css', '.xml', '.yml', '.yaml', '.sql', '.sh', '.bat',
    )
    # Only the comparison is lower-cased; the readers get the original path.
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return pdf_to_str(file_path)
    if lowered.endswith('.docx'):
        return docx_to_str(file_path)
    if lowered.endswith(text_suffixes):
        return txt_to_str(file_path)
    raise TypeError("文件类型不支持")
def list_to_str(histories):
    """Render chat history as plain text for inclusion in a prompt.

    Args:
        histories: Iterable of ``(user_prompt, response)`` pairs.

    Returns:
        One "I: ...\\nYou: ...\\n\\n" section per pair, concatenated in
        order; an empty string when there is no history.
    """
    return "".join(
        f"I: {user_prompt}\nYou: {response}\n\n"
        for user_prompt, response in histories
    )
# Holds the most recently loaded (model, tokenizer) pair, keyed by model
# name/path, so the checkpoint is not reloaded on every chat turn. Only one
# entry is kept: switching models drops the previous one first.
_QWEN_CACHE = {}


def _load_qwen(model_choice):
    """Return (model, tokenizer) for *model_choice*, loading on first use."""
    if model_choice not in _QWEN_CACHE:
        # Drop the previously selected model before loading a different one.
        _QWEN_CACHE.clear()
        model = transformers.AutoModelForCausalLM.from_pretrained(
            model_choice,
            torch_dtype="auto",
            device_map="auto",
        )
        tokenizer = transformers.AutoTokenizer.from_pretrained(model_choice)
        _QWEN_CACHE[model_choice] = (model, tokenizer)
    return _QWEN_CACHE[model_choice]


def qwen(user_prompt="", histories=None, system_prompt="", max_length=512, model_choice="Qwen1.5-0.5B-Chat"):
    """Generate one chat reply with a Qwen1.5 chat model and log the exchange.

    Args:
        user_prompt: The user's message for this turn.
        histories: Earlier ``(user_prompt, response)`` pairs. When non-empty
            they are rendered with ``list_to_str`` and appended to
            ``user_prompt``. ``None`` or an empty list means no history.
        system_prompt: Content of the system message.
        max_length: Maximum number of new tokens to generate; converted with
            ``int()`` because UI sliders may deliver a float.
        model_choice: Model name or path passed to ``from_pretrained``.

    Returns:
        The decoded reply text, without the prompt and special tokens.

    Side effects:
        Appends the (possibly history-augmented) prompt and the reply to the
        file in ``./history`` that ``find_max`` selects.
    """
    if histories:
        user_prompt = user_prompt + "\n\n这是我们俩之前的聊天记录:{}".format(list_to_str(histories))

    model, tokenizer = _load_qwen(model_choice)

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Follow the model's actual placement rather than assuming "cuda";
    # device_map="auto" does not fix the device in advance.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Unpacking the encoding forwards the attention mask with input_ids.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=int(max_length),
    )

    # generate() returns prompt + completion; keep only the completion.
    generated_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    with open("./history/"+find_max("./history"), "a", encoding='utf-8') as f:
        f.write("User: " + user_prompt + "\nResponse: " + response + "\n\n")
    return response
def echo(user_prompt, histories, system_prompt, max_length, model_choice, uploaded_file_path=None):
    """Chat handler: answer with ``qwen`` and stream the reply by character.

    Args:
        user_prompt: The user's message for this turn.
        histories: Earlier chat turns, forwarded to ``qwen``.
        system_prompt: System message, forwarded to ``qwen``.
        max_length: Generation length limit, forwarded to ``qwen``.
        model_choice: Model selection, forwarded to ``qwen``.
        uploaded_file_path: Optional path of an uploaded file; when given,
            its text (via ``file_to_str``) is appended to the prompt.

    Yields:
        Growing prefixes of the reply, one more character per step, with a
        0.05 s pause before each step.
    """
    if uploaded_file_path:
        attachment_text = file_to_str(uploaded_file_path)
        user_prompt = user_prompt + "\n\n这是文件内容:\n" + attachment_text

    response = qwen(user_prompt, histories, system_prompt, max_length, model_choice)

    shown = 0
    total = len(response)
    while shown < total:
        shown += 1
        time.sleep(0.05)
        yield response[:shown]
def find_max(folder_path):
    """Return the lexicographically greatest file name in a folder.

    Only regular files directly inside ``folder_path`` are considered;
    sub-directories are ignored.

    Args:
        folder_path: Directory to scan.

    Returns:
        The file name (not the full path) that compares greatest as a string.

    Raises:
        ValueError: If the folder contains no files.
    """
    file_names = [
        entry
        for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    ]
    if not file_names:
        # Same exception type max() would raise, with a usable message.
        raise ValueError(f"no files found in {folder_path!r}")
    return max(file_names)
def main():
    """Create this session's history file and launch the Gradio chat UI.

    A new empty file ``./history/history_<timestamp>.txt`` is created first.
    The chat interface then wires ``echo`` to four extra inputs, in this
    order: system prompt, maximum token count, model choice, uploaded file.
    The app is launched with ``share=True``.
    """
    # open() does not create missing directories, so make sure the folder
    # exists before the first run.
    os.makedirs("./history", exist_ok=True)

    # The timestamp format sorts chronologically as plain text.
    current_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
    with open("./history/history_{}.txt".format(current_time), "w", encoding='utf-8') as f:
        f.write("")

    # The size list previously contained 4 twice, which produced a duplicate
    # radio option; each size is now listed once.
    model_names = [f"Qwen1.5-{size}B-Chat" for size in (0.5, 1.8, 4)]

    demo = gr.ChatInterface(
        echo,
        additional_inputs=[
            gr.Textbox("You are a helpful assistant.", label="System Prompt"),
            gr.Slider(128, 2048, label="The max length of tokens", interactive=True),
            # (label, value) pairs: the UI keeps the decorated label while
            # the handler receives a plain model name.
            # NOTE(review): assumes the model folders are named like
            # "Qwen1.5-0.5B-Chat" - confirm against the local layout.
            gr.Radio(
                [(f"程俊凯的辣鸡大模型{name}", name) for name in model_names],
                value=model_names[0],  # pre-select so the handler never gets None
                label="Model Choice",
            ),
            gr.UploadButton(label="Upload a file", type="filepath", file_count="single", file_types=["file"], interactive=True, size="sm"),
        ],
    )
    # Use demo.queue().launch() instead to serve locally only.
    demo.queue().launch(share=True)


# NOTE(review): the author left a reminder to run
# `pip install gevent==22.10.2`; presumably needed in their environment -
# confirm before relying on it.
if __name__ == "__main__":
    main()
更多推荐
已为社区贡献1条内容
所有评论(0)