# 文本情感分析 (Text sentiment analysis)
#
# Pipeline: load a custom jieba dictionary, strip punctuation with a regex,
# remove stop words, segment the text with jieba, score each document with
# SnowNLP, and store the scores in MongoDB.
import jieba
import re
from pymongo import MongoClient
from snownlp import SnowNLP
def load_file():
    """Run the sentiment pipeline over the Taoguba ``List1`` .. ``List5`` collections.

    Loads the custom jieba dictionary, then for each collection:

    1. reads the ``Content`` field of every document,
    2. strips punctuation and ASCII letters/digits,
    3. removes stop words (``stop_dict``) and segments the text (``cut_package``),
    4. scores the segmented texts (``emotion``) and prints the scores,
    5. stores every score in ``<collection>_emotion`` via ``write_to_DB``,
       using ids that start at 1,
    6. prints the per-collection summary (``sum_number``).

    Returns:
        None. Results are printed and written to MongoDB.
    """
    jieba.load_userdict("C:/Users/jieba/dict_lzf.txt")  # custom user dictionary

    # Compile both patterns once instead of rebuilding them for every document.
    punctuation = re.compile('[’!@#~¥%……&*() ——+|}{“:”?》《,。、‘;’、】【!"#$%&\'()*+,-./:; <=>?@[\\]^_`{|}~]+')
    ascii_alnum = re.compile('[a-zA-Z0-9]')

    client = MongoClient('localhost', 27017)
    try:
        db = client['Taoguba']
        for index in range(1, 6):
            db_name = 'List' + str(index)
            e_name = db_name + str("_emotion")

            segmented = []
            for doc in db[db_name].find():
                content = doc.get("Content")
                if not isinstance(content, str):
                    # A record without text cannot be cleaned or scored; skip it
                    # rather than abort the batch after partial writes.
                    continue
                content = punctuation.sub('', content)
                content = ascii_alnum.sub('', content)
                segmented.append(cut_package(stop_dict(content)))

            scores = emotion(segmented)
            print("情感相似度如下:")
            print(scores)

            for id_, score in enumerate(scores, start=1):
                write_to_DB(e_name, id_, score)

            print("情感分类结果如下:")
            sum_number(scores)
    finally:
        # Release the connection even when a collection fails midway.
        client.close()
def stop_dict(news, stopwords_path="C:/Users/stopworld.txt"):
    """Return ``news`` with every item found in the stop-word file removed.

    ``news`` is iterated item by item (single characters when it is a ``str``).
    An item is dropped when it occurs as a substring of the stop-word file's
    full text, so characters that merely appear inside a longer stop word, and
    the newlines separating entries, are removed as well.

    NOTE(review): because the check is against the raw file text and runs
    before segmentation, this is a character-level filter rather than a
    word-level one. Confirm that is intended before changing it.

    Args:
        news: Text to filter.
        stopwords_path: UTF-8 encoded stop-word file. Defaults to the path the
            script has always used.

    Returns:
        The kept items concatenated into one string.

    Raises:
        OSError: If the stop-word file cannot be opened.
    """
    # The context manager closes the handle; the original leaked it on every call.
    with open(stopwords_path, 'r', encoding='utf-8') as handle:
        stopwords = handle.read()
    return ''.join(word for word in news if word not in stopwords)
def cut_package(news):
    """Segment ``news`` with jieba and return the tokens joined by single spaces.

    Only jieba's accurate mode (``cut_all=False``) is used. Full mode
    (``cut_all=True``) and search-engine mode (``jieba.cut_for_search``) were
    tried earlier but are not part of the pipeline.
    """
    tokens = jieba.cut(news, cut_all=False)
    return ' '.join(tokens)
def emotion(text):
    """Return the SnowNLP ``sentiments`` value for each string in ``text``.

    The result list has one entry per input string, in input order.
    """
    return [SnowNLP(sentence).sentiments for sentence in text]
def sum_number(summarry, threshold=0.6):
    """Print a positive/negative tally and an overall verdict for the scores.

    Each score strictly greater than ``threshold`` is labelled 1 (positive);
    every other score is labelled 0 (negative). The label list, a header, the
    count of each label that occurs, and the verdict are printed. A tie counts
    as positive.

    Args:
        summarry: Iterable of numeric sentiment scores.
        threshold: Scores above this value count as positive. Defaults to 0.6.

    Returns:
        None. All output goes to stdout.
    """
    number = [1 if score > threshold else 0 for score in summarry]
    print(number)
    print("情感统计结果如下:")

    if not number:
        # With no documents there is nothing to classify; the original fell
        # through to the tie rule and reported the empty set as positive.
        print("没有可统计的文档!")
        return

    positive = sum(number)
    negative = len(number) - positive

    for label, count in ((0, negative), (1, positive)):
        if count:
            # %d replaces the accidental %a (ascii() conversion) of the original.
            print(" %d 一共出现了%d次!" % (label, count))

    if negative <= positive:
        print("文档偏积极型!")
    else:
        print("文档偏消极型!")
def write_to_DB(name, id, emotion):
    """Upsert one sentiment score into collection ``name`` of the Taoguba database.

    The stored document is ``{"_id": id, "Emotion": emotion}``. An existing
    document with the same ``_id`` is replaced; otherwise a new one is inserted.

    Args:
        name: Target collection name.
        id: Value used as the document ``_id``.
        emotion: Sentiment score to store.

    Returns:
        None.
    """
    client = MongoClient('localhost', 27017)
    try:
        collection = client['Taoguba'][name]
        # Collection.save() was removed in PyMongo 4; replace_one with
        # upsert=True gives the same insert-or-replace-by-_id behaviour.
        collection.replace_one(
            {"_id": id},
            {"_id": id, "Emotion": emotion},
            upsert=True,
        )
    finally:
        # A client is opened per call, so close it to avoid leaking connections.
        client.close()
def main():
    """Script entry point: run the full pipeline through ``load_file``."""
    load_file()


# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# Web-page footer residue from the source article (not part of the program):
# "点击阅读全文" / "更多推荐" / "所有评论(0)"