【技术经验分享】Python+Spark知识图谱酒店推荐系统 酒店价格预测系统 酒店可视化 酒店爬虫 酒店大数据 neo4j知识图谱 深度学习 机器学习 酒店数据分析 人工智能 神经网络
Python+Spark知识图谱酒店推荐系统 酒店价格预测系统 酒店可视化 酒店爬虫 酒店大数据 neo4j知识图谱 深度学习 机器学习 酒店数据分析 人工智能 神经网络
·
开发技术
前端:vue.js
后端:springboot+mybatis-plus
数据库:mysql
算法(机器学习、深度学习):协同过滤推荐算法(基于用户、基于物品全部实现)、神经网络混合CF推荐算法、MLP深度学习算法、SVD深度学习算法、线性回归预测、KNN、CNN卷积神经网络、LSTM情感分析、百度地图API
爬虫:python、requests、chrome_driver
大数据分析:spark、hadoop、echarts
核心代码演示
爬虫代码
# -*- coding: utf-8 -*-
import re
from time import sleep
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from lxml import etree
import requests
from base import headers
from db import db_util
from model import check_hotel_exist, check_room_exist, quick_check
# Selenium bootstrap: builds the module-level Chrome `driver` and the shared
# explicit-wait helper `wait`.
# Imported here so this setup block carries its own dependency.
from selenium.webdriver.chrome.service import Service

options = webdriver.ChromeOptions()
# Optional: uncomment to stop Chrome from loading images.
# options.add_experimental_option('prefs', {'profile.managed_default_content_settings.images': 2})

# Location of the local chromedriver executable.
# NOTE(review): this is a raw string, so every "\\" stays as two backslashes
# in the path; presumably Windows tolerates the doubled separators - confirm,
# and adjust the path for your machine.
chrome_driver_path = r"D:\\bigdata2024_hotel_neo4j_rec_predict\\Hotel-Spider\\chromedriver.exe"

# Selenium 4 expects the driver path to be supplied through a Service object;
# passing it as the first positional argument of webdriver.Chrome() is
# deprecated and no longer accepted by current releases.
driver = webdriver.Chrome(service=Service(executable_path=chrome_driver_path), options=options)
wait = WebDriverWait(driver, 30)  # explicit waits time out after 30 seconds
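# Paste the cookie list printed in step one here.
# NOTE(review): the entries below carry fixed 'expiry' timestamps and session
# tokens (e.g. 'csrfToken'); presumably they must be re-captured before each
# run and should not be shared publicly - confirm.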
COOKIES = [{'domain': '.qunar.com', 'expiry': 1687332998, 'httpOnly': False, 'name': 'ariaDefaultTheme', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'undefined'}, {'domain': '.qunar.com', 'expiry': 1717601797, 'httpOnly': False, 'name': 'ctf_june', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '1683616182042##iK3wWKjAVuPwawPwa%3DP%2BEPPOWR2%3DWKoIaDX8aSjwWSiGXsDnWDkhEPXsasGRiK3siK3saKjmWSXOVR2NWstNWuPwaUvt'}, {'domain': '.qunar.com', 'httpOnly': False, 'name': 'QN44', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'imrdobl5205'}, {'domain': '.qunar.com', 'expiry': 1694244925, 'httpOnly': True, 'name': '_v', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'TTNuZY2Qhei2HxTzPpqUIqQhF1GB0WTduBaYPPxU_O0UYRGES9PBEx98Gu2KsHp40_yjZ4OuHwoK426UiZDE16W8xR5rBkQS5APXomHLWZRc1hyXghKfZ03hLDhvtVnX0UVruVZHmQmqFNC8aJ8c_ppkO-TVMl7fY5fsblPuR2rZ'}, {'domain': '.qunar.com', 'expiry': 1718004898, 'httpOnly': False, 'name': 'QN1', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '00008780319851dff9102d68'}, {'domain': '.qunar.com', 'expiry': 1694244925, 'httpOnly': False, 'name': '_s', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 's_LD6G2CQLQ5WUSFCMRXOUOFESVY'}, {'domain': '.qunar.com', 'expiry': 1721028905, 'httpOnly': False, 'name': 'fid', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'e5c5bde8-3813-48d0-addd-b79b298c6ab9'}, {'domain': '.qunar.com', 'expiry': 1694244925, 'httpOnly': False, 'name': 'csrfToken', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '4KazGERihPVdleEjauuoCaRuggdQUzBX'}, {'domain': '.qunar.com', 'httpOnly': False, 'name': 'QN271', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'ed2e873c-8c2c-4cf5-9eb5-2ed418ad50f5'}, {'domain': '.qunar.com', 'expiry': 1694244925, 'httpOnly': False, 'name': '_t', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '28217255'}, {'domain': '.qunar.com', 'expiry': 1694244925, 'httpOnly': False, 'name': '_q', 'path': '/', 
'sameSite': 'Lax', 'secure': False, 'value': 'U.imrdobl5205'}, {'domain': '.qunar.com', 'expiry': 1694244925, 'httpOnly': False, 'name': 'QN42', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '%E5%8E%BB%E5%93%AA%E5%84%BF%E7%94%A8%E6%88%B7'}, {'domain': '.qunar.com', 'expiry': 1717601702, 'httpOnly': False, 'name': 'ctt_june', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '1683616182042##iK3wWR38WhPwawPwastAEKTIaSj%3DXS2ma%3DaAX%3DP8aRX%2BWSPOaSt%2BaPGIVKHTiK3siK3saKjmWSXOVR28aSgmWuPwaUvt'}, {'domain': '.qunar.com', 'expiry': 1689060898, 'httpOnly': False, 'name': 'HN2', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'qukgzqsnnlqgl'}, {'domain': '.qunar.com', 'expiry': 1689060901, 'httpOnly': False, 'name': 'QN269', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '7855ED90082A11EE9664FA163E7E0E4E'}, {'domain': '.qunar.com', 'httpOnly': False, 'name': 'QN25', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'e063eb57-2c2a-4294-b735-d37cb97953af-9f992f90'}, {'domain': '.qunar.com', 'expiry': 1689060898, 'httpOnly': False, 'name': 'HN1', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'v192ceef7a5f492f892518e2674efb85b8'}, {'domain': '.qunar.com', 'expiry': 1687073699, 'httpOnly': False, 'name': 'QN277', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'organic'}, {'domain': '.qunar.com', 'expiry': 1687073699, 'httpOnly': False, 'name': 'QN205', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'organic'}, {'domain': '.qunar.com', 'httpOnly': False, 'name': 'qunar-assist', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '{%22version%22:%2220211215173359.925%22%2C%22show%22:false%2C%22audio%22:false%2C%22speed%22:%22middle%22%2C%22zomm%22:1%2C%22cursor%22:false%2C%22pointer%22:false%2C%22bigtext%22:false%2C%22overead%22:false%2C%22readscreen%22:false%2C%22theme%22:%22default%22}'}, {'domain': '.hotel.qunar.com', 'expiry': 1686728197, 'httpOnly': False, 'name': '__qt', 'path': '/', 
'sameSite': 'Lax', 'secure': False, 'value': 'v1%7CVTJGc2RHVmtYMTh2bUI1Y1NJTUF0bjdONm1rSklsZ0dyRGlGNlBqWmVuQk0zK3hobmJtdnBDalNkbDBrYWRPL0hOZGVrSXJSd0lPcDU0WVlmRmQvV1dSbnhuREY3c3h6dlk1cGdNS01sQytTOW9ua1NNT0lLdW1KczJEdWVnaDlZY2U0NktqVm5DaXRERnlQeVZXZWo5S1dxTHYrZXA3bG1qYVg2ZXpjU0RNPQ%3D%3D%7C1686468997594%7CVTJGc2RHVmtYMTlNeXZ6K1lva1B6ZkYzMFNjNFQwWDZSbWVRY0hBa2VsMUdVRXZiNzRhRUYrUkdQRiszZmptR3NIZlNCVnViREtLOHI2MTBLRUNnL2c9PQ%3D%3D%7CVTJGc2RHVmtYMSs1MjBBOW9mV0kvMTYxY21BQUozSFFsNDd1cDVobWVFWFRTa2NINGxndnNaOHFKSkpuM1NaQ2dXckpyem43UU5uN3lpemJLaExxZmZxSk03QWlBeTBWN0FFb3dZYTQ1NkMzYnByb2laSlBPNlVLT1pIZ0JYSlpGYldWQ2FRZ21hbWtaSEE2OUFnaGd4MVgrUzF0QVhBZlZtR2FTY1JZRk5uZUxjYUZ3ZGhTNlB6djgzMThZb3hNUWMxTXF6NnVsdk90dDBIZmQ5NkRldXYvbjdwQ2wyTVhIb2NXeTFZTzNGczJkV1NoVmVIMHZJTjBsK3NjQ3hZQmUrTEo3b0lJenpieVpRQ2FaZFRnV3lUSnpHUUJrbWtORU1FR1ZHQU0zWkZacnRyYitMZXMwRzQzMytQNEdVN2t2ZFFKMU9UV1FvQ0x4dFk2UHBJcXpiR3BiMlZMN0VQRHZ6U3NUTlBHZmMwMzNRZE5Pc1hQa0UrL0FjVm5rQVk3Sm9FT2l2YXhUWk44QTdOYkZ6K2NvWENRaklXdVB2YWhLb0VseW5zV1p4UW5JeHY1dUE2MzN2WVhSb0IybGtPMUdyWS9LdGZXT2FjMUd2eFl5K2M4dlE2c01EZCtaZ3dMbzAvZzV4UnpKUUU5WENuNXRBUVQ1bk5CcGNmaENUbkp5L09abmI4QVFOMWt3aXNsOUlBSzZkSE52NGpHZ0ZZc1pNVUR1YUV4cnR0clNWWUl5SXZab0cxZ2hnbFBJaFZaUll3TUpiYndubGhVS05PQzM1M25ZQ1lYL1hDVVRtUHE1N1hqd3dvUXFzYkRKUlM1emJ5MXBOelVOMVl4ZmN1WA%3D%3D'}, {'domain': '.qunar.com', 'expiry': 1694244996, 'httpOnly': True, 'name': '_vi', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'j7xVpZusah3Fd_4yp531_n-Vg2HnhvIER1KB5UZB-k15RfbfGt6XVUt9VnAAa3m5XZVJEjB7cfOTJcJuFLxGli-MGwRSgLlZeGC_1MDH70E7TC2H4tVGJULLbETIRQ6eSe3sql1zNsCz2iTFRCiK_muXnq2IueZreN4AQIXltWBm'}, {'domain': '.qunar.com', 'expiry': 1717601729, 'httpOnly': False, 'name': 'cs_june', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'c8d615b10725571c19a318c9a0c8e1c174e00da3f20ae7c6ca014c799cba74aa2682c012ecab4ee28a2b46a84861402d8a70fbace87609b554488398336741f4b17c80df7eee7c02a9c1a6a5b97c11798db54efd0b29c05b1b8f63b12688bd705a737ae180251ef5be23400b098dd8ca'}, {'domain': 
'.qunar.com', 'expiry': 1721028898, 'httpOnly': False, 'name': 'QN300', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'organic'}, {'domain': '.qunar.com', 'httpOnly': False, 'name': 'QN267', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': '18296613040fe7b2b8'}, {'domain': '.qunar.com', 'expiry': 1721028996, 'httpOnly': False, 'name': '_i', 'path': '/', 'sameSite': 'Lax', 'secure': False, 'value': 'ueHd8gbHqMyXnTOX-K5-2dkyzmkX'}]
数据库代码
-- Comment table (`tb_comment`): one row per user comment on an item, together
-- with the sentiment-analysis label and score stored for that comment.
-- WARNING: the DROP below removes any existing `tb_comment` table and its data
-- before the table is re-created.
-- NOTE(review): MySQL's `utf8` is the 3-byte utf8mb3 character set, which
-- cannot store 4-byte characters such as emoji; consider utf8mb4 if comment
-- text may contain them.
DROP TABLE IF EXISTS `tb_comment`;
CREATE TABLE `tb_comment` (
  -- Surrogate primary key.
  `id` int(11) NOT NULL AUTO_INCREMENT COMMENT 'id',
  -- Commenting user's id and user name.
  `uid` int(11) NULL DEFAULT NULL COMMENT 'UID',
  `username` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '用户名',
  -- Id and name of the item being commented on.
  `oid` int(11) NULL DEFAULT NULL COMMENT '评论物品ID',
  `oname` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '物品名',
  -- Comment title and body text (each limited to 255 characters).
  `title` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '标题',
  `content` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '内容',
  -- Sentiment-analysis result label.
  `label` varchar(100) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '情感分析结果',
  -- Sentiment-analysis score; the column comment previously duplicated the
  -- one on `label`, so it now reads "sentiment-analysis score".
  `score` double NULL DEFAULT NULL COMMENT '情感分析得分',
  -- Deletion flag, defaults to 0.
  `deleted` tinyint(1) NULL DEFAULT 0 COMMENT '删除',
  -- Row creation / last-update timestamps (second precision).
  `create_time` datetime(0) NULL DEFAULT NULL,
  `update_time` datetime(0) NULL DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 10 CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '评论表' ROW_FORMAT = Dynamic;
代码实现后运行效果展示
更多推荐
已为社区贡献8条内容
所有评论(0)