Answer a question

I'm trying to scrape product details from lowes.com, and here is the script I'm trying to run:

from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import time
from selenium.webdriver.chrome.options import Options

# Scrape image URLs, price, brand and title from a single Lowe's product page
# by rendering it in Chrome through Selenium and parsing the resulting HTML
# with BeautifulSoup.

chrome_options = Options()
chrome_options.add_experimental_option('prefs', {
    'geolocation': True
})

# Start the browser. Without this line `driver` is undefined and every
# `driver.*` call below raises NameError.
# NOTE(review): passing the driver path positionally is the Selenium 3 calling
# style; newer Selenium 4 releases expect
# `webdriver.Chrome(service=Service(path), options=...)` instead -- confirm
# against the installed Selenium version.
driver = webdriver.Chrome(ChromeDriverManager().install(), options=chrome_options)
try:
    # Disabled experiment: override the browser's reported location.
    #driver.execute_cdp_cmd("Page.setGeolocationOverride", {
    #    "latitude": 34.052235,
    #    "longitude": -118.243683,
    #    "accuracy": 98
    #})
    driver.get("https://www.lowes.com/pd/Therma-Tru-Benchmark-Doors-Craftsman-Simulated-Divided-Light-Right-Hand-Inswing-Ready-To-Paint-Fiberglass-Prehung-Entry-Door-with-Insulating-Core-Common-36-in-x-80-in-Actual-37-5-in-x-81-5-in/1000157897")

    # Scroll down the page in five equal steps, pausing one second after each
    # (presumably so lazily loaded content has time to render).
    for step in range(1, 6):
        driver.execute_script(
            f"window.scrollTo(0,(document.body.scrollHeight/5)*{step})"
        )
        time.sleep(1)

    content = driver.page_source
    soup = BeautifulSoup(content, 'html.parser')

    # Product image URLs.
    imgs = soup.find_all("img", attrs={"class": "met-epc-item"})
    for img in imgs:
        print(img.get("src"))

    # soup.find() returns None when nothing matches, so `.text` raises
    # AttributeError here if the page did not render a price element.
    print("Price: " + soup.find("span", attrs={"class": "aPrice large"}).text)

    brand = soup.find("a", attrs={"class": "Link__LinkStyled-RC__sc-b3hjw8-0 bYfcYt"})
    print("brand url: " + brand.get("href"))
    # Use `.text` for the link's visible text: `brand.get("text")` would look
    # up an HTML *attribute* named "text" and return None, making the string
    # concatenation fail with TypeError.
    print("brand name: " + brand.text)
    print("brand desc: " + soup.find("h1", attrs={"class": "style__HeaderStyle-PDP__y7vp5g-12 iMECxW"}).text)
finally:
    # Always end the browser session, even when one of the lookups above
    # raises; quit() also shuts down the chromedriver process.
    driver.quit()

When I execute this script, the price lookup raises an error because the element does not exist. Looking at the page in the Chrome instance opened by Selenium, I found that the price is not shown; instead there is a text box asking for a ZIP code, city, or state in order to show price and availability, and nothing happens when I enter any ZIP code, city, or state. When I then refresh the page or open any other URL on the Lowe's website, it says "Access Denied", and to open any other Lowe's URL I have to start a new Chrome instance with Selenium. Is there any suggestion for how to fix this and scrape the product correctly? I also want to note that when I open the Lowe's website in my normal Chrome browser, it loads correctly, shows the price, and does not give me any "Access Denied" error either.

Answers

The data you're looking for is embedded within the page in JSON format:

import re
import json
import requests

# Fetch the product page over plain HTTP and read the product details from the
# JSON object the page assigns to window['__PRELOADED_STATE__'].

# Request headers: identify as a desktop Firefox browser.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0"
}

url = "https://www.lowes.com/pd/Therma-Tru-Benchmark-Doors-Craftsman-Simulated-Divided-Light-Right-Hand-Inswing-Ready-To-Paint-Fiberglass-Prehung-Entry-Door-with-Insulating-Core-Common-36-in-x-80-in-Actual-37-5-in-x-81-5-in/1000157897"

# requests has no default timeout; set one so the script cannot hang forever.
response = requests.get(url, headers=headers, timeout=30)
# Surface HTTP failures (e.g. 403) directly instead of as a parse error below.
response.raise_for_status()
t = response.text

# Capture the JSON object literal assigned to window['__PRELOADED_STATE__'].
state_match = re.search(r"window\['__PRELOADED_STATE__'\] = (\{.*?\})<", t)
if state_match is None:
    raise RuntimeError("window['__PRELOADED_STATE__'] was not found in the page HTML")
data = json.loads(state_match.group(1))

# uncomment to print all data:
# print(json.dumps(data, indent=4))

# The item id is the last path segment of the product URL; strip a trailing
# slash first so it does not yield an empty id.
item_id = url.rstrip("/").split("/")[-1]

# All printed fields live under productDetails[<item id>].
details = data["productDetails"][item_id]

print("Name:", details["product"]["brand"])
print("Desc:", details["product"]["description"])
print("Price:", details["price"]["itemPrice"])

Prints:

Name: Therma-Tru Benchmark Doors
Desc: 36-in x 80-in Fiberglass Craftsman Right-Hand Inswing Ready to paint Unfinished Prehung Single Front Door with Brickmould
Price: 370
Logo

Python社区为您提供最前沿的新闻资讯和知识内容

更多推荐