This code works, but the problem I am facing is that it scrapes the data for only one URL and then throws an error, as shown in the figure below. Please help me out. It prints only one link and then fails with a "session not created" error.
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import pandas as pd
import time

# Product pages to scrape, visited in this order.
url = [
    "https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html",
]

for endpoint in url:
    # A fresh set of options and a fresh Chrome are created for every URL.
    # Each one asks for the same fixed debugging port (9222), and the
    # previous browser is never quit.
    options = webdriver.ChromeOptions()
    options.add_experimental_option("excludeSwitches", ['enable-automation'])
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
    options.add_argument("--remote-debugging-port=9222")
    driver = webdriver.Chrome('F:/chromedriver.exe', options=options)

    driver.get(endpoint)
    # Fixed pause before reading the page source.
    time.sleep(5)

    image = None
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    content = soup.find_all(
        'div',
        class_='c-product-description-features s-product-description-features row',
    )
    pid = soup.find('span', class_='product-id').text

    # Print the product id next to the srcset and alt text of every <img>
    # that carries a srcset attribute.
    for tag in soup.find_all('img', srcset=True):
        img = tag['srcset']
        alt = tag['alt']
        print(pid, img, alt)
Advertisement
Answer
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys

# Product pages to visit, in this order.
url = [
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/ultra-flex---twilight-twinkle/149173.html",
    "https://www.skechers.com/women/shoes/ultra-flex---statements/12841.html",
    "https://www.skechers.com/women/shoes/ultra-flex/12843.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---extreme-outlook/124580.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst---lunar-mars/124577.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-hyper-burst/124575.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---coco-jazz/124603.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-stability---magnificent-glow/124602.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy/124191.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-joy---paradise/15601.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---vivid-blush/149408.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---kaleidoscope-splash/149134.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0/149062.html",
    "https://www.skechers.com/women/shoes/skech-air-element-2.0---rising-gaze/149611.html",
    "https://www.skechers.com/women/shoes/skechers-gowalk-air---twirl/124073.html",
    "https://www.skechers.com/women/shoes/gowalk-air---whirl/124074.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine---her-best/104092.html",
    "https://www.skechers.com/women/shoes/skechers-arch-fit-refine/104090.html",
    "https://www.skechers.com/women/shoes/glide-step---stepping-up/104086.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---sharp-witted/149007.html",
    "https://www.skechers.com/women/shoes/relaxed-fit-empire-dlux---paradise-sky/149274.html",
    "https://www.skechers.com/women/shoes/skechers-gorun-smart/128230.html",
]

# The options do not depend on the URL, so they are built once.
options = webdriver.ChromeOptions()
options.add_experimental_option("excludeSwitches", ['enable-automation'])
options.add_argument('--disable-blink-features=AutomationControlled')
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36")
# "--remote-debugging-port=9222" is deliberately NOT passed: once one Chrome
# instance has taken that fixed port, starting another instance with the same
# argument fails with "session not created".

# One browser is started and reused for every URL, instead of launching (and
# never closing) a new Chrome per iteration.
driver = webdriver.Chrome('./chromedriver.exe', options=options)
try:
    for count, endpoint in enumerate(url):
        # Progress indicator: zero-based index of the URL being loaded.
        print(count)
        driver.get(endpoint)
finally:
    # Always shut the browser and chromedriver down, even if a page load fails.
    driver.quit()
ISSUE:
The issue is with the remote debugging port: it is already in use by the first Chrome instance, so the following instances cannot be started on the same port.
Solution:
Remove the `--remote-debugging-port=9222` argument.