Put the entire code into a question, thank you to all that have replied but this issue is super annoying either way help is appreciated!
Context: This code is meant to go onto the top reddit post of the day/week, then screenshot it and once that’s done it goes to the comments and screenshots the top comments of said post, the former works but the latter does not.
import time,utils,string from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from utils import config def scrape(post_url): bot = utils.create_bot(headless=True) data = {} try: # Load cookies to prevent cookie overlay & other issues bot.get('https://www.reddit.com') for cookie in config['reddit_cookies'].split('; '): cookie_data = cookie.split('=') bot.add_cookie({'name':cookie_data[0],'value':cookie_data[1],'domain':'reddit.com'}) bot.get(post_url) # Fetching the post itself, text & screenshot post = WebDriverWait(bot, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.Post'))) post_text = post.find_element(By.CSS_SELECTOR, 'h1').text data['post'] = post_text post.screenshot('output/post.png') # Let comments load bot.execute_script("window.scrollTo(0, document.body.scrollHeight);") time.sleep(3) # Fetching comments & top level comment determinator comments = WebDriverWait(bot, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div[id^=t1_][tabindex]'))) allowed_style = comments[0].get_attribute("style") # Filter for top only comments NUMBER_OF_COMMENTS = 10 comments = [comment for comment in comments if comment.get_attribute("style") == allowed_style][:NUMBER_OF_COMMENTS] print('💬 Scraping comments...',end="",flush=True) # Save time & resources by only fetching X content for i in range(len(comments)): try: print('.',end="",flush=True) # Filter out locked comments (AutoMod) try: comments[i].find_elements(By.CSS_SELECTOR, '.icon.icon-lock_fill') continue except: pass # Scrolling to the comment ensures that the profile picture loads # Credits: https://stackoverflow.com/a/57630350 desired_y = (comments[i].size['height'] / 2) + comments[i].location['y'] window_h = bot.execute_script('return window.innerHeight') window_y = bot.execute_script('return window.pageYOffset') current_y = (window_h / 2) + window_y scroll_y_by = desired_y - current_y bot.execute_script("window.scrollBy(0, arguments[0]);", scroll_y_by) time.sleep(0.2) # Getting comment into string text = "n".join([element.text for element in comments[i].find_elements_by_css_selector('.RichTextJSON-root')]) # Screenshot & save text comments[i].screenshot(f'output/{i}.png') data[str(i)] = ''.join(filter(lambda c: c in string.printable, text)) except Exception as e: if config['debug']: raise e pass if bot.session_id: bot.quit() return data except Exception as e: if bot.session_id: bot.quit() if config['debug']: raise e return False
Advertisement
Answer
Code was fixed by removing code which filters locked comments