Skip to content
Advertisement

Reddit isn’t scraping the top comments (python/selenium)

Put the entire code into a question, thank you to all that have replied but this issue is super annoying either way help is appreciated!

Context: This code is meant to go onto the top reddit post of the day/week, then screenshot it and once that’s done it goes to the comments and screenshots the top comments of said post, the former works but the latter does not.

import time,utils,string
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from utils import config

def scrape(post_url):
bot = utils.create_bot(headless=True)
data = {}

try:
    # Load cookies to prevent cookie overlay & other issues
    bot.get('https://www.reddit.com')
    for cookie in config['reddit_cookies'].split('; '):
        cookie_data = cookie.split('=')
        bot.add_cookie({'name':cookie_data[0],'value':cookie_data[1],'domain':'reddit.com'})
    bot.get(post_url)

    # Fetching the post itself, text & screenshot
    post = WebDriverWait(bot, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.Post')))
    post_text = post.find_element(By.CSS_SELECTOR, 'h1').text
    data['post'] = post_text
    post.screenshot('output/post.png')

    # Let comments load
    bot.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(3)
    
    # Fetching comments & top level comment determinator
    comments = WebDriverWait(bot, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div[id^=t1_][tabindex]')))
    allowed_style = comments[0].get_attribute("style")
    
    # Filter for top only comments
    NUMBER_OF_COMMENTS = 10
    comments = [comment for comment in comments if comment.get_attribute("style") == allowed_style][:NUMBER_OF_COMMENTS]

    print('💬 Scraping comments...',end="",flush=True)
    # Save time & resources by only fetching X content
    for i in range(len(comments)):
        try:
            print('.',end="",flush=True)
            # Filter out locked comments (AutoMod) 
            try:
                comments[i].find_elements(By.CSS_SELECTOR, '.icon.icon-lock_fill')
                continue
            except:
                pass

            # Scrolling to the comment ensures that the profile picture loads
            # Credits: https://stackoverflow.com/a/57630350
            desired_y = (comments[i].size['height'] / 2) + comments[i].location['y']
            window_h = bot.execute_script('return window.innerHeight')
            window_y = bot.execute_script('return window.pageYOffset')
            current_y = (window_h / 2) + window_y
            scroll_y_by = desired_y - current_y

            bot.execute_script("window.scrollBy(0, arguments[0]);", scroll_y_by)
            time.sleep(0.2)

            # Getting comment into string
            text = "n".join([element.text for element in comments[i].find_elements_by_css_selector('.RichTextJSON-root')])

            # Screenshot & save text
            comments[i].screenshot(f'output/{i}.png')
            data[str(i)] = ''.join(filter(lambda c: c in string.printable, text))
        except Exception as e:
            if config['debug']:
                raise e
            pass

    if bot.session_id:
        bot.quit()
    return data
except Exception as e:
    if bot.session_id:
        bot.quit()
    if config['debug']:
        raise e
    return False

Advertisement

Answer

Code was fixed by removing code which filters locked comments

User contributions licensed under: CC BY-SA
2 People found this is helpful
Advertisement