```python
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import date

today = date.today()
Date = today

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
    'Accept-Language': 'en-US, en;q=0.5',
}

URL = [
    'https://www.amazon.com/Dove-Intensive-Concentrate-Technology-Protects/dp/B0B1VVXTKL',
    'https://www.amazon.com/Dove-Intensive-Concentrate-Conditioner-Technology/dp/B0B1VXFLQ2',
]

data = []
for url in URL:
    webpage = requests.get(url, headers=headers)
    soup = BeautifulSoup(webpage.content, 'html.parser')
    bestseller = soup.select_one('#detailBulletsWrapper_feature_div span:-soup-contains("Best Seller")')
    data.append({
        'Rank': bestseller.contents[2].get_text().split()[0],
        'Category': " ".join(bestseller.contents[2].get_text().split()[2:6]),
        'Sub-Category Rank': bestseller.contents[5].get_text().split()[0],
        'Sub-Category': " ".join(bestseller.contents[5].get_text().split()[2:6]),
        # ASIN
        'ASIN': soup.select_one('#detailBulletsWrapper_feature_div span:-soup-contains("ASIN")').contents[3].get_text(),
        # Product Title
        'Product Title': soup.find("span", attrs={"id": 'productTitle'}).text.strip(),
        'Date': Date,
    })

df = pd.DataFrame(data)
df['Rank'] = df['Rank'].str.replace('#', '')
df['Sub-Category Rank'] = df['Sub-Category Rank'].str.replace('#', '')

# append to local file (local_path is defined elsewhere in the notebook)
df.to_csv(local_path, mode='a', header=False, index=False)
```
I am trying to use the schedule library in a Jupyter notebook, since cron and the Windows Task Scheduler don't work for me. I want to execute this code every day at 8 AM. Can someone help me define the job? Thank you so much!
Answer
This one should work:
```python
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import date
import datetime
import asyncio


def wait_for_clock(hour, minute, result=None):
    # Return an asyncio.sleep() that finishes at the next occurrence of hour:minute.
    t = datetime.datetime.combine(datetime.date.today(), datetime.time(hour, minute))
    now = datetime.datetime.now()
    if now >= t:
        t += datetime.timedelta(days=1)  # already past today's target, wait for tomorrow's
    delta = t - now
    return asyncio.sleep(delta.total_seconds(), result)


async def do_that():
    today = date.today()
    Date = today
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5',
    }
    URL = [
        'https://www.amazon.com/Dove-Intensive-Concentrate-Technology-Protects/dp/B0B1VVXTKL',
        'https://www.amazon.com/Dove-Intensive-Concentrate-Conditioner-Technology/dp/B0B1VXFLQ2',
    ]
    data = []
    for url in URL:
        webpage = requests.get(url, headers=headers)
        soup = BeautifulSoup(webpage.content, 'html.parser')
        bestseller = soup.select_one('#detailBulletsWrapper_feature_div span:-soup-contains("Best Seller")')
        data.append({
            'Rank': bestseller.contents[2].get_text().split()[0],
            'Category': " ".join(bestseller.contents[2].get_text().split()[2:6]),
            'Sub-Category Rank': bestseller.contents[5].get_text().split()[0],
            'Sub-Category': " ".join(bestseller.contents[5].get_text().split()[2:6]),
            # ASIN
            'ASIN': soup.select_one('#detailBulletsWrapper_feature_div span:-soup-contains("ASIN")').contents[3].get_text(),
            # Product Title
            'Product Title': soup.find("span", attrs={"id": 'productTitle'}).text.strip(),
            'Date': Date,
        })
    df = pd.DataFrame(data)
    df['Rank'] = df['Rank'].str.replace('#', '')
    df['Sub-Category Rank'] = df['Sub-Category Rank'].str.replace('#', '')
    # append to local file (local_path is defined elsewhere)
    df.to_csv(local_path, mode='a', header=False, index=False)


if __name__ == '__main__':
    while True:
        asyncio.run(wait_for_clock(8, 0))
        asyncio.run(do_that())
```
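One caveat for the Jupyter use case from the question: the notebook kernel already runs an event loop, so `asyncio.run()` raises a `RuntimeError` inside a cell. A minimal sketch of the notebook variant, relying on IPython's support for top-level `await`:

```python
# In a Jupyter cell, await the coroutines directly instead of calling asyncio.run():
while True:
    await wait_for_clock(8, 0)  # sleeps until the next 8:00 AM
    await do_that()             # then scrapes and appends to the CSV
```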
Instead of the schedule library, I'm using my own code here to wait for the clock. I also wrote it in an "asynchronous" way, but you can change do_that to a sync function and then, at the bottom, just call do_that() instead of asyncio.run(do_that()).
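For comparison, if you do want the schedule library the question mentions, the job definition would look roughly like this (a minimal sketch; `scrape_job` is a hypothetical synchronous wrapper around the scraping code above):

```python
import time
import schedule  # pip install schedule


def scrape_job():
    # hypothetical: the scraping + CSV-append code from the question, as a sync function
    ...


schedule.every().day.at("08:00").do(scrape_job)  # run the job every day at 8:00 AM

while True:
    schedule.run_pending()  # execute any job whose scheduled time has come
    time.sleep(60)          # poll once a minute
```

Either way, the while loop has to keep running, so the notebook cell blocks in both versions.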