I have this URL, and I’m trying to get the video’s source link, but it’s located within an iframe. The video URL is https://ndisk.cizgifilmlerizle.com..., inside an iframe called vjs_iframe. My code is below:
import requests
from bs4 import BeautifulSoup

# Episode page whose embedded player iframe we are trying to locate.
url = "https://m.wcostream.com/my-hero-academia-season-4-episode-5-english-dubbed"

# Noticed that website responds better with headers
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:75.0) Gecko/20100101 Firefox/75.0"
}

# Use the session as a context manager so its connections are released, and
# bound the request so a stalled server cannot hang the script forever.
with requests.Session() as session:
    response = session.get(url, headers=headers, timeout=30)

# Fail loudly on an HTTP error instead of parsing an error page and
# mistaking it for "the page has no iframes".
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")
iframes = soup.find_all("iframe")  # Returns an empty list
vjs_iframe = soup.find_all(class_="vjs_iframe")  # Also returns an empty list
I don’t know how to get the URL within the iframe, since not even the iframe’s source is loaded upon the first request. Is getting the https://ndisk.cizgifilmlerizle.com... URL even possible using BeautifulSoup, or would I need to use another library like selenium or something else? Thanks in advance!
My approach to scraping their content is as follows. I don’t know if you still need this, but I was searching for problems with the https://ndisk.cizgifilmlerizle.com website and came across this question, so I figured it might help someone else. It’s crude, but it gets the job done.
import os
import string
import time
from time import sleep

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

FIRST_EPISODE = 274  # Stuck on 274 - 500. 273 also failed.
LAST_EPISODE = 500
DOWNLOAD_TIMEOUT_S = 50  # seconds to poll for the finished file
MAX_ATTEMPTS = 5  # consecutive failures tolerated per episode

# Element that wraps the episode title on the episode page.
_TITLE_CONTAINER_XPATH = (
    '//*[@id="content"]/table/tbody/tr/td[1]/table/tbody/tr/td'
    '/table[1]/tbody/tr[2]/td/div[2]'
)
TITLE_XPATH = _TITLE_CONTAINER_XPATH + "/b/b[1]"
TITLE_FALLBACK_XPATH = _TITLE_CONTAINER_XPATH + "/b[2]"
VIDEO_FRAME_XPATH = '//*[@id="frameNewcizgifilmuploads0"]'


def do_keys(key, num_times, action_chain):
    """Queue ``key`` on ``action_chain`` ``num_times`` times.

    The keys are only queued; the caller must call ``perform()`` to send them.
    """
    for _ in range(num_times):
        action_chain.send_keys(key)


def cls():
    """Clear the terminal by printing the ANSI "erase display" sequence."""
    print("\033[2J")


def _make_browser(video_dir):
    """Return a headless, muted Chrome that saves downloads into ``video_dir``."""
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--mute-audio")
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": video_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
        },
    )
    return webdriver.Chrome(options=options)


def _episode_title(browser):
    """Return the page's episode title as a lowercase, underscore-joined slug."""
    try:
        element = browser.find_element(By.XPATH, TITLE_XPATH)
    except NoSuchElementException:
        # Some pages use a slightly different markup for the title.
        element = browser.find_element(By.XPATH, TITLE_FALLBACK_XPATH)
    without_punctuation = element.text.lower().translate(
        str.maketrans("", "", string.punctuation)
    )
    return without_punctuation.replace(" ", "_")


def _start_download(browser):
    """Open the raw video URL from the player iframe and trigger the download.

    Raises:
        RuntimeError: if the player frame has no ``<video>`` tag with a ``src``.
    """
    browser.switch_to.frame(browser.find_element(By.XPATH, VIDEO_FRAME_XPATH))
    soup = BeautifulSoup(browser.page_source, "html.parser")
    video_tag = soup.find("video")
    video_url = video_tag.get("src") if video_tag is not None else None
    if not video_url:
        raise RuntimeError("No <video> element with a src found in the player frame")
    print(f"URL:\t{video_url}")

    browser.get(video_url)
    # Lookup is only a presence check: it raises if the player did not load.
    browser.find_element(By.TAG_NAME, "video")
    sleep(1)

    actions = ActionChains(browser)
    actions.send_keys(Keys.SPACE)
    actions.perform()
    sleep(1)
    # tab 5, space, up arrow 2, space
    do_keys(Keys.TAB, 5, actions)
    do_keys(Keys.SPACE, 1, actions)
    do_keys(Keys.UP, 2, actions)
    do_keys(Keys.SPACE, 1, actions)
    actions.perform()


def _wait_for_download(browser, default_video_name, new_video_name, started, attempt):
    """Poll once a second until the downloaded file appears, then rename it.

    Returns True when the file showed up (it is renamed to ``new_video_name``,
    or discarded if that name already exists). Returns False when the browser
    session went away or the wait budget ran out.
    """
    for _ in range(DOWNLOAD_TIMEOUT_S + 2):
        if os.path.isfile(default_video_name):
            if os.path.exists(new_video_name):
                os.remove(default_video_name)
                print(f"Already Exists! [{time.time() - started}s]")
            else:
                os.rename(default_video_name, new_video_name)
                print(f"Download complete! [{time.time() - started}s]")
            return True
        try:
            _ = browser.window_handles
        except WebDriverException:
            # The browser is gone, so nothing will ever finish downloading.
            return False
        sleep(1)
    print(f"Download Timed Out. Trying again. [{attempt}]")
    return False


def download_episode(count, video_dir, attempt=1):
    """Download episode ``count`` into ``video_dir``; return True on success."""
    url = f"https://www.wcostream.com/naruto-shippuden-episode-{count}"
    default_video_name = os.path.join(video_dir, "getvid.mp4")

    browser = _make_browser(video_dir)
    try:
        browser.get(url)
        sleep(1)
        title = _episode_title(browser)
        new_video_name = os.path.join(video_dir, f"episode_{count}_{title}.mp4")
        cls()
        print(f"Title: {title}")

        _start_download(browser)
        started = time.time()
        print(f"Downloading: {new_video_name}")
        return _wait_for_download(
            browser, default_video_name, new_video_name, started, attempt
        )
    finally:
        # Always end the session so a failed step does not leak a browser.
        browser.quit()


def main():
    """Download every episode from FIRST_EPISODE through LAST_EPISODE."""
    video_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "videos")
    os.makedirs(video_dir, exist_ok=True)

    count = FIRST_EPISODE
    attempts = 0  # consecutive failures for the current episode
    while count <= LAST_EPISODE:
        if download_episode(count, video_dir, attempts + 1):
            count += 1
            attempts = 0
            continue
        attempts += 1
        if attempts >= MAX_ATTEMPTS:
            # Bound the retries so one bad episode cannot loop forever.
            print(f"Giving up on episode {count} after {attempts} attempts.")
            count += 1
            attempts = 0


if __name__ == '__main__':
    main()