I'm crawling a site with Selenium and Beautiful Soup in Python.
# Open the listing page, click through the first ten 'tit_area' entries and,
# for each page that opens, print the title followed by the first and last
# date found under either of the two date class names.
wlist = list(range(10))
driver = webdriver.Chrome('//chromedriver')
driver.get('url')
time.sleep(1)  # fixed pause before querying the page
for i in wlist:
    # The listing is looked up again on every pass because each pass ends
    # with driver.back(), which returns to the listing page.
    thumb = driver.find_elements_by_class_name('tit_area')
    thumb[i].click()
    time.sleep(1)
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')

    # find() returns None when nothing matches, so guard before .text.
    w_title = soup.find('span', {'class': 'tit_toon'})
    if w_title is not None:
        print(w_title.text.strip())

    # find_all() returns an empty list (never None) when nothing matches.
    # Test truthiness before indexing so that a page lacking one of the two
    # classes is skipped for that class instead of raising IndexError.
    w_pdate_e = soup.find_all('p', {'class': 'epsode_date'})
    if w_pdate_e:
        print(w_pdate_e[0].text.strip())
        print(w_pdate_e[-1].text.strip())

    w_pdate_t = soup.find_all('td', {'class': 'toon_date'})
    if w_pdate_t:
        print(w_pdate_t[0].text.strip())
        print(w_pdate_t[-1].text.strip())

    driver.back()
    time.sleep(1)
When scraping the text, how can I handle both cases for each page: when the class name is 'epsode_date' and when the class name is 'toon_date'?
python selenium beautifulsoup
import requests
from bs4 import BeautifulSoup as bs
# Two sample episode-list pages, one per date markup variant.
url1 = 'https://toptoon.com/weekly/ep_list/hug_me_boss'  # uses toon_date
url2 = 'https://toptoon.com/weekly/ep_list/she_NA'  # uses epsode_date

r = requests.get(url1)
s = bs(r.text, 'html.parser')
title = s.find_all('span', {'class': 'tit_toon'})

# Try the <p class="epsode_date"> variant first; find() gives None on a miss,
# in which case fall back to the <td class="toon_date"> variant.
update = s.find('p', {'class': 'epsode_date'})
if update is None:
    update = s.find('td', {'class': 'toon_date'})

# find_all() variant: a miss is an empty list rather than None, so compare
# against [] instead.
#update = s.find_all('p', {'class': 'epsode_date'}) if s.find_all('p', {'class': 'epsode_date'}) != [] else s.find_all('td', {'class': 'toon_date'})
Note that find_all returns [] rather than None when there is no result. So if you use find_all instead of find, you have to check for an empty list, as in the commented-out line in the code above.
By the way, epsode_date is not a typo. -_-;;
© 2024 OneMinuteCode. All rights reserved.