I want to crawl Naver News by keyword and send the results to a Telegram bot.
I'm a beginner at coding, but with the help of Google and YouTube I wrote the following Python code in Visual Studio Code:
#### Load libraries: HTTP client, HTML parser, Telegram API, job scheduler
import requests
from bs4 import BeautifulSoup
import telegram
from apscheduler.schedulers.blocking import BlockingScheduler
#### Search keyword used in the Naver mobile search query
search_word = 'Waste landfill'
#### Create Telegram Bot
# NOTE(review): this bot token is hard-coded and has been posted publicly —
# revoke it via BotFather and load a fresh token from an environment variable.
token = '2144842764:AAEB_1dpdpCxeUnX2rY0PnFMqrt2O6H5c0A'
bot = telegram.Bot(token=token)
#### Create Scheduler (BlockingScheduler blocks the main thread while running)
sched = BlockingScheduler()
#### Module-level list of links already sent (mutated by send_links)
old_links=[]
#### Link extraction function
def extract_links(old_links=None):
    """Fetch the top news links for ``search_word`` from Naver mobile search.

    Parameters:
        old_links: list of links already sent; these are filtered out.
            Defaults to an empty list (a ``None`` sentinel is used instead of
            a mutable default argument, which would be shared across calls).

    Returns:
        list of up to 5 link URLs not present in ``old_links``.
    """
    if old_links is None:
        old_links = []
    url = f'https://m.search.naver.com/search.naver?sm=mtp_hty.top&where=m&query={search_word}'
    req = requests.get(url)
    soup = BeautifulSoup(req.text, 'html.parser')
    # BUG FIX: the page no longer contains a '#news_result_list' container,
    # so soup.select_one('#news_result_list') returned None and calling
    # .select() on it raised AttributeError. Select the anchors directly
    # from the whole document instead.
    news_list = soup.select('.bx >.news_wrap >a')
    # Take at most the first 5 result links.
    links = [news['href'] for news in news_list[:5]]
    # Keep only links we have not sent before.
    new_links = [link for link in links if link not in old_links]
    print(new_links)
    return new_links
#### Telegram Message Transfer Function
def send_links():
    """Send newly found links to the chat (or a 'no new news' notice),
    then record everything sent so far in the module-level ``old_links``."""
    global old_links
    chat_id = '879999105'
    fresh = extract_links(old_links)
    if fresh:
        for url in fresh:
            bot.sendMessage(chat_id=chat_id, text=url)
    else:
        bot.sendMessage(chat_id=chat_id, text='no new news')
    # Merge the newly sent links into the history, de-duplicated.
    old_links = list(set(old_links + fresh))
#### First start: send whatever is currently on the results page immediately
send_links()
#### Set up and run the scheduler: re-check every 30 seconds
# sched.start() blocks here; the script keeps running until interrupted.
sched.add_job(send_links, 'interval', seconds=30)
sched.start()
===Error message ======
PS C:\Users\110121\Documents\projects> & C:/Users/110121/AppData/Local/Programs/Python/Python310/python.exe c:/Users/110121/Documents/projects/waste_landfill.py
Traceback (most recent call last):
File "c:\Users\110121\Documents\projects\waste_landfill.py", line 54, in <module>
send_links()
File "c:\Users\110121\Documents\projects\waste_landfill.py", line 44, in send_links
new_links = extract_links(old_links)
File "c:\Users\110121\Documents\projects\waste_landfill.py", line 26, in extract_links
news_list = search_result.select('.bx >.news_wrap >a')
AttributeError: 'NoneType' object has no attribute 'select'
PS C:\Users\110121\Documents\projects>
I need your help, masters.
crawling telegram
The cause is that the page no longer has a '#news_result_list' element, so `select_one` returns `None`. Modify the code to select the anchors directly from the parsed document:
#search_result = soup.select_one('#news_result_list')
news_list = soup.select('.bx >.news_wrap >a')
© 2024 OneMinuteCode. All rights reserved.