http://dart.fss.or.kr/dsac001/mainAll.do
I wrote code to fetch a list from the site above, based on code that Iluno gave me.
My intention was to first print the five most recently uploaded entries, and after that to print [No Announcement] when nothing new has appeared, or to print the new entries when there are any.
The rest seems to work properly, but on the initial run only the 5th entry is printed instead of all five of the top entries.
Which part should I change?
from bs4 import BeautifulSoup
import urllib.request
import re
from apscheduler.schedulers.blocking import BlockingScheduler
# Scheduler that later runs print_darts at a fixed interval.
sched = BlockingScheduler()
# Items already printed; print_darts passes this to extract_darts for filtering.
old_darts = []
def extract_darts(old_darts=[]):
    """Fetch the DART listing page and return scraped values not yet seen.

    The page is downloaded and parsed, the first ``<tr>`` is skipped, and
    the next five rows are visited. For each row the company name, report
    title and report link are extracted and stored in ``dart_list``.

    Note that ``dart_list`` is rebound on every pass of the row loop, so
    once the loop finishes it holds only the three values of the last row
    visited. Those values are then filtered against ``old_darts``.

    Args:
        old_darts: Values that have already been reported.

    Returns:
        A list of the values in ``dart_list`` that are not in ``old_darts``.
    """
    page_url = "http://dart.fss.or.kr/dsac001/mainAll.do"
    with urllib.request.urlopen(page_url) as response:
        soup = BeautifulSoup(response.read(), 'html.parser')

    for row in soup.findAll('tr')[1:6]:
        cells = row.findAll('td')
        company = re.sub(r'[\t\n\r ]', '', cells[1].find('a').text)
        report_anchor = cells[2].find('a')
        report = re.sub(r'[\t\n\r ]', '', report_anchor.text)
        report_link = 'dart.fss.or.kr' + report_anchor.attrs['href']
        # Plain assignment: each row replaces the previous row's tuple.
        dart_list = company, report, report_link

    return [value for value in dart_list if value not in old_darts]
def print_darts():
    """Print whatever extract_darts reports as new, then remember it.

    Prints each new item on its own line, or ``[No Announcement]`` when
    there is nothing new. The printed items are added to the module-level
    ``old_darts`` list, which is then de-duplicated through a set.
    """
    global old_darts
    fresh = extract_darts(old_darts)
    if not fresh:
        print('[No Announcement]')
    else:
        for item in fresh:
            print(item)
    old_darts.extend(fresh)
    # Round-trip through a set to drop duplicates (ordering is not kept).
    old_darts = list(set(old_darts))
# Run once immediately so the current top entries are printed at startup.
print_darts()
# Register print_darts to run again every 3 seconds.
sched.add_job(print_darts, 'interval', seconds=3)
# Start the scheduler; with a BlockingScheduler this call does not return
# while the scheduler is running.
sched.start()
*For convenience, the information (list) you are trying to print will be called 'disclosure information'.
In extract_darts(), the disclosure information is assigned to dart_list incorrectly.
If you assign the disclosure information to dart_list with the equals sign (=),
it is overwritten on every iteration of the for statement.
So when the for statement ends, dart_list holds only the last (5th) disclosure.
This is why only the last (5th) disclosure information is printed.
for tr in trs:
# Omitted
dart_list = company, report, report_link # Invalid
# dart_list value : 5th disclosure information
To make it work as you intended, each disclosure's information must be added to dart_list with append().
dart_list = []
for tr in trs:
# Omitted
dart_list.append([company,report, report_link])
# dart_list value : [1st disclosure information, 2nd disclosure information, ..., 5th disclosure information]
The following is the full code that has been modified.
from bs4 import BeautifulSoup
import urllib.request
import re
from apscheduler.schedulers.blocking import BlockingScheduler
# Scheduler that later runs print_darts at a fixed interval.
sched = BlockingScheduler()
# Disclosures already printed; print_darts passes this to extract_darts for filtering.
old_darts = []
def extract_darts(old_darts=None, limit=5):
    """Scrape the newest DART disclosures and return those not seen before.

    The listing page is downloaded and parsed; the first ``<tr>`` is
    skipped and up to ``limit`` following rows are read. Each usable row
    becomes a ``[company, report, report_link]`` list, with whitespace
    removed from the company name and the report title.

    Args:
        old_darts: Previously reported entries, each a
            ``[company, report, report_link]`` list. ``None`` (the
            default) means nothing has been reported yet.
        limit: Number of rows to read after the first ``<tr>``.
            Defaults to 5.

    Returns:
        A list of ``[company, report, report_link]`` lists, in page
        order, for every scraped row that is not already in
        ``old_darts``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    # Avoid a shared mutable default; None stands for "nothing seen yet".
    seen = [] if old_darts is None else old_darts

    with urllib.request.urlopen("http://dart.fss.or.kr/dsac001/mainAll.do") as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    new_darts = []
    for tr in soup.findAll('tr')[1:limit + 1]:
        cells = tr.findAll('td')
        # Skip rows that lack the company/report cells or their links
        # instead of letting one malformed row abort the whole poll.
        if len(cells) < 3:
            continue
        company_anchor = cells[1].find('a')
        report_anchor = cells[2].find('a')
        if company_anchor is None or report_anchor is None:
            continue

        company = re.sub(r'[\t\n\r ]', '', company_anchor.text)
        report = re.sub(r'[\t\n\r ]', '', report_anchor.text)
        report_link = 'dart.fss.or.kr' + report_anchor.attrs['href']

        # Entries stay lists (not tuples) so that membership tests against
        # the list-of-lists history kept by the caller keep matching.
        dart = [company, report, report_link]
        if dart not in seen:
            new_darts.append(dart)
    return new_darts
def print_darts():
    """Print the disclosures extract_darts reports as new, then remember them.

    Prints each new ``[company, report, report_link]`` entry on its own
    line, or ``[No Announcement]`` when there is nothing new. The printed
    entries are added to the module-level ``old_darts`` list, which is
    then de-duplicated.
    """
    global old_darts
    fresh = extract_darts(old_darts)
    if not fresh:
        print('[No Announcement]')
    else:
        for entry in fresh:
            print(entry)
    old_darts.extend(fresh)
    # Lists are unhashable and cannot be put in a set directly, so each
    # entry is converted to a tuple for de-duplication and back to a list.
    unique_entries = set(map(tuple, old_darts))
    old_darts = [list(entry) for entry in unique_entries]
# Run once immediately so the current top disclosures are printed at startup.
print_darts()
# Register print_darts to run again every 3 seconds.
sched.add_job(print_darts, 'interval', seconds=3)
# Start the scheduler; with a BlockingScheduler this call does not return
# while the scheduler is running.
sched.start()
I hope this answer was helpful!
© 2025 OneMinuteCode. All rights reserved.