Python output error

http://dart.fss.or.kr/dsac001/mainAll.do

I wrote code to fetch a list from the site above, based on code that Iluno shared with me.

My intent is to print the five most recently uploaded entries on the first run, and on every later run print [no disclosure] (the '[No Announcement]' message in the code) if nothing new has appeared, or print the new entries if there are any.

The rest seems to work properly, but on the initial run only the 5th entry of the top five is printed, instead of all five entries.

Which part should I touch??

from bs4 import BeautifulSoup
import urllib.request
import re
from apscheduler.schedulers.blocking import BlockingScheduler

# Scheduler instance that the polling job is registered on further below.
sched = BlockingScheduler()

# Entries that have already been printed; read and rebound by print_darts().
old_darts = []

def extract_darts(old_darts=[]):
    """Fetch the DART listing page and return items not present in ``old_darts``.

    NOTE(review): as written, only the three fields of the LAST parsed row
    are returned, because ``dart_list`` is rebound on every loop iteration
    instead of accumulating one entry per row.
    """
    with urllib.request.urlopen("http://dart.fss.or.kr/dsac001/mainAll.do") as response:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')

    # Keep table rows 1 through 5; row 0 is skipped.
    trs = soup.findAll('tr')[1:5+1]
    for tr in trs:
        td = tr.findAll('td')
        # Remove tabs, newlines, carriage returns and spaces from the link text.
        company = re.sub(r'[\t\n\r ]', '', td[1].find('a').text) 
        report = re.sub(r'[\t\n\r ]', '', tr.findAll('td')[2].find('a').text) 
        report_link = 'dart.fss.or.kr' + tr.findAll('td')[2].find('a').attrs['href']

        # NOTE(review): plain assignment replaces the previous row's tuple,
        # so after the loop only the last row is left in dart_list.
        dart_list = company,report,report_link

        # NOTE(review): re-created on every iteration, and never assigned at
        # all when trs is empty.
        darts = []
    # Iterates over the three fields (company, report, link) of the last row.
    for report_link in dart_list:
        dart = report_link
        darts.append(dart)

    # Keep only the items that were not reported before.
    new_darts=[]
    for dart in darts:
        if dart not in old_darts:
            new_darts.append(dart)

    return new_darts

def print_darts():
    """Print the items returned by extract_darts(), or '[No Announcement]' if none."""
    global old_darts
    new_darts = extract_darts(old_darts)
    if new_darts:
        for dart in new_darts:
            print(dart)
    else:
        print ('[No Announcement]')
    # Remember what was just printed so later runs only report unseen items.
    old_darts += new_darts.copy()
    # Drop duplicates; the set round-trip does not preserve the original order.
    old_darts = list(set(old_darts))

# Run once immediately, before the scheduler is started.
print_darts()

# Register print_darts as an interval job that fires every 3 seconds.
sched.add_job(print_darts, 'interval', seconds=3)

sched.start()

python

2022-09-21 12:28

1 Answer

*For convenience, the information (list) you are trying to print will be called 'disclosure information'.

The disclosure information is assigned to dart_list incorrectly in extract_darts().

If you assign the disclosure information to dart_list with the equal sign (=), dart_list is overwritten on every iteration of the for statement.

Then, when the for statement ends, the dart_list value becomes the last (5th) disclosure result.

This is why the output results are only the last (5th) disclosure information.

for tr in trs:
    # Omitted
    dart_list = company, report, report_link # Invalid: rebinds dart_list on every iteration
# dart_list value after the loop: only the 5th disclosure information

To make it work as you intended, each row's information must be added to dart_list with append().

dart_list = []
for tr in trs:
    # Omitted
    dart_list.append([company, report, report_link])
# dart_list value: [1st disclosure information, 2nd disclosure information, ..., 5th disclosure information]

The following is the full code that has been modified.

from bs4 import BeautifulSoup
import urllib.request
import re
from apscheduler.schedulers.blocking import BlockingScheduler

# Scheduler instance that the polling job is registered on further below.
sched = BlockingScheduler()

# Entries that have already been printed; read and rebound by print_darts().
old_darts = []

def extract_darts(old_darts=None):
    """Fetch the latest DART disclosures and return those not yet reported.

    Downloads the DART listing page, reads table rows 1-5 and builds one
    ``[company, report, report_link]`` entry per row.

    Args:
        old_darts: Entries that were already reported. ``None`` (the
            default) is treated as an empty list.

    Returns:
        list: The entries parsed from the page, in page order, that are not
        contained in ``old_darts``. Empty when nothing is new or when the
        page yielded no rows.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        IndexError, AttributeError: If a row does not have the expected
            cells or links.
    """
    # Avoid a mutable default argument, which would be shared between calls.
    if old_darts is None:
        old_darts = []

    with urllib.request.urlopen("http://dart.fss.or.kr/dsac001/mainAll.do") as response:
        html = response.read()
        soup = BeautifulSoup(html, 'html.parser')

    # Keep table rows 1 through 5; row 0 is skipped.
    trs = soup.findAll('tr')[1:5+1]

    # Created before the loop so it exists even when no rows were found;
    # assigning it inside the loop body left it unbound for an empty page.
    darts = []
    for tr in trs:
        td = tr.findAll('td')
        # Remove tabs, newlines, carriage returns and spaces from the link text.
        company = re.sub(r'[\t\n\r ]', '', td[1].find('a').text)
        report_anchor = td[2].find('a')
        report = re.sub(r'[\t\n\r ]', '', report_anchor.text)
        # NOTE: the link is built without a URL scheme, as in the original.
        report_link = 'dart.fss.or.kr' + report_anchor.attrs['href']

        # Append one entry per row; a plain assignment here would keep only
        # the last (5th) row.
        darts.append([company, report, report_link])

    return [dart for dart in darts if dart not in old_darts]

def print_darts():
    """Print disclosures that have not been shown yet and remember them.

    Calls extract_darts() with the entries printed so far. Each returned
    entry is printed on its own line; when nothing is returned,
    '[No Announcement]' is printed instead. The module-level ``old_darts``
    is then rebound to the de-duplicated union of old and new entries.
    """
    global old_darts
    fresh = extract_darts(old_darts)
    if not fresh:
        print ('[No Announcement]')
    else:
        for entry in fresh:
            print(entry)

    # Lists are unhashable and cannot be stored in a set, so each entry is
    # converted to a tuple for de-duplication and back to a list afterwards.
    unique_entries = {tuple(entry) for entry in old_darts + fresh}
    old_darts = [list(entry) for entry in unique_entries]

# Run once immediately, before the scheduler is started.
print_darts()

# Register print_darts as an interval job that fires every 3 seconds.
sched.add_job(print_darts, 'interval', seconds=3)

sched.start()

I hope this answer was helpful!

2022-09-21 12:28

If you have any answers or tips

Popular Tags

python x 4647

android x 1593

java x 1494

javascript x 1427

c x 927

c++ x 878

ruby-on-rails x 696

php x 692

python3 x 685

html x 656