Hello, I'm a beginner at Python.
I'm scraping a site called FnGuide, and I want to extract the data I need,
but the two tables I'm interested in have exactly the same class. Ha...
This is really unexpected, so I'm embarrassed.
How do I solve this?
I'd appreciate it if you could answer me.
To be more precise, the table holding the annual data and the table holding the quarterly data have the same class, so selecting by class cannot tell them apart.
# -*- coding: utf-8 -*-
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import numpy as np
# Scrape one income-statement table from FnGuide into a pandas DataFrame and
# print it. Rows are the account lines of the statement, columns are the
# reporting periods taken from the table header.
code = '298000'

# The annual and quarterly tables share the same CSS class, so the class alone
# cannot tell them apart. The parent <div> of each table has its own id, so
# the table is located through that id instead.
# NOTE(review): 'divSonikY' is taken to be the annual container; the quarterly
# one is presumably 'divSonikQ' - confirm against the page source.
table_div_id = 'divSonikY'

# Importing data
fs_rpt_url = "http://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A{}&cID=&MenuYn=Y&ReportGB=&NewMenuID=103&stkGb=701".format(code)
# A timeout keeps the script from hanging forever, and raise_for_status turns
# an HTTP error page into an exception instead of a confusing parse failure.
fs_rpt_res = requests.get(fs_rpt_url, timeout=10)
fs_rpt_res.raise_for_status()
soup = BeautifulSoup(fs_rpt_res.text, "lxml")

container = soup.find("div", attrs={"id": table_div_id})
if container is None:
    raise ValueError("no <div id='{}'> found in the page".format(table_div_id))
table = container.find("table")
if table is None:
    raise ValueError("no <table> found inside <div id='{}'>".format(table_div_id))

# Set column: the first header cell is the corner cell above the row labels,
# so only header cells 1..6 are period names.
columns = table.find("thead").find_all("th")
data_column = [column.get_text().strip() for column in columns][1:7]

# Find data: every body row carries its label in a <th> and its values in
# <td> cells. Labels are read from the page rather than from a hard-coded
# list, so the index always has exactly one entry per scraped row.
# NOTE(review): expandable rows may carry helper text inside the <th>; clean
# the labels further if the printed index shows extra words.
data_rows = table.find("tbody").find_all("tr")
financial_index = []
data_values = []
for row in data_rows:
    cells = row.find_all("td")
    if not cells:
        # Rows without value cells hold no data; skip them.
        continue
    label = row.find("th")
    financial_index.append(label.get_text().strip() if label is not None else "")
    data_values.append([cell.get_text().strip() for cell in cells])

# Build the frame in one step: DataFrame.append was removed in pandas 2.0 and
# no placeholder row has to be created and dropped afterwards.
income_sheet_df = pd.DataFrame(data_values, columns=data_column, index=financial_index)

# To save the result: income_sheet_df.to_excel('income_sheet.xlsx')
print(income_sheet_df)
Because the id of the parent div tag is different for each of the two tables, select the table through that div id instead of through the class.
data_rows = soup.find("table", attrs = {"class":"us_table_ty1 h_fix zigbg_no"}).find("tbody").find_all("tr")
=>
# select() returns every matching <tr>; select_one() would return only the
# first row, and the loop over data_rows needs the whole list of rows.
data_rows = soup.select('div#divSonikY > table > tbody > tr')
© 2024 OneMinuteCode. All rights reserved.