Python Web crawling data has different tags

Asked 2 years ago, Updated 2 years ago, 13 views

Cropped web page: https://finance.yahoo.com/quote/AWR?p=AWR

The tag of other imported parts is span, but only the part of Forward Divide & Yield is td, so I modified this part to write a code to represent all the information...But it doesn't work. I ask for your help me.

<Original Code>

import pandas as pd
import datetime
import requests
import yfinance as yf
import time
from requests.exceptions import ConnectionError
from bs4 import BeautifulSoup



def web_content_div(web_content,class_path):
    web_content_div = web_content.find_all('div',{'class': class_path})
    try:
        spans = web_content_div[0].find_all('span')
        texts = [span.get_text() for span in spans]

    except IndexError:
        texts = []

    return texts

def real_time_price(stock_code):

    url = 'https://finance.yahoo.com/quote/' + stock_code + '?p=' + stock_code 

    try :
        r = requests.get(url)
        web_content = BeautifulSoup(r.text,'lxml')
        texts = web_content_div(web_content, 'My(6px) Pos(r) smartphone_Mt(6px)')
        if texts != []:
            price, change = texts[0],texts[1]
        else:
            price , change = [] , []

    #ForwardDividend & Yield#########################################################################################################################################
        texts = web_content_div(web_content,'D(ib) W(1/2) Bxz(bb) Pstart(12px) Va(t) ie-7_D(i) ie-7_Pos(a) smartphone_D(b) smartphone_W(100%) smartphone_Pstart(0px) smartphone_BdB smartphone_Bdc($seperatorColor)')
        if texts != []:
            for count, forword in enumerate(texts):
                if forword == 'Forward Dividend & Yield':
                   dividend = texts[count + 1]
        else:
            dividend = []
    ####################################################################################################################################################

        texts = web_content_div(web_content,'D(ib) W(1/2) Bxz(bb) Pstart(12px) Va(t) ie-7_D(i) ie-7_Pos(a) smartphone_D(b) smartphone_W(100%) smartphone_Pstart(0px) smartphone_BdB smartphone_Bdc($seperatorColor)')
        if texts != []:
            for count, EX in enumerate(texts):
                if EX == 'Ex-Dividend Date':
                    EXdate = texts[count + 1]
        else:
            EXdate = []

        texts = web_content_div(web_content,'D(ib) W(1/2) Bxz(bb) Pend(12px) Va(t) ie-7_D(i) smartphone_D(b) smartphone_W(100%) smartphone_Pend(0px) smartphone_BdY smartphone_Bdc($seperatorColor)')
        if texts != []:
            for count, vol in enumerate(texts):
                if vol == 'Volume':
                    volume = texts[count + 1]
        else:
            volume = []


    except ConnectionError:
        price, change, dividend, EXdate,volume = [],[],[],[],[]

    return price, change, dividend, EXdate,volume


stock=['awr']


while(True):
    info = []
    col = []
    time_stamp = datetime.datetime.now() - datetime.timedelta(hours=14)
    time_stamp = time_stamp.strftime('%Y-%M-%D %H:%M:%S')
    for stock_code in stock:
        price, change,dividend, EXdate,volume = real_time_price(stock_code)
        info.append(price)
        info.extend([change])
        info.extend([dividend])
        info.extend([EXdate])
        info.extend([volume])
        time.sleep(1)

    col = [time_stamp]
    col.extend(info)
    print(col)

Results

2021-51-03/16/21 03:51:27', '72.38', '+0.22 (+0.30%)', 'Ex-Dividend Date', 'Feb 12, 2021', '203,953'

Among them, I would like to modify 'Ex-divided date' to be 1.34 (1.86%) corresponding to Forward Divided & Yield.

python

2022-09-20 17:39

1 Answers

The cord is long. You have created only the necessary parts.

You are using the data provided in HTML as lxml.

I don't know what kind of data we need, but... Extracts can be made using the following formula:

import requests
from bs4 import BeautifulSoup

def real_time_price(stock_code = 'AWR'):

    url = 'https://finance.yahoo.com/quote/' + stock_code + '?p=' + stock_code
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')

    price = soup.select_one(r'span.Mb\(-4px\).D\(ib\)').text
    rate = soup.select_one(r'span.Fw\(500\).Fz\(24px\)').text
    Earnings_Date = soup.select_one(r'div.smartphone_Pstart\(0px\).smartphone_BdB.smartphone_Bdc\(\$seperatorColor\) > table > tbody > tr:nth-of-type(5) > td.Ta\(end\).Fw\(600\).Lh\(14px\)').text
    Yield = soup.select_one(r'#quote-summary > div.D\(ib\).W\(1\/2\).Bxz\(bb\).Pstart\(12px\).Va\(t\).ie-7_D\(i\).ie-7_Pos\(a\).smartphone_D\(b\).smartphone_W\(100\%\).smartphone_Pstart\(0px\).smartphone_BdB.smartphone_Bdc\(\$seperatorColor\) > table > tbody > tr:nth-of-type(6) > td.Ta\(end\).Fw\(600\).Lh\(14px\)').text
    Volume = soup.select_one(r'#quote-summary > div.D\(ib\).W\(1\/2\).Bxz\(bb\).Pend\(12px\).Va\(t\).ie-7_D\(i\).smartphone_D\(b\).smartphone_W\(100\%\).smartphone_Pend\(0px\).smartphone_BdY.smartphone_Bdc\(\$seperatorColor\) > table > tbody > tr:nth-of-type(7) > td.Ta\(end\).Fw\(600\).Lh\(14px\) > span').text
    Dividend_Date = soup.select_one(r'#quote-summary > div.D\(ib\).W\(1\/2\).Bxz\(bb\).Pstart\(12px\).Va\(t\).ie-7_D\(i\).ie-7_Pos\(a\).smartphone_D\(b\).smartphone_W\(100\%\).smartphone_Pstart\(0px\).smartphone_BdB.smartphone_Bdc\(\$seperatorColor\) > table > tbody > tr:nth-of-type(7) > td.Ta\(end\).Fw\(600\).Lh\(14px\) > span').text

    print([Earnings_Date, price, rate, Yield, Dividend_Date, Volume])


real_time_price()

>> ['May 03, 2021 - May 07, 2021', '73.07', '+0.69 (+0.95%)', '1.34 (1.86%)', 'Feb 12, 2021', '138,368']


2022-09-20 17:39

If you have any answers or tips


© 2024 OneMinuteCode. All rights reserved.