Loop over a list of URLs and save the result for each item in its own file.

Asked 2 years ago, Updated 2 years ago, 67 views

Hello, I'm a Python beginner. I have a question: I'm making a phishing-site inspection program.

I'd like to change it so that it works like this: "When A.com is entered, run the program once, save the output under the file name A.com.txt, then continue with B.com."

More specifically,

Suppose the file domain.txt contains the following: https://www.google.com/ https://www.youtube.com/ https://www.yahoo.co.jp/ https://www.amazon.co.jp/ https://www.google.co.jp/ https://twitter.com/ https://www.facebook.com/ https://www.wikipedia.org/ When all operations on one site are completed, the program should move on to the next site. How should I change my code to do this?

This is the code.

import sys
import time
#import MeCab
import requests
import pandas as pd
from bs4 import BeautifulSoup
#import chromedriver_binary
from selenium import webdriver
import re
import chromedriver_binary
#import firefox_screenshot
from PIL import Image

# Ask the user for the page to inspect.
# NOTE: raw_input is the Python 2 spelling; on Python 3 the equivalent is input().
url = raw_input("Enter the string value : ")

# Load the page in Firefox and store a screenshot of it at a fixed path.
Firefoxbrowser = webdriver.Firefox()
try:
    Firefoxbrowser.get(url)  # open the webpage
    Firefoxbrowser.save_screenshot('/Users/eunhyulkim/pshing/test.png')
finally:
    # Always shut the browser down, even when loading the page or taking the
    # screenshot fails; otherwise the Firefox process is left running.
    Firefoxbrowser.quit()

# Both patterns accept an http/https/ftp/ftps URL and capture pieces of its
# host name.  A repeated group only keeps its LAST repetition, so at most the
# trailing labels of a long host name end up in the captures.
#
# p: (last extra "label.") + ("label.") + (final label)
#    e.g. "https://www.google.com/" -> "www.", "google.", "com"
p = re.compile(r"(?:https?|ftps?)://([A-Za-z0-9-]{1,63}\.)*([A-Za-z0-9-]{1,63}\.)([A-Za-z0-9-]{1,63})/?[A-Za-z0-9.\-?=#%/]*")
# z: (last "label.") followed by a literal "com", a literal "org", or a
#    generic "label." + label pair
#    e.g. "https://www.google.com/" -> "google.", "com"
z = re.compile(r"(?:https?|ftps?)://([A-Za-z0-9-]{1,63}\.)*(?:(com)|(org)|([A-Za-z0-9-]{1,63}\.)([A-Za-z0-9-]{1,63}))/?[A-Za-z0-9.\-?=#%/]*")


def re(url, num):
    """Return the host-name pieces of *url* joined into a single string.

    NOTE: the name ``re`` is kept so existing callers keep working, but from
    this definition onwards it shadows the standard ``re`` module in this
    file.  ``p`` and ``z`` therefore have to be compiled above this point.

    Args:
        url: the URL to analyse; it is matched from its first character.
        num: ``0`` selects pattern ``p``; any other value selects ``z``.

    Returns:
        The concatenated captures (groups that did not take part in the match
        contribute an empty string), e.g. ``"www.google.com"`` for ``p`` and
        ``"google.com"`` for ``z`` on ``"https://www.google.com/"``.
        An empty string is returned when *url* does not match, so the result
        can always be concatenated with other strings.
    """
    pattern = p if num == 0 else z
    m = pattern.match(url)
    if m is None:
        # Previously this fell through and returned None, which made the
        # caller's string concatenation fail with a TypeError.
        return ""
    return "".join(map(str, m.groups('')))

def google_image_search(url):
    """Upload the saved screenshot to Google image search and log the hits.

    The function opens Google's image search page in Chrome, submits the
    screenshot stored at ``/Users/eunhyulkim/pshing/test.png`` through the
    page's file input, parses the resulting HTML and writes one line per
    result title to ``<url>image_list.txt`` in the form
    ``title,,,link,,,domain1,,,domain2``.

    Args:
        url: prefix of the output file name; ``'image_list.txt'`` is appended
            to it unchanged.

    Notes:
        * ``re`` below is this file's own helper ``re(url, num)`` (it shadows
          the standard module), not the ``re`` module.
        * Result blocks are the ``<div>`` elements whose first CSS class is
          ``"r"``.  Every ``<h3>`` inside such a block is a title and is paired
          with the ``href`` of the block's first ``<a>``; pairing per block
          keeps titles and links aligned even when a block has no link or
          several headings.
    """
    # Local import so the block stays self-contained.  io.open writes UTF-8
    # text on both Python 2 and Python 3.
    import io

    driver = webdriver.Chrome('/usr/local/bin/chromedriver')
    try:
        driver.get('https://www.google.co.jp/imghp?hl=ja')
        time.sleep(5)
        # presumably the "search by image" button -- TODO confirm the class name
        driver.find_element_by_class_name('BwoPOe').click()
        time.sleep(0.5)
        print('get')
        print('quit')
        # presumably the upload <input type="file">; sending a path selects the file
        elm = driver.find_element_by_id('qbfile')
        elm.send_keys('/Users/eunhyulkim/pshing/test.png')
        time.sleep(2)  # give the result page time to load

        data = driver.page_source.encode('utf-8')
    finally:
        # Close the browser even when one of the page interactions fails.
        driver.quit()

    html = BeautifulSoup(data, "html.parser")

    results = []  # (title, link) pairs in page order
    for block in html.find_all("div"):
        classes = block.get('class')
        # Only divs whose first class is exactly "r" are result blocks.
        if not classes or classes[0] != "r":
            continue

        titles = [heading.text for heading in block.find_all("h3")]
        for title in titles:
            print(title)

        # Only the first anchor of the block is used as the result link.
        anchor = block.find("a")
        link = u""
        if anchor is not None:
            print(anchor.get('href'))
            link = anchor.get('href') or u""

        results.extend((title, link) for title in titles)

    with io.open(url + 'image_list.txt', 'w', encoding='utf-8') as fh:
        for title, link in results:
            # The helper may return None for links it cannot parse.
            domein1 = re(link, 0) or u""
            domein2 = re(link, 1) or u""
            fh.write(u",,,".join([title, link, domein1, domein2]) + u"\n")

# Run the image search. The argument is only used as the prefix of the output
# file name ("<argument>image_list.txt").
# NOTE(review): '/url/' looks like a placeholder; presumably the URL entered
# above (`url`) or a name derived from it was meant here -- confirm.
google_image_search('/url/')

Thank you for your kind cooperation.

for

2022-09-22 16:08

1 Answer

Please refer to the code below; it is a sample that processes each line of the domain.txt file (the example contents of domain.txt are listed first).

https://www.google.com
https://www.youtube.com
https://www.yahoo.co.jp
https://www.amazon.co.jp
https://www.google.co.jp
https://twitter.com 
https://www.facebook.com
https://www.wikipedia.org
import re

def do_action(domain):
    """Create ``<host>_image_list.txt`` for the URL given in *domain*.

    Args:
        domain: an ``http://`` or ``https://`` URL such as
            ``"https://www.google.com"``.

    Raises:
        ValueError: if no dotted host name can be found in *domain*.
    """
    # Keep only the host name: the capture group drops the "https://" or
    # "http://" prefix and anything after the top-level domain.
    hosts = re.findall(r'https?://([A-Za-z_0-9.-]+\.[a-z]+)', domain)
    if not hosts:
        raise ValueError(f'no host name found in {domain!r}')
    with open(f'{hosts[0]}_image_list.txt', 'w') as f:
        f.write('anything...')

# Process the URLs listed in domain.txt one at a time, one URL per line.
with open('domain.txt', 'r') as f:
    for line in f:
        domain = line.strip()  # drop the trailing newline and stray spaces
        if not domain:
            continue  # a blank line holds no URL to process
        do_action(domain)


2022-09-22 16:08

If you have any answers or tips


© 2024 OneMinuteCode. All rights reserved.