Hello, I'm a Python beginner and I have a question. I'm making a phishing-site inspection program.
I'd like to change it so that it works like this: "When A.com is entered, run the program once and save the result under the file name A.com.txt, then continue working on B.com."
More specifically,
suppose the file domain.txt contains the following: https://www.google.com/ https://www.youtube.com/ https://www.yahoo.co.jp/ https://www.amazon.co.jp/ https://www.google.co.jp/ https://twitter.com/ https://www.facebook.com/ https://www.wikipedia.org/ In that case, once all operations on one site are completed, the program should start working on the next site. How should I change the code?
This is the code.
import sys
import time
#import MeCab
import requests
import pandas as pd
from bs4 import BeautifulSoup
#import chromedriver_binary
from selenium import webdriver
import re
import chromedriver_binary
#import firefox_screenshot
from PIL import Image
# Ask the user for the page to inspect. ``raw_input`` only exists on
# Python 2; fall back to ``input`` so the script also starts on Python 3.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

url = _read_line("Enter the string value : ")  # URL of the page to capture

# Open the page in Firefox and save a screenshot of it to a fixed path.
Firefoxbrowser = webdriver.Firefox()
try:
    Firefoxbrowser.get(url)
    Firefoxbrowser.save_screenshot('/Users/eunhyulkim/pshing/test.png')
finally:
    # Always close the browser, even when loading or capturing fails, so a
    # failed run does not leave a stray Firefox process behind.
    Firefoxbrowser.quit()
# NOTE(review): the function defined below is also called ``re`` and so
# replaces the imported ``re`` module at module level. Both patterns have to
# be compiled here, before the ``def`` runs; ``re.compile`` is no longer
# reachable under that name afterwards.
p = re.compile(r"(?:https?|ftps?)://([A-Za-z0-9-]{1,63}\.)*([A-Za-z0-9-]{1,63}\.)([A-Za-z0-9-]{1,63})/?[A-Za-z0-9.\-?=#%/]*")
z = re.compile(r"(?:https?|ftps?)://([A-Za-z0-9-]{1,63}\.)*(?:(com)|(org)|([A-Za-z0-9-]{1,63}\.)([A-Za-z0-9-]{1,63}))/?[A-Za-z0-9.\-?=#%/]*")


def re(url, num):
    """Return the host labels that one of the two URL patterns captures.

    Args:
        url: Text to match; the pattern is anchored at its start.
        num: ``0`` selects pattern ``p``; any other value selects ``z``.

    Returns:
        The capture groups of the match joined into one string, with groups
        that did not take part in the match contributing an empty string.
        ``None`` when *url* does not match the selected pattern.
    """
    pattern = p if num == 0 else z
    found = pattern.match(url)
    if found is None:
        return None
    return "".join(str(part) for part in found.groups(''))
def google_image_search(url, image_path='/Users/eunhyulkim/pshing/test.png'):
    """Run a Google reverse image search and save the hits to a text file.

    Opens Google image search in Chrome, uploads the image at *image_path*,
    then collects a title and a link from every ``<div>`` of the result page
    whose first CSS class is ``"r"``. One line per hit is written to
    ``url + 'image_list.txt'`` in the form
    ``title,,,link,,,domain1,,,domain2``, where the two domain fields come
    from the module-level ``re(link, 0)`` and ``re(link, 1)`` helpers.

    Args:
        url: Prefix of the output file name.
        image_path: Path of the image file to upload. Defaults to the
            screenshot path used by the rest of this script.

    Returns:
        None.
    """
    # Local import: io.open writes UTF-8 text on both Python 2 and Python 3.
    import io

    driver = webdriver.Chrome('/usr/local/bin/chromedriver')
    try:
        driver.get('https://www.google.co.jp/imghp?hl=ja')
        time.sleep(5)
        # NOTE(review): 'BwoPOe' is presumably the "search by image" button
        # and 'qbfile' the file-upload input -- confirm against the live page.
        driver.find_element_by_class_name('BwoPOe').click()
        time.sleep(0.5)
        print('get')
        print('quit')
        upload_field = driver.find_element_by_id('qbfile')
        upload_field.send_keys(image_path)
        time.sleep(2)
        data = driver.page_source.encode('utf-8')
    finally:
        # Release the browser even if a lookup or the upload failed.
        driver.quit()

    html = BeautifulSoup(data, "html.parser")

    # Collect (title, link) pairs per result <div>, so a title can never be
    # matched with a link that belongs to a different result.
    results = []
    for div in html.find_all("div"):
        classes = div.get('class')
        if not classes or classes[0] != "r":
            continue
        heading = div.find("h3")
        # Only the first anchor of a result is used. The original code had a
        # ``break`` here whose nesting was lost; this assumes it ended the
        # anchor loop after the first link -- TODO confirm.
        anchor = div.find("a")
        if heading is None or anchor is None:
            continue
        link = anchor.get('href')
        if not link:
            continue
        print(heading.text)
        print(link)
        results.append((heading.text, link))

    screen = url + 'image_list.txt'
    with io.open(screen, 'w', encoding='utf-8') as fh:
        for title, link in results:
            # ``re`` returns None for links it cannot parse (e.g. relative
            # URLs); write an empty field instead of failing.
            domein1 = re(link, 0) or ""
            domein2 = re(link, 1) or ""
            fh.write(u",,,".join((title, link, domein1, domein2)) + u"\n")
# NOTE(review): this passes the literal string '/url/', not the ``url`` value
# read from the user earlier in the script, so the output file name becomes
# '/url/image_list.txt' -- confirm whether ``url`` was intended here.
google_image_search('/url/')
Thank you for your kind cooperation.
for
Please refer to the code below. It is a sample that processes each line of the domain.txt file in turn.
https://www.google.com
https://www.youtube.com
https://www.yahoo.co.jp
https://www.amazon.co.jp
https://www.google.co.jp
https://twitter.com
https://www.facebook.com
https://www.wikipedia.org
import re
def do_action(domain):
    """Write the result file for one site.

    The host name is taken from *domain* and used as the prefix of the
    output file, e.g. ``https://www.google.com/`` produces
    ``www.google.com_image_list.txt`` in the current directory.

    Args:
        domain: An ``http://`` or ``https://`` URL.

    Raises:
        ValueError: If no http(s) URL with a host name is found in *domain*.
    """
    # Keep only the host: drop the "http://" / "https://" scheme and anything
    # after the host so the result is usable as a file name.
    found = re.search(r'https?://([A-Za-z_0-9.-]+\.[a-z]+)', domain)
    if found is None:
        raise ValueError(f'not an http(s) URL: {domain!r}')
    host = found.group(1)
    with open(f'{host}_image_list.txt', 'w') as f:
        f.write('anything...')
# Work through domain.txt one site per line; each site is fully processed
# by do_action before the next line is read.
with open('domain.txt', 'r') as f:
    for domain in f:
        domain = domain.strip()  # drop the trailing newline and stray spaces
        if not domain:
            continue  # ignore blank lines instead of passing '' to do_action
        do_action(domain)
© 2024 OneMinuteCode. All rights reserved.