from html.parser import HTMLParser
import urllib.request  # urlopen() below needs this; the snippet never imported it

# Fetch the page and print its markup.
# The response body is bytes: decode it with the charset the server declares
# (falling back to UTF-8), because str(bytes) would produce the "b'...'" repr
# with escaped newlines rather than the actual page text.
with urllib.request.urlopen('http://www.google.com') as br:
    charset = br.headers.get_content_charset() or 'utf-8'
    html = br.read().decode(charset, errors='replace')
print(html)
Running this just prints the raw HTML source as-is.
From <a class=gb1 href="http://www.youtube.com/?gl=KR&tab=w1">YouTube</a>, I only want to extract the text "YouTube", with the tag and its attributes removed.
How do I remove all tags and elements?
html5 python
Stripping HTML tags varies slightly depending on the Python version.
from HTMLParser import HTMLParser
class MLStripper(HTMLParser):
    """HTML parser that keeps only the text between tags (Python 2 variant).

    Feed markup with ``feed()``, then read the accumulated text with
    ``get_data()``. Tags and their attributes are discarded because only
    ``handle_data`` is overridden.
    """

    def __init__(self):
        # Call the base initializer explicitly instead of a bare reset():
        # on Python 2 it does exactly the same thing (it only calls reset()),
        # and HTMLParser is an old-style class there, so super() is not usable.
        HTMLParser.__init__(self)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Collect one run of text found outside of any tag."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text fragments joined into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of ``html`` with all tags removed.

    Args:
        html: Markup to strip, as a string.

    Returns:
        The text collected by ``MLStripper``, concatenated in document order.
    """
    s = MLStripper()
    s.feed(html)
    # close() forces the parser to process anything it is still buffering
    # (e.g. an incomplete construct at the very end of the input).
    s.close()
    return s.get_data()
from html.parser import HTMLParser
class MLStripper(HTMLParser):
    """HTML parser that keeps only the text between tags (Python 3 variant).

    Feed markup with ``feed()``, then read the accumulated text with
    ``get_data()``. Tags and their attributes are discarded because only
    ``handle_data`` is overridden.
    """

    def __init__(self):
        # Let the base class initialise its own state rather than poking
        # parser attributes by hand. convert_charrefs=True makes character
        # references such as "&amp;" arrive in handle_data already decoded.
        # The old "strict" flag is gone from HTMLParser, so it is not set.
        super().__init__(convert_charrefs=True)
        # Text fragments in the order the parser reported them.
        self.fed = []

    def handle_data(self, d):
        """Collect one run of text found outside of any tag."""
        self.fed.append(d)

    def get_data(self):
        """Return all collected text fragments joined into one string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of ``html`` with all tags removed.

    Args:
        html: Markup to strip, as a ``str`` (decode bytes before calling).

    Returns:
        The text collected by ``MLStripper``, concatenated in document order.
    """
    s = MLStripper()
    s.feed(html)
    # With convert_charrefs enabled the parser may hold back trailing text
    # (for example input ending in an unterminated "&...") until it knows
    # no more data is coming; close() flushes that buffered text.
    s.close()
    return s.get_data()
from html.parser import HTMLParser
import urllib.request
# Paste the MLStripper / strip_tags definitions for your Python version here.

# urlopen() returns the body as bytes, but HTMLParser.feed() only accepts str
# on Python 3, so decode with the charset the server declares (UTF-8 fallback).
# The with-block closes the HTTP response once the body has been read.
with urllib.request.urlopen('http://www.google.com') as br:
    charset = br.headers.get_content_charset() or 'utf-8'
    html = br.read().decode(charset, errors='replace')
print(strip_tags(html))
© 2024 OneMinuteCode. All rights reserved.