"""Print the text of every hyperlink found on a web page.

The module defines ``parselinks``, an ``HTMLParser`` subclass that collects
the visible text of ``<a href=...>`` elements, and a small script entry point
that runs it against the python.org front page.
"""

import sys
import urllib

try:  # Python 3 module layout
    from html.parser import HTMLParser
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from HTMLParser import HTMLParser
    urlopen = urllib.urlopen


DEFAULT_URL = "http://www.python.org/index.html"


class parselinks(HTMLParser):
    """Collect the text content of ``<a>`` elements that carry an ``href``.

    Attributes:
        data: list of collected link texts, in document order.
        href: 1 while text is being accumulated for a link, otherwise 0.
        linkname: text accumulated so far for the link being read.
    """

    def __init__(self):
        self.data = []
        self.href = 0
        self.linkname = ''
        # Explicit base-class call rather than super(): on Python 2
        # HTMLParser is an old-style class and super() does not work there.
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start collecting text when an ``<a>`` tag with ``href`` opens."""
        if tag == 'a' and any(name == 'href' for name, _ in attrs):
            self.href = 1

    def handle_data(self, data):
        """Append *data* to the current link text while inside a link."""
        if self.href:
            self.linkname += data

    def handle_endtag(self, tag):
        """On ``</a>``, store the collected text and reset the link state.

        All whitespace is removed from the text (not merely collapsed);
        links whose text is empty after that are not recorded.
        """
        if tag != 'a':
            return
        # split() with no arguments drops every whitespace run, so joining
        # on '' leaves nothing for a further strip() to remove.
        name = ''.join(self.linkname.split())
        if name:
            self.data.append(name)
        self.linkname = ''
        self.href = 0

    def getresult(self):
        """Print each collected link text on its own line."""
        for value in self.data:
            # Single-argument call form prints identically on Python 2 and 3.
            print(value)


def _fetch_html(url):
    """Return the document at *url* as a native ``str``.

    The response is always closed, even if reading fails.
    """
    response = urlopen(url)
    try:
        raw = response.read()
        if not isinstance(raw, str):
            # Python 3: read() returns bytes but HTMLParser.feed() needs
            # text. On Python 2 the payload is already str and is left as is.
            charset = response.headers.get_content_charset() or 'utf-8'
            raw = raw.decode(charset, 'replace')
    finally:
        response.close()
    return raw


def main(url=DEFAULT_URL):
    """Fetch *url*, parse it and print the text of every link found."""
    parser = parselinks()
    parser.feed(_fetch_html(url))
    # close() forces processing of any buffered input, so it has to run
    # before the results are reported.
    parser.close()
    parser.getresult()


if __name__ == "__main__":
    main()