"""Extract the "active" (not commented-out) URLs from a piece of HTML text."""

import re

# Sample input: root-relative, protocol-relative and absolute http/https URLs,
# one per line.
content = '''
/images/lol/hallo.png
/images/lol/hallo.png
//example.com/images/lol/hallo.png
http://example.com/images/lol/hallo.png
https://example.com/images/lol/hallo.png
'''

# Compiled once at import time so repeated calls do not re-parse the pattern.
#
# NOTE(review): the HTML-comment alternative is an assumption. The pattern this
# replaces began with an empty alternative ("|...") and the function is named
# parse_*active*_urls, which suggests URLs inside comments should be ignored.
# Confirm against the intended behaviour.
_ACTIVE_URL_RE = re.compile(
    r'''
    <!--[\s\S]*?-->         # an HTML comment (may span lines): consumed, not captured
    |
    (?P<url>
        (?:https?:)?        # optional scheme
        //?                 # "/" (root-relative) or "//" (a host follows)
        [^\s"'<>]+          # greedy, so dots inside the host do not end the match early
        \.\w+               # the URL must end in an extension-like ".word"
    )
    ''',
    re.VERBOSE,
)


def parse_active_urls(html_text: str) -> list:
    """Return every URL found in *html_text* outside of HTML comments.

    A URL is a run of characters that optionally starts with ``http:`` or
    ``https:``, continues with one or two slashes, contains no whitespace,
    quotes or angle brackets, and ends in a dot followed by word characters
    (for example ``.png`` or ``.com``).

    Args:
        html_text: The text to scan. An empty string yields an empty list.

    Returns:
        A list of the matched URL strings, in order of appearance. Duplicates
        are kept.
    """
    # When the comment alternative matches, the "url" group is None; those
    # matches are dropped so only real URLs are returned.
    return [
        match.group('url')
        for match in _ACTIVE_URL_RE.finditer(html_text)
        if match.group('url')
    ]


def main():
    """Print each active URL found in the sample ``content``, one per line."""
    for url in parse_active_urls(content):
        print(url)


if __name__ == '__main__':
    main()