Forked from gruber/Liberal Regex Pattern for Web URLs
          
        
    
          Last active
          July 30, 2024 22:09 
        
      - 
      
 - 
        
Save winzig/8894715 to your computer and use it in GitHub Desktop.  
Revisions
- 
        
winzig revised this gist
Jul 31, 2018 . No changes.There are no files selected for viewing
 - 
        
winzig revised this gist
Jul 31, 2018 . 1 changed file with 20 additions and 19 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,23 +1,21 @@ # Single-line version: (?i)\b(https?:\/{1,3})?((?:(?:[\w.\-]+\.(?:[a-z]{2,13})|(?<=http:\/\/|https:\/\/)[\w.\-]+)\/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])|(?:(?<!@)(?:\w+(?:[.\-]+\w+)*\.(?:[a-z]{2,13})|(?:(?:[0-9](?!\d)|[1-9][0-9](?!\d)|1[0-9]{2}(?!\d)|2[0-4][0-9](?!\d)|25[0-5](?!\d))[.]?){4})\b\/?(?!@)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))*(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])?)) # Commented multi-line version: (?xi) \b (https?:\/{1,3})? # Capture $1: (optional) URL scheme, colon, and slashes ( # Capture $2: Entire matched URL (other than optional protocol://) (?: (?: [\w.\-]+\. # looks like domain name (?:[a-z]{2,13}) # ending in common popular gTLDs | # (?<=http:\/\/|https:\/\/)[\w.\-]+ # hostname preceded by http:// or https:// ) \/ # followed by a slash ) (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] @@ -36,11 +34,14 @@ | # OR, the following to match naked domains: (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_(?<![@.]) (?: \w+ (?:[.\-]+\w+)* \. # avoid matching the last two parts of an email domain like co.uk in [email protected] (?:[a-z]{2,13}) # ending in common popular gTLDs | # or (?:(?:[0-9](?!\d)|[1-9][0-9](?!\d)|1[0-9]{2}(?!\d)|2[0-4][0-9](?!\d)|25[0-5](?!\d))[.]?){4} # IPv4 address, as seen in https://stackoverflow.com/a/13166657/650558 ) \b \/? (?!@) # not succeeded by a @, avoid matching "foo.na" in "[email protected]" @@ -50,13 +51,13 @@ \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) )* (?: # End with: \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'\".,<>?«»“”‘’] # not a space or one of these punct chars )? ) )  - 
        
winzig revised this gist
May 16, 2018 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:\/{1,3}|[a-z0-9%])|[a-z0-9.\-]+\.(?:[a-z0-9]{2,13})\/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z0-9]{2,13})\b\/?(?!@)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))) # Commented multi-line version: @@ -16,7 +16,7 @@ ) | # or [a-z0-9.\-]+\. # looks like domain name (?:[a-z0-9]{2,13}) # ending in common popular gTLDs (or final octet of IPv4 IP) \/ # followed by a slash ) (?: # One or more: @@ -40,7 +40,7 @@ (?:[.\-][a-z0-9]+)* \. # avoid matching the last two parts of an email domain # like co.uk in [email protected] (?:[a-z0-9]{2,13}) # ending in common popular gTLDs (or final octet of IPv4 IP) \b \/? (?!@) # not succeeded by a @, avoid matching "foo.na" in "[email protected]"  - 
        
winzig revised this gist
May 16, 2018 . 1 changed file with 4 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:\/{1,3}|[a-z0-9%])|[a-z0-9.\-]+\.(?:[a-z]{2,13})\/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z]{2,13})\b\/?(?!@)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))) # Commented multi-line version: @@ -15,10 +15,10 @@ # (Trying not to match e.g. "URI::Escape") ) | # or [a-z0-9.\-]+\. # looks like domain name (?:[a-z]{2,13}) # ending in common popular gTLDs \/ # followed by a slash ) (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] | # or  - 
        
winzig revised this gist
May 16, 2018 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:\/{1,3}|[a-z0-9%])|[a-z0-9.\-]+\.\/)(?:[a-z]{2,13})(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z]{2,13})\b\/?(?!@)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))) # Commented multi-line version: @@ -31,7 +31,7 @@ | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'\".,<>?«»“”‘’] # not a space or one of these punct chars ) | # OR, the following to match naked domains: (?: @@ -56,7 +56,7 @@ | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'\".,<>?«»“”‘’] # not a space or one of these punct chars ) ) )  - 
        
winzig revised this gist
May 16, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -35,7 +35,7 @@ ) | # OR, the following to match naked domains: (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_(?<![@.]) [a-z0-9]+ (?:[.\-][a-z0-9]+)* \. # avoid matching the last two parts of an email domain  - 
        
winzig revised this gist
May 16, 2018 . No changes.There are no files selected for viewing
 - 
        
winzig revised this gist
May 16, 2018 . 1 changed file with 41 additions and 27 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,48 +1,62 @@ # Single-line version: (?i)\b((?:https?:(?:\/{1,3}|[a-z0-9%])|[a-z0-9.\-]+\.\/)(?:[a-z]{2,13})(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z]{2,13})\b\/?(?!@)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))) # Commented multi-line version: (?xi) \b ( # Capture 1: entire matched URL (?: https?: # URL protocol and colon (?: \/{1,3} # 1-3 slashes | # or [a-z0-9%] # Single letter or digit or '%' # (Trying not to match e.g. "URI::Escape") ) | # or [a-z0-9.\-]+\. # looks like domain name followed by a slash: \/ ) (?:[a-z]{2,13}) # Covers common popular gTLDs (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] | # or \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) )+ (?: # End with: \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars ) | # OR, the following to match naked domains: (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_(?<![@.]) [a-z0-9]+ (?:[.\-][a-z0-9]+)* \. # avoid matching the last two parts of an email domain # like co.uk in [email protected] (?:[a-z]{2,13}) \b \/? (?!@) # not succeeded by a @, avoid matching "foo.na" in "[email protected]" (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] | # or \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) )+ (?: # End with: \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars ) ) )  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -37,8 +37,8 @@ (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_(?<![@.]) [a-z0-9]+ (?:[.\-][a-z0-9]+)* [.] # avoid matching the last two parts of an email domain # like co.uk in [email protected] (?:[a-z]{2,13}) \b  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.]/)(?:[a-z]{2,13})(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z]{2,13})\b/?(?!@))) # Commented multi-line version:  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.]/)(?:[a-z]{2,13})(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:\b[.\-][a-z0-9]+)*(?<![@.])(?:[a-z]{2,13})\b/?(?!@))) # Commented multi-line version:  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 22 additions and 22 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,41 +5,41 @@ (?xi) \b ( # Capture 1: entire matched URL (?: https?: # URL protocol and colon (?: /{1,3} # 1-3 slashes | # or [a-z0-9%] # Single letter or digit or '%' # (Trying not to match e.g. "URI::Escape") ) | # or [a-z0-9.\-]+[.] # looks like domain name followed by a slash: / ) (?:[a-z]{2,13}) # Covers common popular gTLDs (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] | # or \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) )+ (?: # End with: \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars ) | # OR, the following to match naked domains: (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_(?<![@.]) [a-z0-9]+ (?:\b[.\-][a-z0-9]+)* (?<![@.]) # avoid matching the last two parts of an email domain # like co.uk in [email protected] (?:[a-z]{2,13}) \b /?  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -38,7 +38,7 @@ (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_ [a-z0-9]+ (?:\b[.\-][a-z0-9]+)* [.] (?:[a-z]{2,13}) \b  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -17,9 +17,9 @@ | # or # looks like domain name followed by a slash: [a-z0-9.\-]+[.] / ) (?:[a-z]{2,13}) (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] | # or  - 
        
winzig revised this gist
May 3, 2018 . 1 changed file with 1 addition and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.]/)(?:[a-z]{2,13})(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z]{2,13})\b/?(?!@))) # Commented multi-line version:  - 
        
winzig revised this gist
Feb 9, 2014 . 1 changed file with 3 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # Single-line version: (?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:[a-z]{2,13})(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:[a-z]{2,13})\b/?(?!@))) # Commented multi-line version: @@ -18,7 +18,7 @@ | # or # looks like domain name followed by a slash: [a-z0-9.\-]+[.] (?:[a-z]{2,13}) / ) (?: # One or more: @@ -41,7 +41,7 @@ [a-z0-9]+ (?:[.\-][a-z0-9]+)* [.] (?:[a-z]{2,13}) \b /? (?!@) # not succeeded by a @, avoid matching "foo.na" in "[email protected]"  - 
        
gruber renamed this gist
Feb 8, 2014 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. - 
        
gruber created this gist
Feb 8, 2014 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,49 @@ # Single-line version: (?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)\b/?(?!@))) # Commented multi-line version: (?xi) \b ( # Capture 1: entire matched URL (?: https?: # URL protocol and colon (?: /{1,3} # 1-3 slashes | # or [a-z0-9%] # Single letter or digit or '%' # (Trying not to match e.g. "URI::Escape") ) | # or # looks like domain name followed by a slash: [a-z0-9.\-]+[.] (?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj| Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw) / ) (?: # One or more: [^\s()<>{}\[\]]+ # Run of non-space, non-()<>{}[] | # or \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) )+ (?: # End with: \([^\s()]*?\([^\s()]+\)[^\s()]*?\) # balanced parens, one level deep: (…(…)…) | \([^\s]+?\) # balanced parens, non-recursive: (…) | # or [^\s`!()\[\]{};:'".,<>?«»“”‘’] # not a space or one of these punct chars ) | # OR, the following to match naked domains: (?: (?<!@) # not preceded by a @, avoid matching foo@_gmail.com_ [a-z0-9]+ (?:[.\-][a-z0-9]+)* [.] (?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj| Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw) \b /? (?!@) # not succeeded by a @, avoid matching "foo.na" in "[email protected]" ) )