Skip to content

Instantly share code, notes, and snippets.

@fecori
Created May 30, 2019 23:15
Show Gist options
  • Save fecori/33d2db63266985d7eef08dcbfee1f1a3 to your computer and use it in GitHub Desktop.
Save fecori/33d2db63266985d7eef08dcbfee1f1a3 to your computer and use it in GitHub Desktop.
/**
 * Convert an HTML fragment to plain text using regular expressions.
 *
 * Line breaks and paragraphs become newlines, links become
 * " text (url)", SCRIPT/STYLE blocks are dropped together with their content,
 * every remaining tag is stripped, and a small set of character entities is
 * decoded. This is a best-effort textual conversion, not an HTML parser or a
 * sanitizer: do not insert the result into a page as markup.
 *
 * @param {string} html - HTML source to convert (coerced with String()).
 * @returns {string} The plain-text rendering of the input.
 */
function htmlToPlainText(html) {
  // Entities decoded at the end; keys are the lower-cased text between "&" and ";".
  var entityChars = {
    nbsp: " ",
    amp: "&",
    quot: '"',
    lt: "<",
    gt: ">",
    apos: "'",
    "#39": "'"
  };

  return String(html)
    // Drop SCRIPT and STYLE blocks first. The lazy [\s\S]*? stops at the
    // nearest closing tag, so text between two separate blocks is kept.
    .replace(/<script\b[^>]*>[\s\S]*?<\/script\s*>/gi, "")
    .replace(/<style\b[^>]*>[\s\S]*?<\/style\s*>/gi, "")
    // <br>, <br/>, <br /> (any amount of whitespace) become a line break.
    .replace(/<br\s*\/?>/gi, "\n")
    // An opening <p ...> starts a new line. Requiring ">" or whitespace right
    // after "p" keeps <pre>, <param>, ... from being mistaken for a paragraph,
    // and [^>]* keeps the match inside the tag so the paragraph text survives.
    .replace(/<p(?:\s[^>]*)?>/gi, "\n")
    // <a ... href="url">text</a> -> " text (url)", one link at a time.
    .replace(
      /<a\s(?:[^>]*?\s)?href="([^"]*)"[^>]*>([\s\S]*?)<\/a\s*>/gi,
      " $2 ($1)"
    )
    // Strip every remaining tag. [^>]* has no overlapping alternatives, so an
    // unclosed "<" cannot trigger catastrophic backtracking.
    .replace(/<[^>]*>/g, "")
    // Normalise line endings so that a single CRLF counts as one line break.
    .replace(/\r\n?/g, "\n")
    // Two or more line breaks (with any whitespace between them) collapse to
    // exactly one blank line.
    .replace(/(?:\n\s*){2,}/g, "\n\n")
    // Runs of spaces collapse to a single space.
    .replace(/ {2,}/g, " ")
    // Decode entities in a single pass so that "&amp;lt;" yields the literal
    // text "&lt;" instead of being decoded twice into "<".
    .replace(/&(nbsp|amp|quot|lt|gt|apos|#39);/gi, function (match, name) {
      return entityChars[name.toLowerCase()];
    });
}

/**
 * Read HTML from the element with id "input", convert it to plain text and
 * write the result to the element with id "output".
 *
 * Both elements are accessed through their `value` property.
 */
function convertHtmlToText() {
  var inputText = document.getElementById("input").value;
  document.getElementById("output").value = htmlToPlainText(inputText);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment