/// Quick-n-dirty translation of MWFeedParser's algorithm from Objective-C to Swift /// seealso: https://github.com/mwaterfall/MWFeedParser/blob/master/Classes/NSString%2BHTML.m public extension NSString { public func byConvertingHTMLToPlainText() -> String { let stopCharacters = CharacterSet(charactersIn: "< \t\n\r\(0x0085)\(0x000C)\(0x2028)\(0x2029)") let newLineAndWhitespaceCharacters = CharacterSet(charactersIn: " \t\n\r\(0x0085)\(0x000C)\(0x2028)\(0x2029)") let tagNameCharacters = CharacterSet(charactersIn: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") let result = NSMutableString(capacity: length) let scanner = Scanner(string: self as String) scanner.charactersToBeSkipped = nil scanner.caseSensitive = true var str: NSString? = nil var tagName: NSString? = nil var dontReplaceTagWithSpace = false repeat { // Scan up to the start of a tag or whitespace if scanner.scanUpToCharacters(from: stopCharacters, into: &str), let s = str { result.append(s as String) str = nil } // Check if we've stopped at a tag/comment or whitespace if scanner.scanString("<", into: nil) { // Stopped at a comment, script tag, or other tag if scanner.scanString("!--", into: nil) { // Comment scanner.scanUpTo("-->", into: nil) scanner.scanString("-->", into: nil) } else if scanner.scanString("script", into: nil) { // Script tag where things don't need escaping! scanner.scanUpTo("", into: nil) scanner.scanString("", into: nil) } else { // Tag - remove and replace with space unless it's // a closing inline tag then dont replace with a space if scanner.scanString("/", into: nil) { // Closing tag - replace with space unless it's inline tagName = nil dontReplaceTagWithSpace = false if scanner.scanCharacters(from: tagNameCharacters, into: &tagName), let t = tagName { tagName = t.lowercased as NSString dontReplaceTagWithSpace = tagName == "a" || tagName == "b" || tagName == "i" || tagName == "q" || tagName == "span" || tagName == "em" || tagName == "strong" || tagName == "cite" || tagName == "abbr" || tagName == "acronym" || tagName == "label" } // Replace tag with string unless it was an inline if !dontReplaceTagWithSpace && result.length > 0 && !scanner.isAtEnd { result.append(" ") } } // Scan past tag scanner.scanUpTo(">", into: nil) scanner.scanString(">", into: nil) } } else { // Stopped at whitespace - replace all whitespace and newlines with a space if scanner.scanCharacters(from: newLineAndWhitespaceCharacters, into: nil) { if result.length > 0 && !scanner.isAtEnd { result.append(" ") // Dont append space to beginning or end of result } } } } while !scanner.isAtEnd // Cleanup // Decode HTML entities and return (this isn't included in this gist, but is often important) // let retString = (result as String).stringByDecodingHTMLEntities // Return return result as String // retString; } }