// Convert the main text part of an unpacked .docx ($targetDir/word/document.xml)
// into simple HTML.
//
// Input:  $targetDir - directory the .docx archive was extracted into.
// Output: $text      - HTML fragment built from the document's paragraphs.
//         $goodHTML  - $text after a round trip through DOMDocument, serialized by
//                      SimpleXML (empty string when no paragraph was found).
// Throws: \RuntimeException when the content file cannot be opened.
//
// NOTE(review): the markup literals ('<p>', '<strong>', '<w:b/>', ...) and the
// heading detection were missing from this block as received (they appear to have
// been stripped by an HTML renderer) and were reconstructed from the surrounding
// logic - confirm them against the intended output.

// set location of docx text content file
$xmlFile = $targetDir . '/word/document.xml';
$reader = new XMLReader;
if (!$reader->open($xmlFile)) {
    throw new \RuntimeException('Unable to open docx content file: ' . $xmlFile);
}

// set up variables for formatting
// Inline styles are tracked as 'closed' / 'opened'; 'header' holds the heading
// level of the current paragraph (0 = ordinary paragraph).
$text = '';
$formatting['bold'] = 'closed';
$formatting['italic'] = 'closed';
$formatting['underline'] = 'closed';
$formatting['header'] = 0;

// inline style => [marker searched for in the run's inner XML, start tag, end tag]
// The trailing space in '<w:u ' (and '<w:br ' below) matches elements that are
// serialized with attributes rather than as a bare self-closing tag.
$inlineStyles = [
    'bold'      => ['<w:b/>', '<strong>', '</strong>'],
    'italic'    => ['<w:i/>', '<em>', '</em>'],
    'underline' => ['<w:u ', '<u>', '</u>'],
];

// loop through docx xml dom
while ($reader->read()) {
    // look for new paragraphs
    if ($reader->nodeType !== XMLReader::ELEMENT || $reader->name !== 'w:p') {
        continue;
    }

    // set up new instance of XMLReader for parsing paragraph independently
    $p = $reader->readOuterXML();
    $paragraph = new XMLReader;
    $paragraph->xml($p);

    // search for heading: a paragraph style "Heading1".."Heading6" selects <h1>..<h6>
    $formatting['header'] = 0;
    if (preg_match('/<w:pStyle w:val="Heading\s*([1-6])"/', $p, $matches) === 1) {
        $formatting['header'] = (int) $matches[1];
    }

    // open h-tag or paragraph
    $text .= ($formatting['header'] > 0) ? '<h' . $formatting['header'] . '>' : '<p>';

    // loop through paragraph dom
    while ($paragraph->read()) {
        // look for text runs
        if ($paragraph->nodeType !== XMLReader::ELEMENT || $paragraph->name !== 'w:r') {
            continue;
        }

        $node = trim($paragraph->readInnerXML());

        // Work out which styles start or stop with this run. Start tags are
        // collected in bold/italic/underline order, end tags in reverse order.
        $openTags = '';
        $closeTags = '';
        foreach ($inlineStyles as $style => $tags) {
            $present = strpos($node, $tags[0]) !== false;
            if ($present && $formatting[$style] === 'closed') {
                $openTags .= $tags[1];
                $formatting[$style] = 'opened';
            } elseif (!$present && $formatting[$style] === 'opened') {
                $closeTags = $tags[2] . $closeTags;
                $formatting[$style] = 'closed';
            }
        }

        // Escape the run's text so characters such as < and & in the document
        // cannot break (or inject) markup in the generated HTML.
        $runNode = $paragraph->expand();
        $runText = ($runNode === false)
            ? ''
            : htmlspecialchars($runNode->textContent, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        // End tags go before this run's text: a run without the style marker must
        // not be rendered inside the previous run's formatting.
        $text .= $closeTags;

        // add <br> tags
        if (strpos($node, '<w:br ') !== false) {
            $text .= '<br>';
        }

        // build text string of doc
        $text .= $openTags . $runText;
    }

    // Close whatever is still open so formatting never spans a paragraph boundary;
    // the next paragraph re-opens its own styles.
    foreach (array_reverse($inlineStyles, true) as $style => $tags) {
        if ($formatting[$style] === 'opened') {
            $text .= $tags[2];
            $formatting[$style] = 'closed';
        }
    }

    // close h-tag or paragraph
    $text .= ($formatting['header'] > 0) ? '</h' . $formatting['header'] . '>' : '</p>';

    $paragraph->close();
}
$reader->close();

// fix invalid html
// Styles that overlap across runs can still produce mis-nested tags, so the
// fragment is normalized by parsing it with DOMDocument and re-serializing it.
$goodHTML = '';
if ($text !== '') {
    $doc = new DOMDocument();

    // loadHTML() reports tag-soup problems as warnings; collect them internally
    // instead of emitting them, then restore the caller's libxml setting.
    $previousLibxmlSetting = libxml_use_internal_errors(true);

    // loadHTML() assumes ISO-8859-1 unless told otherwise; the leading
    // declaration makes it read $text as UTF-8.
    $doc->loadHTML('<?xml encoding="UTF-8">' . $text);

    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    // remove the encoding hint again so it does not show up in the output
    foreach ($doc->childNodes as $child) {
        if ($child->nodeType === XML_PI_NODE) {
            $doc->removeChild($child);
            break;
        }
    }
    $doc->encoding = 'UTF-8';

    $goodHTML = simplexml_import_dom($doc)->asXML();
}