# DOCX to PDF after Markdown generation
# Copyright Emanuele Ruffaldi MMI 2024
# License: MIT
#
# Usage:
#   script INPUT.DOCX
#
# Output: INPUT.PDF
#
# Requirements
# - remove existing PDF
# - FAIL if DOCX missing
# - FAIL if PDF cannot be generated
# - FAIL if there are missing references
# - launches Word unattended, hidden and with a timeout of 60s
# - regenerates fields and TOC
# - exports PDF without comments and changes
# - steps: open, update fields and toc, check references, save, export pdf
#
# Exit code: 0 when the PDF was produced by a successful run, 1 otherwise.
param (
    [string]$in
)

# An empty argument cannot be resolved to a path; fail early with the usage.
if ([string]::IsNullOrEmpty($in)) {
    Write-Error "Usage: $($MyInvocation.MyCommand.Name) INPUT.DOCX"
    exit 1
}

# Ensure the input file path is absolute.
# Resolve against the PowerShell location: [System.IO.Path]::GetFullPath uses
# the .NET process directory, which can differ from the current location.
$inputFilePath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($in)

if (-not (Test-Path -LiteralPath $inputFilePath -PathType Leaf)) {
    Write-Error "The specified input file does not exist: $inputFilePath"
    exit 1
}

# Generate the output file path by changing the extension to .pdf
$outputFilePath = [System.IO.Path]::ChangeExtension($inputFilePath, ".pdf")

# Guard: if the input already ends in .pdf the "remove existing PDF" step
# below would delete the input file itself.
if ($outputFilePath -eq $inputFilePath) {
    Write-Error "The input file must not be a PDF: $inputFilePath"
    exit 1
}

# Remove a previous PDF. A stale file that cannot be removed (e.g. locked by a
# viewer) must be a failure, otherwise it would be mistaken for fresh output.
if (Test-Path -LiteralPath $outputFilePath) {
    try {
        Remove-Item -LiteralPath $outputFilePath -ErrorAction Stop
    }
    catch {
        Write-Error "Cannot remove existing PDF '$outputFilePath': $_"
        exit 1
    }
}

# Timeout (seconds) for the whole Word automation job
$timeoutSeconds = 60

# Opens the DOCX in a hidden Word instance, refreshes fields and tables of
# contents, saves it, verifies references and writes the PDF.
# Parameters:
#   $inputFilePath  - absolute path of the DOCX to process
#   $outputFilePath - absolute path of the PDF to create
# Throws on any failure (including missing references) after releasing Word.
$ProcessWordDocument = {
    param ($inputFilePath, $outputFilePath)
    Write-Output "ProcessWordDocument"

    # Numeric values of the Word enumerations. Literals are used instead of
    # [Microsoft.Office.Interop.Word.*] so the script block does not depend on
    # the interop assembly being loaded in the job's process.
    $wdAlertsNone = 0           # WdAlertLevel.wdAlertsNone
    $wdRevisionsMarkupNone = 0  # WdRevisionsMarkup.wdRevisionsMarkupNone
    $wdRevisionsViewFinal = 0   # WdRevisionsView.wdRevisionsViewFinal
    $wdFormatPDF = 17           # WdSaveFormat.wdFormatPDF
    $wdDoNotSaveChanges = 0     # WdSaveOptions.wdDoNotSaveChanges

    $word = $null
    $document = $null
    try {
        # Create a new Word application object, hidden and without alerts
        $word = New-Object -ComObject Word.Application
        $word.Visible = $false
        $word.DisplayAlerts = $wdAlertsNone

        # Open the document and refresh every field
        $document = $word.Documents.Open($inputFilePath)
        # Update() returns a status number; discard it to keep the output clean
        $document.Fields.Update() | Out-Null

        # Update table of contents specifically
        foreach ($toc in $document.TablesOfContents) {
            $toc.Update()
        }

        # Save the updated Word document
        $document.Save()

        # Check for any errors in fields (missing references).
        # NOTE(review): the pattern is the English Word message; other UI
        # languages presumably use a different text - confirm if needed.
        $missingReferences = $false
        foreach ($field in $document.Fields) {
            if ($field.Result.Text -match 'Error! Reference source not found') {
                $missingReferences = $true
                break
            }
        }
        if ($missingReferences) {
            throw "The document contains missing references."
        }

        # Hide comments and tracked changes so they are not rendered in the PDF
        $view = $word.ActiveWindow.View
        $view.ShowRevisionsAndComments = $false
        $view.RevisionsFilter.Markup = $wdRevisionsMarkupNone
        $view.RevisionsFilter.View = $wdRevisionsViewFinal

        # Export the document to PDF
        $document.SaveAs([ref] $outputFilePath, $wdFormatPDF)
    }
    catch {
        Write-Output "An error occurred: $_"
        # Re-throw so the caller (or the job state) sees the failure
        throw
    }
    finally {
        # Best-effort cleanup: each step is guarded so a cleanup problem never
        # hides the original error and Word is always asked to quit.
        if ($null -ne $document) {
            try { $document.Close($wdDoNotSaveChanges) }
            catch { Write-Warning "Closing the document failed: $_" }
            [System.Runtime.InteropServices.Marshal]::ReleaseComObject($document) | Out-Null
        }
        if ($null -ne $word) {
            try { $word.Quit() }
            catch { Write-Warning "Quitting Word failed: $_" }
            [System.Runtime.InteropServices.Marshal]::ReleaseComObject($word) | Out-Null
        }
    }
}

# Enable this for debugging without launching the Job
$testMode = $false

# Set to $true only when the processing finished without errors
$succeeded = $false

if ($testMode) {
    try {
        & $ProcessWordDocument $inputFilePath $outputFilePath
        $succeeded = $true
    }
    catch {
        Write-Output "Document processing Failed: $_"
    }
}
else {
    # Start the Word processing job
    $job = Start-Job -ScriptBlock $ProcessWordDocument -ArgumentList $inputFilePath, $outputFilePath

    # Monitor the job with timeout (the returned job object is not needed)
    Wait-Job -Job $job -Timeout $timeoutSeconds | Out-Null

    # Relay the job output; remember an error instead of aborting the script
    # so the state handling and the job cleanup below always run.
    $jobError = $null
    try {
        Receive-Job -Job $job -ErrorAction Stop
    }
    catch {
        $jobError = $_
    }

    if (($job.State -eq 'Completed') -and ($null -eq $jobError)) {
        $succeeded = $true
    }
    elseif (($job.State -eq 'Completed') -or ($job.State -eq 'Failed')) {
        $reason = $job.ChildJobs[0].JobStateInfo.Reason.Message
        Write-Output "Document processing Failed: $jobError $reason"
    }
    else {
        Write-Output "Document processing timed out. Terminating job: $($job.State)"
        Stop-Job -Job $job
    }

    Remove-Job -Job $job -Force
}

# Clean up any lingering Word processes.
# Kept disabled: it would kill every WINWORD process, not only the one
# started by this script.
# Get-Process winword -ErrorAction SilentlyContinue | ForEach-Object { $_.Kill() }

# Success requires both an error-free run and the PDF on disk
if ($succeeded -and (Test-Path -LiteralPath $outputFilePath -PathType Leaf)) {
    exit 0
}
else {
    exit 1
}