Created
October 22, 2025 11:36
-
-
Save eruffaldi/cef6e458d1029c84bf38f08aec128dca to your computer and use it in GitHub Desktop.
Converts Word DOCX to PDF with some extra checks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# DOCX to PDF after Markdown generation
# Copyright Emanuele Ruffaldi MMI 2024
# License: MIT
#
# Usage:
#   script INPUT.DOCX
#
# Output: INPUT.PDF
#
# Requirements
# - remove existing PDF
# - FAIL if DOCX missing
# - FAIL if PDF cannot be generated
# - FAIL if there are missing references
# - launches Word unattended and hidden, with a timeout of 60s
# - regenerates fields and TOC
# - export PDF without comments and changes
# - steps: open, update fields and TOC, check references, save, export PDF
param (
    # Path to the DOCX file to convert; may be relative to the current PowerShell location.
    [string]$in
)

# Fail early with a clear message instead of letting path resolution throw on an empty argument.
if ([string]::IsNullOrEmpty($in)) {
    Write-Error "No input file specified. Usage: script INPUT.DOCX"
    exit 1
}

# Ensure the input file path is absolute.
# Resolve against the PowerShell location rather than the process working directory:
# [System.IO.Path]::GetFullPath uses the latter, which is not updated by Set-Location.
$inputFilePath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($in)

# -LiteralPath keeps characters such as '[' and ']' from being treated as wildcards;
# -PathType Leaf rejects directories.
if (-not (Test-Path -LiteralPath $inputFilePath -PathType Leaf)) {
    Write-Error "The specified input file does not exist: $inputFilePath"
    exit 1
}

# Generate the output file path by changing the extension to .pdf
$outputFilePath = [System.IO.Path]::ChangeExtension($inputFilePath, ".pdf")

# Guard against an input that already ends in .pdf: the output would then be the input
# itself and the removal step below would delete it.
if ($outputFilePath -ieq $inputFilePath) {
    Write-Error "The input file must not be a PDF: $inputFilePath"
    exit 1
}

# Remove any existing PDF. A failure here must be fatal: otherwise the stale PDF would
# still be present at the end of the script and be mistaken for a fresh export.
if (Test-Path -LiteralPath $outputFilePath) {
    try {
        Remove-Item -LiteralPath $outputFilePath -ErrorAction Stop
    }
    catch {
        Write-Error "Cannot remove existing PDF '$outputFilePath': $_"
        exit 1
    }
}

# Timeout (in seconds) allowed for the Word processing job
$timeoutSeconds = 60
# Script block that drives Word through COM automation.
#
# Parameters:
#   $inputFilePath  - absolute path of the Word document to open
#   $outputFilePath - absolute path of the PDF to write
#
# Steps: open, update fields and tables of contents, check for missing references,
# save the document, hide revisions/comments, export to PDF.
# Any failure is reported with Write-Error; the PDF is then not produced.
$ProcessWordDocument = {
    param ($inputFilePath, $outputFilePath)

    Write-Output "ProcessWordDocument"

    # Numeric values of the Word enumerations used below. Plain integers are used so the
    # block does not depend on the Microsoft.Office.Interop.Word types being loaded in
    # the process that runs it.
    $wdAlertsNone = 0            # WdAlertLevel.wdAlertsNone
    $wdRevisionsMarkupNone = 0   # WdRevisionsMarkup.wdRevisionsMarkupNone
    $wdRevisionsViewFinal = 0    # WdRevisionsView.wdRevisionsViewFinal

    # Initialised up front so the cleanup in 'finally' can tell what was actually created.
    $word = $null
    $document = $null

    try {
        # Create a new Word application object (inside 'try' so that a failure while
        # configuring it still reaches the cleanup code).
        $word = New-Object -ComObject Word.Application
        $word.Visible = $false                # Keep Word application hidden
        $word.DisplayAlerts = $wdAlertsNone   # Disable alerts

        # Open the document
        $document = $word.Documents.Open($inputFilePath)

        # Update all fields; the call returns a status value that is discarded so it
        # does not end up in the output stream.
        $null = $document.Fields.Update()

        # Update table of contents specifically
        foreach ($toc in $document.TablesOfContents) {
            $toc.Update()
        }

        # Check for any errors in fields (missing references) BEFORE saving, so a
        # document with broken references is neither saved nor exported.
        foreach ($field in $document.Fields) {
            if ($field.Result.Text -match 'Error! Reference source not found') {
                throw "The document contains missing references."
            }
        }

        # Save the updated Word document
        $document.Save()

        # Hide comments and tracked changes so they are not rendered in the PDF
        $view = $word.ActiveWindow.View
        $view.ShowRevisionsAndComments = $false
        $view.RevisionsFilter.Markup = $wdRevisionsMarkupNone
        $view.RevisionsFilter.View = $wdRevisionsViewFinal

        # Export the document to PDF (17 is the WdSaveFormat enumeration for PDF)
        $document.SaveAs([ref] $outputFilePath, 17)
    } catch {
        Write-Error "An error occurred: $_"
    } finally {
        # Close the document without saving, on success and on failure alike.
        if ($null -ne $document) {
            try {
                $document.Close($false)
            } catch {
                # Best effort: the document may already be gone if Word itself failed.
            }
            [System.Runtime.InteropServices.Marshal]::ReleaseComObject($document) | Out-Null
            $document = $null
        }

        # Quit the Word application and release its COM object
        if ($null -ne $word) {
            try {
                $word.Quit()
            } catch {
                # Best effort: nothing more can be done if Word refuses to quit.
            }
            [System.Runtime.InteropServices.Marshal]::ReleaseComObject($word) | Out-Null
            $word = $null
        }

        # Let the runtime drop any remaining COM wrappers created during enumeration.
        [System.GC]::Collect()
        [System.GC]::WaitForPendingFinalizers()
    }
}
# Enable this for debugging without launching the Job
$testMode = $false

# Becomes $true only when processing ran to completion without a reported error.
$processingSucceeded = $false

if ($testMode) {
    # Run the script block in-process; it reports its own errors via Write-Error.
    & $ProcessWordDocument $inputFilePath $outputFilePath
    $processingSucceeded = $true
}
else {
    # Start the Word processing job
    $job = Start-Job -ScriptBlock $ProcessWordDocument -ArgumentList $inputFilePath, $outputFilePath
    try {
        # Monitor the job with timeout. Wait-Job may emit the job object; discard it so
        # it does not pollute the script's output.
        $null = $job | Wait-Job -Timeout $timeoutSeconds

        # Snapshot the state once so every decision below is based on the same value,
        # and capture the failure reason before the job's results are drained.
        $finalState = $job.State
        $failureReason = $job.ChildJobs[0].JobStateInfo.Reason.Message

        # Relay the job's output. -ErrorAction Stop turns the first error reported by the
        # job into an exception; it is caught here so the state handling and cleanup
        # below still run.
        $jobReportedError = $false
        try {
            Receive-Job -Job $job -ErrorAction Stop
        }
        catch {
            $jobReportedError = $true
            Write-Error "Document processing reported an error: $_"
        }

        if ($finalState -eq 'Completed') {
            $processingSucceeded = -not $jobReportedError
        }
        elseif ($finalState -eq 'Failed') {
            # Parenthesised so '+' concatenates instead of being passed to Write-Output
            # as a separate argument.
            Write-Output ("Document processing Failed: " + $failureReason)
        }
        else {
            Write-Output ("Document processing timed out. Terminating job: " + $finalState)
            Stop-Job -Job $job
        }
    }
    finally {
        # Always dispose of the job, stopping it first if it is still running.
        Remove-Job -Job $job -Force
    }
}

# Clean up any lingering Word processes
# Get-Process winword -ErrorAction SilentlyContinue | ForEach-Object { $_.Kill() }

# Succeed only if processing finished cleanly AND the PDF exists; a file left behind by
# a timed-out or failed run must not be reported as a successful export.
if ($processingSucceeded -and (Test-Path -LiteralPath $outputFilePath -PathType Leaf)) {
    exit 0
}
else {
    exit 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment