Skip to content

Instantly share code, notes, and snippets.

@eruffaldi
Created October 22, 2025 11:36
Show Gist options
  • Save eruffaldi/cef6e458d1029c84bf38f08aec128dca to your computer and use it in GitHub Desktop.
Save eruffaldi/cef6e458d1029c84bf38f08aec128dca to your computer and use it in GitHub Desktop.
Converts Word DOCX to PDF with some extra checks
# DOCX to PDF after Markdown generation
# Copyright Emanuele Ruffaldi MMI 2024
# License: MIT
#
# Usage:
# script INPUT.DOCX
#
# Output: INPUT.PDF
#
# Requirements
# - remove existing PDF
# - FAIL if DOCX missing
# - FAIL if PDF cannot be generated
# - FAIL if there are missing references
# - launches Word unattended and hidden, with a timeout of 60s
# - regenerates fields and TOC
# - exports the PDF without comments and tracked changes
# - steps: open, update fields and TOC, save, check references, export PDF
param (
    # Path of the DOCX file to convert; may be relative to the current
    # PowerShell location.
    [string]$in
)

# Fail early with a clear message when no input was given; resolving an
# empty path would otherwise throw an unrelated exception.
if ([string]::IsNullOrEmpty($in)) {
    Write-Error "No input file specified. Usage: script INPUT.DOCX"
    exit 1
}

# Ensure the input file path is absolute.
# [System.IO.Path]::GetFullPath alone resolves relative paths against the
# process working directory, which can differ from the PowerShell location
# after Set-Location. Resolve against the PowerShell location first, then
# normalize the result.
$inputFilePath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($in)
$inputFilePath = [System.IO.Path]::GetFullPath($inputFilePath)

# -LiteralPath keeps characters such as [ and ] in file names from being
# interpreted as wildcards.
if (-not (Test-Path -LiteralPath $inputFilePath -PathType Leaf)) {
    Write-Error "The specified input file does not exist: $inputFilePath"
    exit 1
}

# Generate the output file path by changing the extension to .pdf
$outputFilePath = [System.IO.Path]::ChangeExtension($inputFilePath, ".pdf")

# Remove a previous PDF. The final success check only tests whether the PDF
# exists, so a stale file that cannot be removed (e.g. locked by a viewer)
# must be treated as a failure rather than silently kept.
if (Test-Path -LiteralPath $outputFilePath) {
    try {
        Remove-Item -LiteralPath $outputFilePath -Force -ErrorAction Stop
    }
    catch {
        Write-Error "Cannot remove the existing output file: $outputFilePath ($_)"
        exit 1
    }
}

# Timeout (seconds) granted to the Word automation job
$timeoutSeconds = 60
# Script block that automates Word: opens the DOCX, refreshes fields and
# tables of contents, saves the document, fails when a field reports a
# missing reference, and finally writes the PDF.
#
# Parameters:
#   $inputFilePath  - absolute path of the DOCX to open
#   $outputFilePath - absolute path of the PDF to write
#
# Errors are reported with Write-Error (non-terminating); the PDF is simply
# not produced when a step fails.
$ProcessWordDocument = {
    param ($inputFilePath, $outputFilePath)
    Write-Output "ProcessWordDocument"

    # Numeric values of the Word enumerations. The interop enum types
    # ([Microsoft.Office.Interop.Word.*]) are not guaranteed to be loaded in
    # the process that runs this block, so plain constants are used instead.
    $wdAlertsNone = 0            # WdAlertLevel.wdAlertsNone
    $wdRevisionsMarkupNone = 0   # WdRevisionsMarkup.wdRevisionsMarkupNone
    $wdRevisionsViewFinal = 0    # WdRevisionsView.wdRevisionsViewFinal
    $wdFormatPDF = 17            # WdSaveFormat.wdFormatPDF

    $word = $null
    $document = $null
    $documentClosed = $false
    try {
        # Create the Word application inside the try so that a failure to
        # start Word is reported by the catch below.
        $word = New-Object -ComObject Word.Application
        $word.Visible = $false                  # Keep Word application hidden
        $word.DisplayAlerts = $wdAlertsNone     # Disable alerts

        # Open the document
        $document = $word.Documents.Open($inputFilePath)

        # Fields.Update() returns a value; discard it so it does not end up
        # in the output of this block.
        $null = $document.Fields.Update()

        # Update table of contents specifically
        foreach ($toc in $document.TablesOfContents) {
            $toc.Update()
        }

        # Save the updated Word document
        $document.Save()

        # Check for any errors in fields (missing references).
        # NOTE(review): the matched text is the English Word message; other
        # UI languages presumably produce a different string - confirm.
        $missingReferences = $false
        foreach ($field in $document.Fields) {
            if ($field.Result.Text -match 'Error! Reference source not found') {
                $missingReferences = $true
                break
            }
        }
        if ($missingReferences) {
            throw "The document contains missing references."
        }

        # Hide comments and tracked changes before exporting
        $view = $word.ActiveWindow.View
        $view.ShowRevisionsAndComments = $false
        $view.RevisionsFilter.Markup = $wdRevisionsMarkupNone
        $view.RevisionsFilter.View = $wdRevisionsViewFinal

        # Export the document to PDF
        $document.SaveAs([ref] $outputFilePath, $wdFormatPDF)

        # Close the document without saving again
        $document.Close($false)
        $documentClosed = $true
    } catch {
        Write-Error "An error occurred: $_"
    } finally {
        # Every cleanup step is guarded: $document or $word may be null when
        # an early step failed, and one failing step must not prevent the
        # remaining ones from running.
        if ($null -ne $document) {
            if (-not $documentClosed) {
                # Error path: the document is still open; discard changes.
                try { $document.Close($false) } catch { Write-Warning "Could not close the document: $_" }
            }
            [System.Runtime.Interopservices.Marshal]::ReleaseComObject($document) | Out-Null
        }
        if ($null -ne $word) {
            # Quit the Word application
            try { $word.Quit() } catch { Write-Warning "Could not quit Word: $_" }
            [System.Runtime.Interopservices.Marshal]::ReleaseComObject($word) | Out-Null
        }
        # Drop the references and let the runtime release remaining COM wrappers
        $document = $null
        $word = $null
        [System.GC]::Collect()
        [System.GC]::WaitForPendingFinalizers()
    }
}
# Enable this for debugging without launching the Job
$testMode = $false

# Set when the background job reported an error; forces a non-zero exit code.
$processingFailed = $false

if ($testMode) {
    & $ProcessWordDocument $inputFilePath $outputFilePath
}
else {
    # Start the Word processing job
    $job = Start-Job -ScriptBlock $ProcessWordDocument -ArgumentList $inputFilePath, $outputFilePath
    try {
        # Monitor the job with timeout. Wait-Job emits the job object when
        # the job finishes; discard it to keep the script output clean.
        $job | Wait-Job -Timeout $timeoutSeconds | Out-Null

        # Relay the job output. Errors raised inside the job are caught here
        # so that the state handling and cleanup below still run.
        try {
            Receive-Job -Job $job -ErrorAction Stop
        }
        catch {
            $processingFailed = $true
            Write-Error "Document processing Failed: $_" -ErrorAction Continue
        }

        if ($job.State -eq 'Failed') {
            if (-not $processingFailed) {
                $processingFailed = $true
                $reason = $job.ChildJobs[0].JobStateInfo.Reason
                Write-Error "Document processing Failed: $($reason.Message)" -ErrorAction Continue
            }
        }
        elseif ($job.State -ne 'Completed') {
            # Still running after the timeout: stop it. The message is built
            # as a single string; in command mode "text" + $x would be passed
            # to Write-Output as three separate arguments.
            Write-Output "Document processing timed out. Terminating job: $($job.State)"
            Stop-Job -Job $job
        }
    }
    finally {
        # Do not leave the job registered in the session
        Remove-Job -Job $job -Force
    }
}

# Clean up any lingering Word processes
# Get-Process winword -ErrorAction SilentlyContinue | ForEach-Object { $_.Kill() }

# Success requires that no error was reported and that the PDF exists.
# -LiteralPath avoids wildcard interpretation of [ and ] in the file name.
if ((-not $processingFailed) -and (Test-Path -LiteralPath $outputFilePath -PathType Leaf)) {
    exit 0
}
else {
    exit 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment