#' Extract speaker notes from a PowerPoint file
#'
#' Reads the notes attached to the slides of a `.pptx` file and writes them to
#' a Markdown file named `<pp_file without extension>_notes.md`. The file
#' starts with a YAML header (title, `output: word_document`, date) followed
#' by one `## Slide N:` section for every slide number from 1 up to the last
#' slide that has notes; slides without notes get an empty section. Each notes
#' paragraph becomes one Markdown paragraph.
#'
#' If the Markdown file already exists the user is asked whether to overwrite
#' it; anything other than an explicit "yes" aborts with an error.
#'
#' Needs the 'officer' and 'xml2' packages, and 'rmarkdown' when
#' `format = "docx"`.
#'
#' @param pp_file Path to the `.pptx` file.
#' @param format Either `"md"` (write the Markdown file only) or `"docx"`
#'   (also render the Markdown file to `<...>_notes.docx`).
#' @return For `"md"`, the path of the Markdown file. For `"docx"`, the value
#'   returned by `rmarkdown::render()`.
extract_pptx_notes <- function(pp_file, format = c("md", "docx")) {
  format <- match.arg(format)

  # Packages are only checked here, never attached, so every call below is
  # namespace-qualified.
  needed <- c("officer", "xml2", if (format == "docx") "rmarkdown")
  for (pkg in needed) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop(sprintf("package '%s' required.", pkg), call. = FALSE)
    }
  }
  if (!file.exists(pp_file)) {
    stop(sprintf("File %s does not exist", pp_file), call. = FALSE)
  }

  pp <- officer::read_pptx(pp_file)
  file_sans_ext <- tools::file_path_sans_ext(pp_file)

  # Map each notes part to the slide it belongs to: the notes number comes
  # from the row name, the slide number from the relationship target.
  meta <- pp$notesSlide$get_metadata()
  meta$notes <- as.numeric(
    gsub("notesSlide(\\d{1,3}).*", "\\1", rownames(meta))
  )
  meta <- meta[!grepl("notesMaster", meta$target), ]
  meta$slide <- as.numeric(
    gsub(".+slide(\\d{1,3})\\.xml", "\\1", meta$target)
  )
  if (nrow(meta) == 0L) {
    stop("No notes in this presentation", call. = FALSE)
  }

  slide_nums <- seq_len(max(meta$slide))

  # xpath search from here:
  # https://robaboukhalil.medium.com/your-slide-deck-is-a-zip-file-in-disguise-36bb14f11c0b
  # Paragraphs are located first and their runs joined afterwards, so a
  # paragraph made of several formatting runs stays a single paragraph.
  para_xpath <- "//*[local-name()='txBody']/*[local-name()='p']"
  run_xpath <- "./*[local-name()='r']/*[local-name()='t']"

  notes <- lapply(slide_nums, \(x) {
    notes_id <- meta$notes[which(meta$slide == x)]
    if (length(notes_id) == 0L) {
      return(character(0))
    }
    xml <- pp$notesSlide$get_slide(notes_id)$get()
    paragraphs <- xml2::xml_find_all(xml, para_xpath)
    # xml_text() returns the text content of each run (rather than the
    # serialised node as.character() would give).
    text <- vapply(
      paragraphs,
      \(p) paste(
        xml2::xml_text(xml2::xml_find_all(p, run_xpath)),
        collapse = ""
      ),
      character(1)
    )
    text[nzchar(trimws(text))]
  })
  names(notes) <- paste("Slide", slide_nums)

  out <- paste0(file_sans_ext, "_notes.md")
  if (file.exists(out)) {
    overwrite <- utils::askYesNo(
      sprintf("File %s already exists. Overwrite?", out)
    )
    # askYesNo() returns NA on cancel; treat that as "no" instead of failing
    # inside if().
    if (!isTRUE(overwrite)) {
      stop("Quitting", call. = FALSE)
    }
  }

  header <- c(
    "---",
    sprintf('title: "%s"', basename(file_sans_ext)),
    "output: word_document",
    sprintf('date: "%s"', format(Sys.Date())),
    "---",
    ""
  )
  sections <- lapply(names(notes), \(n) {
    paras <- notes[[n]]
    # Every paragraph is followed by a blank line so that the next heading is
    # parsed as a heading rather than as a continuation of the paragraph.
    body <- if (length(paras) > 0L) {
      as.vector(rbind(paras, ""))
    } else {
      character(0)
    }
    c(paste0("## ", n, ":"), "", body)
  })

  # Write the whole file in one go, as UTF-8 regardless of the session's
  # native encoding. Opening with "w" truncates an existing file.
  con <- file(out, open = "w", encoding = "native.enc")
  tryCatch(
    writeLines(enc2utf8(c(header, unlist(sections))), con, useBytes = TRUE),
    finally = close(con)
  )

  if (format == "docx") {
    out_docx <- paste0(file_sans_ext, "_notes.docx")
    return(rmarkdown::render(out, output_file = out_docx))
  }
  out
}