Last active
May 19, 2022 23:48
-
-
Save ateucher/a60e539f70bdaff2e13362fda4ec4deb to your computer and use it in GitHub Desktop.
Revisions
-
ateucher revised this gist
May 19, 2022 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -64,4 +64,4 @@ date: "{Sys.Date()}" } out } -
ateucher revised this gist
May 19, 2022 . 1 changed file with 1 addition and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -64,9 +64,4 @@ date: "{Sys.Date()}" } out } -
ateucher created this gist
May 19, 2022 .There are no files selected for viewing
#' Extract speaker notes from a PowerPoint file
#'
#' Reads the notes attached to each slide of a `.pptx` file and writes them to
#' a Markdown file named `<pp_file without extension>_notes.md`, with one
#' `## Slide N:` section per slide. Slides without notes get an empty section.
#' When `format = "docx"` the Markdown file is additionally rendered to
#' `<pp_file without extension>_notes.docx` with `rmarkdown::render()`.
#'
#' @param pp_file Path to a `.pptx` file (a single string).
#' @param format Output format, either `"md"` (default) or `"docx"`.
#'
#' @return For `"md"`, the path of the Markdown file that was written. For
#'   `"docx"`, the value returned by `rmarkdown::render()`.
#'
#' @details Stops with an error if `pp_file` does not exist, if a required
#'   package is missing, if the presentation has no notes, or if the output
#'   file already exists and the user does not confirm overwriting it.
extract_pptx_notes <- function(pp_file, format = c("md", "docx")) {
  # Validate arguments before anything else so misuse fails fast.
  format <- match.arg(format)
  if (!is.character(pp_file) || length(pp_file) != 1L || is.na(pp_file)) {
    stop("`pp_file` must be a single file path.", call. = FALSE)
  }
  if (!file.exists(pp_file)) {
    stop("File '", pp_file, "' does not exist.", call. = FALSE)
  }

  # The packages are never attached, so every call below is namespace
  # qualified. rmarkdown is only needed when rendering to docx.
  required_pkgs <- c("officer", "xml2")
  if (format == "docx") required_pkgs <- c(required_pkgs, "rmarkdown")
  for (pkg in required_pkgs) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("package '", pkg, "' required.", call. = FALSE)
    }
  }

  pp <- officer::read_pptx(pp_file)
  file_sans_ext <- tools::file_path_sans_ext(pp_file)

  # Relationship metadata of the notes slides. The row names are parsed for
  # the notes-slide index and `target` for the slide index.
  # NOTE(review): relies on officer's internal `notesSlide` object -- confirm
  # against the installed officer version.
  notes_meta <- pp$notesSlide$get_metadata()
  notes_meta$notes <- as.numeric(
    gsub("notesSlide(\\d{1,3}).*", "\\1", rownames(notes_meta))
  )
  # Drop the relationship rows that point at the notes master.
  notes_meta <- notes_meta[!grepl("notesMaster", notes_meta$target), ]
  notes_meta$slide <- as.numeric(
    gsub(".+slide(\\d{1,3})\\.xml", "\\1", notes_meta$target)
  )

  if (nrow(notes_meta) == 0L) stop("No notes in this presentation")

  slide_nums <- seq(1, max(notes_meta$slide))

  # xpath search from here:
  # https://robaboukhalil.medium.com/your-slide-deck-is-a-zip-file-in-disguise-36bb14f11c0b
  xpath <- paste0(
    "//*[local-name()='txBody']",
    "/*[local-name()='p']",
    "/*[local-name()='r']",
    "/*[local-name()='t']",
    "/text()"
  )

  notes <- lapply(slide_nums, function(slide_num) {
    notes_idx <- notes_meta[notes_meta$slide == slide_num, "notes"]
    if (length(notes_idx) == 0L) return(character(0))
    notes_xml <- pp$notesSlide$get_slide(notes_idx)$get()
    # One element per matched text node; each is written as its own paragraph.
    as.character(xml2::xml_find_all(notes_xml, xpath))
  })
  names(notes) <- paste("Slide", slide_nums)

  out <- paste0(file_sans_ext, "_notes.md")

  if (file.exists(out)) {
    overwrite <- utils::askYesNo(
      paste0("File ", out, " already exists. Overwrite?")
    )
    # askYesNo() returns NA on cancel; treat anything but an explicit yes as
    # a refusal instead of failing on `if (NA)`.
    if (!isTRUE(overwrite)) stop("Quitting", call. = FALSE)
  }

  yaml_header <- c(
    "---",
    paste0("title: \"", basename(file_sans_ext), "\""),
    "output: word_document",
    paste0("date: \"", Sys.Date(), "\""),
    "---",
    ""
  )

  slide_sections <- lapply(names(notes), function(slide_name) {
    heading <- c(paste0("## ", slide_name, ":"), "")
    slide_notes <- notes[[slide_name]]
    if (length(slide_notes) == 0L) return(heading)
    # The trailing "\n" leaves a blank line after every paragraph, so the next
    # heading is preceded by a blank line (pandoc needs one to recognise it).
    c(heading, paste0(slide_notes, "\n"))
  })

  # A single write replaces any existing file; no separate removal is needed.
  writeLines(c(yaml_header, unlist(slide_sections, use.names = FALSE)), out)

  if (format == "docx") {
    out_docx <- paste0(file_sans_ext, "_notes.docx")
    return(rmarkdown::render(out, output_file = out_docx))
  }

  out
}

# Example invocation. Guarded so that sourcing this file on a machine without
# the presentation does not fail.
pp_file <- "/Users/ateucher/OneDrive - Government of BC/LUP ppt prep_2020-10-14-2-ACT.pptx"
if (file.exists(pp_file)) {
  extract_pptx_notes(pp_file, "docx")
}