This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import argparse | |
| import csv | |
| from collections import OrderedDict | |
| import re | |
| parser = argparse.ArgumentParser(description='Remove duplicate URls from csv of pages mentioning C8.') | |
| parser.add_argument('file', help='The CSV file to read') | |
| args = parser.parse_args() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ## Get XML for current page | |
| #set($article = $_XPathTool.selectSingleNode($contentRoot, "/system-index-block/calling-page/system-page")) | |
| ## Get the title of the page | |
| ## ($title and $summary hold the child elements themselves; $link and $image below hold the .text of theirs) | |
| #set($title = $article.getChild("title")) | |
| ## Get the text of the page's "path" child, kept as the link | |
| #set($link = $article.getChild("path").text) | |
| ## Get the "summary" child element of the page | |
| #set($summary = $article.getChild("summary")) | |
| ## Get the path text of the lead image, nested under system-data-structure > lead-image > image > path | |
| ## NOTE(review): the chain is unguarded - presumably $image is not set when any of these children is missing; confirm | |
| #set($image = $article.getChild("system-data-structure").getChild("lead-image").getChild("image").getChild("path").text) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import bs4 | |
| from bs4 import BeautifulSoup | |
| from unidecode import unidecode | |
| import urllib | |
| import re | |
| import datetime | |
| import requests | |
| import argparse | |
| # Matches dates written like "January 5, 2019": a run of ASCII letters, optional whitespace, | |
| # a 1-2 digit day, a comma, optional whitespace, then a 4-digit year. The letter run is not | |
| # checked against month names and the pattern is unanchored, so it can match inside longer text. | |
| date_regex = re.compile(r"[A-Za-z]+\s*\d{1,2}\,\s*\d{4}") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <!DOCTYPE html> | |
| <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"> | |
| <head> | |
| <!-- BLOCK: uniform campus head --> | |
| <meta charset="UTF-8"/> | |
| <meta content="IE=edge" http-equiv="X-UA-Compatible"/> | |
| <meta content="width=device-width, user-scalable=yes, initial-scale=1.0, minimum-scale=1.0, maximum-scale=2.0" name="viewport"/> | |
| <link href="../favicon.ico" rel="shortcut icon"/> | |
| <link href="../apple-touch-icon.png" rel="apple-touch-icon"/> |