Last active
October 31, 2023 08:27
-
-
Save nilslindemann/4a996ac153c7ada29b88c8b4a7bab8c2 to your computer and use it in GitHub Desktop.
Revisions
-
Nils Lindemann revised this gist
Oct 31, 2023 . 1 changed file with 8 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -77,7 +77,7 @@ def iter_lines_and_context( def iter_file_search_results( root: Path, ignore: list[Callable[[Url], IfFilterMatches]], linkpattern: re.Pattern[str] = re.compile( r""" \[ [^\]]* \] @@ -108,7 +108,7 @@ def iter_file_search_results( results.links[lnum] = line_results line_results.append((link, True)) # type: ignore for filter in ignore: for links in results.links.values(): for index, link in enumerate(links): if link[1] and filter(link[0]): links[index] = (link[0], False) @@ -174,7 +174,7 @@ def __call__(self, url: Url): @dataclass class IsWellFormed: desc: str = "a well-formed local relative link to a .md / .png / .py" word: PatternSnippet = r""" [a-z][a-z0-9]* @@ -192,7 +192,7 @@ class IsWellFormed: {word}[.](?: md|png|py ) """ hash: PatternSnippet = r""" (?: [#][^#]+ ) """ @@ -227,7 +227,10 @@ def __call__(self, url: Url): if searchresult: nasty_urls.append(searchresult) if nasty_urls: print(f"\nThese links under {DOCS_ROOT} are not:\n") for filter in filters: print(f"* {filter.desc}") print() print("\n".join(nasty_urls)) else: print(f"\nEach link under {DOCS_ROOT} is:\n") -
Nils Lindemann revised this gist
Oct 30, 2023 . 1 changed file with 24 additions and 23 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,7 +9,8 @@ Line = str LineNumber = int Url = str SearchResult = str PatternSnippet = str IfNotFiltered = bool IfFilterMatches = bool @@ -24,13 +25,13 @@ CODE_EXAMPLES_REFERENCE_POINT = DOCS_ROOT / "en/docs" NONEXISTING_TARGETS: set[Path] = set() EXTERNAL_URLS: set[Url] = set() @dataclass class FileSearchResults: filepath: Path links: dict[LineNumber, list[tuple[Url, IfNotFiltered]]] = field( default_factory=dict ) @@ -76,7 +77,7 @@ def iter_lines_and_context( def iter_file_search_results( root: Path, ignore: list[Callable[[Url], IfFilterMatches]] = [], linkpattern: re.Pattern[str] = re.compile( r""" \[ [^\]]* \] @@ -117,7 +118,7 @@ def iter_file_search_results( def check_target_exists( file: Path, url: Url, in_code: bool, without_hash_pattern: re.Pattern[str] = re.compile( r""" @@ -137,19 +138,19 @@ def check_target_exists( ): if in_code: reference_point = CODE_EXAMPLES_REFERENCE_POINT cleanurl = url else: if url.startswith("https://"): EXTERNAL_URLS.add(url) return reference_point = file.parent match = without_hash_pattern.match(url) if not match: raise Exception("could not match link, this should not happen") cleanurl = match.group(1) if not cleanurl: return joined = reference_point / cleanurl try: joined.resolve(strict=True) except FileNotFoundError: @@ -165,8 +166,8 @@ def check_target_exists( class IsExternal: desc: str = "a https link to an external url" def __call__(self, url: Url): if url.startswith("https://"): return True return False @@ -212,22 +213,22 @@ class IsWellFormed: re.VERBOSE, ) def __call__(self, url: Url): if self.wellformed_pat.match(url): return True return False if __name__ == "__main__": filters = [IsExternal(), 
IsWellFormed()] nasty_urls: list[SearchResult] = [] for searchresult in iter_file_search_results(DOCS_ROOT, ignore=filters): searchresult = str(searchresult) if searchresult: nasty_urls.append(searchresult) if nasty_urls: print("\nThese links are not well-formed:\n") print("\n".join(nasty_urls)) else: print(f"\nEach link under {DOCS_ROOT} is:\n") for filter in filters: -
Nils Lindemann revised this gist
Oct 30, 2023 . 1 changed file with 15 additions and 9 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,9 +7,13 @@ from pathlib import Path from typing import Any, Callable, Generator Line = str LineNumber = int Link = str PatternSnippet = str IfNotFiltered = bool IfFilterMatches = bool IfInCode = bool # config @@ -26,7 +30,9 @@ @dataclass class FileSearchResults: filepath: Path links: dict[LineNumber, list[tuple[Link, IfNotFiltered]]] = field( default_factory=dict ) def __str__(self): result = [" ", str(self.filepath), "\n"] @@ -58,7 +64,7 @@ def iter_markdowns(path: Path) -> Generator[Path, Any, None]: def iter_lines_and_context( filepath: Path, ) -> Generator[tuple[LineNumber, Line, IfInCode], Any, None]: with filepath.open("r", encoding="utf-8") as f: in_code = False for lnum, line in enumerate(f, start=1): @@ -70,7 +76,7 @@ def iter_lines_and_context( def iter_file_search_results( root: Path, ignore: list[Callable[[Link], IfFilterMatches]] = [], linkpattern: re.Pattern[str] = re.compile( r""" \[ [^\]]* \] @@ -169,23 +175,23 @@ def __call__(self, link: Link): class IsWellFormed: desc: str = "a local relative link to a .md / .png / .py" word: PatternSnippet = r""" [a-z][a-z0-9]* (?: [-_] [a-z0-9]+ )* """ path: PatternSnippet = rf""" (?: [.][.]/ )* (?: {word}/ )* """ filename: PatternSnippet = rf""" {word}[.](?: md|png|py ) """ hash: PatternSnippet = rf""" (?: [#][^#]+ ) """ -
Nils Lindemann revised this gist
Oct 29, 2023 . 1 changed file with 19 additions and 19 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -24,7 +24,7 @@ @dataclass class FileSearchResults: filepath: Path links: dict[LineNumber, list[tuple[Link, bool]]] = field(default_factory=dict) @@ -68,7 +68,7 @@ def iter_lines_and_context( yield (lnum, line, in_code) def iter_file_search_results( root: Path, ignore: list[Callable[[Link], bool]] = [], linkpattern: re.Pattern[str] = re.compile( @@ -88,9 +88,9 @@ def iter_file_results( """, re.VERBOSE, ), ) -> Generator[FileSearchResults, Any, None]: for filepath in iter_markdowns(root): results = FileSearchResults(filepath=filepath.resolve(strict=True)) for lnum, line, in_code in iter_lines_and_context(filepath): pattern = in_code_linkpattern if in_code else linkpattern for match in pattern.finditer(line): @@ -102,9 +102,9 @@ def iter_file_results( line_results.append((link, True)) # type: ignore for filter in ignore: for lnum, links in results.links.items(): for index, link in enumerate(links): if link[1] and filter(link[0]): links[index] = (link[0], False) if results.links: yield results @@ -116,8 +116,8 @@ def check_target_exists( without_hash_pattern: re.Pattern[str] = re.compile( r""" ^ ( [^#]* ) # part before the hash (?: [#].* )? 
# the hash $ """, re.VERBOSE, @@ -140,7 +140,7 @@ def check_target_exists( match = without_hash_pattern.match(link) if not match: raise Exception("could not match link, this should not happen") cleanlink = match.group(1) if not cleanlink: return joined = reference_point / cleanlink @@ -166,7 +166,7 @@ def __call__(self, link: Link): @dataclass class IsWellFormed: desc: str = "a local relative link to a .md / .png / .py" word: str = r""" @@ -213,15 +213,15 @@ def __call__(self, link: Link): if __name__ == "__main__": filters = [IsExternal(), IsWellFormed()] nasty_links: list[str] = [] for result in iter_file_search_results(DOCS_ROOT, ignore=filters): result = str(result) if result: nasty_links.append(result) if nasty_links: print("\nThese links are not well-formed:\n") print("\n".join(nasty_links)) else: print(f"\nEach link under {DOCS_ROOT} is:\n") for filter in filters: -
Nils Lindemann revised this gist
Oct 29, 2023 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -167,7 +167,7 @@ def __call__(self, link: Link): @dataclass class WellFormed: desc: str = "a local relative link to a .md / .png / .py" word: str = r""" [a-z][a-z0-9]* -
Nils Lindemann revised this gist
Oct 29, 2023 . 1 changed file with 0 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,6 @@ from typing import Any, Callable, Generator Link = str LineNumber = int Line = str -
Nils Lindemann revised this gist
Oct 29, 2023 . 1 changed file with 103 additions and 95 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,72 +1,78 @@ # Link checker script for https://github.com/tiangolo/fastapi # Place it under fastapi/scripts or configure DOCS_ROOT below import os import re from dataclasses import dataclass, field from pathlib import Path from typing import Any, Callable, Generator Link = str RawPath = str LineNumber = int Line = str # config DOCS_ROOT = Path("../docs").resolve(strict=True) # end config CODE_EXAMPLES_REFERENCE_POINT = DOCS_ROOT / "en/docs" NONEXISTING_TARGETS: set[Path] = set() EXTERNAL_URLS: set[Link] = set() @dataclass class FileLinks: filepath: Path links: dict[LineNumber, list[tuple[Link, bool]]] = field(default_factory=dict) def __str__(self): result = [" ", str(self.filepath), "\n"] for lnum, links in self.links.items(): not_filtered = [match[0] for match in links if match[1]] if not_filtered: result.extend( [ " ", "[", str(lnum), "] ", ", ".join(not_filtered), "\n", ] ) if len(result) > 3: return "".join(result) return "" def iter_markdowns(path: Path) -> Generator[Path, Any, None]: for root, _, files in os.walk(path): root = Path(root) for file in files: if file.endswith(".md"): yield root / file def iter_lines_and_context( filepath: Path, ) -> Generator[tuple[LineNumber, Line, bool], Any, None]: with filepath.open("r", encoding="utf-8") as f: in_code = False for lnum, line in enumerate(f, start=1): if line.lstrip().startswith("```"): in_code = not in_code continue yield (lnum, line, in_code) def iter_file_results( root: Path, ignore: list[Callable[[Link], bool]] = [], linkpattern: re.Pattern[str] = re.compile( r""" \[ [^\]]* \] \( @@ -75,111 +81,116 @@ def iter_file_results( """, re.VERBOSE, ), in_code_linkpattern: re.Pattern[str] = 
re.compile( r""" [{]!> [^\S\n]* ( [\S]+?[.]py ) [^\S\n]* ![}] """, re.VERBOSE, ), ) -> Generator[FileLinks, Any, None]: for filepath in iter_markdowns(root): results = FileLinks(filepath=filepath.resolve(strict=True)) for lnum, line, in_code in iter_lines_and_context(filepath): pattern = in_code_linkpattern if in_code else linkpattern for match in pattern.finditer(line): link = match.group(1) check_target_exists(filepath, link, in_code) if lnum not in results.links: line_results = [] results.links[lnum] = line_results line_results.append((link, True)) # type: ignore for filter in ignore: for lnum, links in results.links.items(): for index, match in enumerate(links): if match[1] and filter(match[0]): links[index] = (match[0], False) if results.links: yield results def check_target_exists( file: Path, link: Link, in_code: bool, without_hash_pattern: re.Pattern[str] = re.compile( r""" ^ ( [^#]* ) # path part before the hash (?: [#].* )? # hash $ """, re.VERBOSE, ), lang_id_pattern: re.Pattern[str] = re.compile( r""" /[a-z]{2}/docs/ # matches the two digit language identifier """, re.VERBOSE, ), ): if in_code: reference_point = CODE_EXAMPLES_REFERENCE_POINT cleanlink = link else: if link.startswith("https://"): EXTERNAL_URLS.add(link) return reference_point = file.parent match = without_hash_pattern.match(link) if not match: raise Exception("could not match link, this should not happen") cleanlink = match.group(1).strip() if not cleanlink: return joined = reference_point / cleanlink try: joined.resolve(strict=True) except FileNotFoundError: try: joined = Path(lang_id_pattern.sub("/en/docs/", str(joined))).resolve( strict=True ) except FileNotFoundError: NONEXISTING_TARGETS.add(joined.resolve(strict=False)) @dataclass class IsExternal: desc: str = "a https link to an external url" def __call__(self, link: Link): if link.startswith("https://"): return True return False @dataclass class WellFormed: desc: str = "a local relative link to a .md or a .png" word: str = r""" 
[a-z][a-z0-9]* (?: [-_] [a-z0-9]+ )* """ path: str = rf""" (?: [.][.]/ )* (?: {word}/ )* """ filename: str = rf""" {word}[.](?: md|png|py ) """ hash: str = rf""" (?: [#][^#]+ ) """ wellformed_pat: re.Pattern[str] = re.compile( rf""" ^ (?: @@ -196,31 +207,28 @@ class WellFormed: re.VERBOSE, ) def __call__(self, link: Link): if self.wellformed_pat.match(link): return True return False if __name__ == "__main__": filters = [IsExternal(), WellFormed()] all_results: list[str] = [] for file_result in iter_file_results(DOCS_ROOT, ignore=filters): file_result = str(file_result) if file_result: all_results.append(file_result) if all_results: print("\nThese links are not well-formed:\n") print("\n".join(all_results)) else: print(f"\nEach link under {DOCS_ROOT} is:\n") for filter in filters: print(f"* {filter.desc}") if NONEXISTING_TARGETS: print("\nThese files are referenced in links but do not exist:") for url in sorted(NONEXISTING_TARGETS): print(f" {url}") -
Nils Lindemann revised this gist
Oct 28, 2023 . 1 changed file with 35 additions and 35 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -63,6 +63,37 @@ def iter_relevant_lines(file: Path) -> Generator[tuple[line_number, line], Any, yield (lnum, line) def iter_file_results( path: str, ignore: list[Callable[[link], bool]] = [], linkpat: re.Pattern[str] = re.compile( r""" \[ [^\]]* \] \( ( [^)]* ) \) """, re.VERBOSE, ), ) -> Generator[FileResults, Any, None]: for file in iter_markdowns(path): file_results = FileResults(file=file) for lnum, line in iter_relevant_lines(file): for match in linkpat.finditer(line): check_target_exists(file, match.group(1)) if lnum not in file_results.matches: line_results = [] file_results.matches[lnum] = line_results line_results.append((match.group(1), True)) # type: ignore for filter in ignore: for lnum, matches in file_results.matches.items(): for index, match in enumerate(matches): if match[1] and filter(match[0]): matches[index] = (match[0], False) if file_results.matches: yield file_results def check_target_exists( file: Path, url: link, @@ -104,37 +135,6 @@ def check_target_exists( NONEXISTING_TARGETS.add(hashless_url.resolve(strict=False)) # @dataclass # class IsPng: # desc: str = "is a link to a .png" @@ -205,10 +205,10 @@ def __call__(self, url: link): if __name__ == "__main__": counter = 0 filters = [IsExternal(), WellFormed()] for file_results in iter_file_results(SEARCH_HERE, ignore=filters): file_results = str(file_results) if file_results: print(file_results) counter += 1 if not counter: print(f"All links under {SEARCH_HERE} match one of these criteria:") -
Nils Lindemann revised this gist
Oct 28, 2023 . 1 changed file with 6 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -147,7 +147,7 @@ def iter_file_matches( @dataclass class IsExternal: desc: str = "is a https link to an external url" def __call__(self, url: link): if url.startswith("https://"): @@ -156,8 +156,8 @@ def __call__(self, url: link): @dataclass class WellFormed: desc: str = "is a local relative link to a .md or a .png" word: str = r""" (?: @@ -204,14 +204,14 @@ def __call__(self, url: link): if __name__ == "__main__": counter = 0 filters = [IsExternal(), WellFormed()] for file_matches in iter_file_matches(SEARCH_HERE, ignore=filters): file_matches = str(file_matches) if file_matches: print(file_matches) counter += 1 if not counter: print(f"All links under {SEARCH_HERE} match one of these criteria:") for filter in filters: print(f" * {filter.desc}") @@ -221,6 +221,6 @@ def __call__(self, url: link): # print(f" {url}") if NONEXISTING_TARGETS: print("\nThese non-existing targets are referenced by some links:") for url in sorted(NONEXISTING_TARGETS): print(f" {url}") -
Nils Lindemann created this gist
Oct 28, 2023 .There are no files selected for viewing
"""Link checker for a tree of Markdown files.

Walks every ``.md`` file below ``SEARCH_HERE``, extracts inline Markdown links
(``[text](target)``) that are outside fenced code blocks, reports the links
that none of the configured filters accept, and lists the local link targets
that do not exist on disk.
"""

import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Generator, Optional

link = str
raw_path = str
line_number = int
line = str

# config

SEARCH_HERE = "../docs"

# end config

# Filled as a side effect of check_target_exists().
NONEXISTING_TARGETS: set[Path] = set()
EXTERNAL_URLS: set[link] = set()


@dataclass
class FileResults:
    """Links found in one Markdown file, grouped by line number.

    Each entry of ``matches`` is a ``(link, flag)`` tuple; ``flag`` is True
    while no filter has accepted the link, i.e. the link is still reported.
    """

    file: Path
    matches: dict[line_number, list[tuple[link, bool]]] = field(default_factory=dict)

    def __str__(self):
        """Return a report of the unfiltered links, or "" if there are none."""
        result = [str(self.file), "\n"]
        for lnum, matches in self.matches.items():
            minus_filter = [url for url, reported in matches if reported]
            if minus_filter:
                result.extend(
                    [" ", "[", str(lnum), "] ", ", ".join(minus_filter), "\n"]
                )
        # Only the file name and its newline are present: nothing to report.
        if len(result) > 2:
            return "".join(result)
        return ""


def iter_markdowns(path: raw_path) -> Generator[Path, Any, None]:
    """Yield the path of every ``.md`` file below *path*.

    Directories and file names are visited in sorted order so that the report
    is the same on every run (``os.walk`` itself gives no ordering guarantee).
    """
    for root, dirs, files in os.walk(path):
        dirs.sort()  # in-place sort steers the order os.walk descends in
        base = Path(root)
        for name in sorted(files):
            if name.endswith(".md"):
                yield base / name


def iter_relevant_lines(file: Path) -> Generator[tuple[line_number, line], Any, None]:
    """Yield ``(line_number, text)`` for each line outside fenced code blocks.

    Line numbers start at 1. The fence lines themselves (lines starting with
    three backticks after optional indentation) are never yielded.
    """
    with file.open("r", encoding="utf-8") as f:
        in_code = False
        for lnum, text in enumerate(f, start=1):
            if text.lstrip().startswith("```"):
                in_code = not in_code
                continue
            if not in_code:
                yield (lnum, text)


def _target_exists(path: Path) -> bool:
    """Return True if *path* resolves to an existing file system entry."""
    try:
        path.resolve(strict=True)
    except (FileNotFoundError, NotADirectoryError):
        # NotADirectoryError: a component of the path is a regular file,
        # e.g. the link "page.md/section".
        return False
    return True


def check_target_exists(
    file: Path,
    url: link,
    strip_hash_pat: re.Pattern[str] = re.compile(
        r"""
        ^
            ( [^#]* )
            (?: [#].* )?
        $
        """,
        re.VERBOSE,
    ),
    replace_pat: re.Pattern[str] = re.compile(
        r"""
        (?<![^/])      # only at the start of a path component
        [a-z]{2}/docs/
        """,
        re.VERBOSE,
    ),
):
    """Record *url* as external or, if it is a dead local link, as missing.

    Links starting with ``https://`` are added to ``EXTERNAL_URLS``. Any other
    link is stripped of its ``#fragment`` and resolved relative to the
    directory of *file*. If the target does not exist, a second attempt is
    made with the ``xx/docs/`` component replaced by ``en/docs/``. If that
    fails too, the original target is added to ``NONEXISTING_TARGETS``.

    Raises:
        ValueError: if *url* cannot be split by *strip_hash_pat*.
    """
    if url.startswith("https://"):
        EXTERNAL_URLS.add(url)
        return
    match = strip_hash_pat.match(url)
    if not match:
        raise ValueError("could not match url, this should not happen")
    targetpath = match.group(1).strip()
    if not targetpath:
        # Pure "#fragment" link into the same document.
        return
    target = file.parent / targetpath
    if _target_exists(target):
        return
    # as_posix() keeps the "/"-based pattern working with Windows paths too.
    fallback = Path(replace_pat.sub("en/docs/", target.as_posix()))
    if not _target_exists(fallback):
        NONEXISTING_TARGETS.add(target.resolve(strict=False))


def iter_file_matches(
    path: str,
    ignore: Optional[list[Callable[[link], bool]]] = None,
    linkpat: re.Pattern[str] = re.compile(
        r"""
        \[ [^\]]* \]
        \(
            ( [^)]* )
        \)
        """,
        re.VERBOSE,
    ),
) -> Generator[FileResults, Any, None]:
    """Yield a ``FileResults`` for every Markdown file below *path* with links.

    Every link found is also passed to ``check_target_exists``. A link is
    flagged False (not reported) as soon as one callable in *ignore* returns
    a true value for it; with no filters every link stays flagged True.
    Files without any link are not yielded.
    """
    filters = ignore or ()
    for file in iter_markdowns(path):
        file_results = FileResults(file=file)
        for lnum, text in iter_relevant_lines(file):
            for match in linkpat.finditer(text):
                url = match.group(1)
                check_target_exists(file, url)
                reported = not any(accepts(url) for accepts in filters)
                file_results.matches.setdefault(lnum, []).append((url, reported))
        if file_results.matches:
            yield file_results


# @dataclass
# class IsPng:
#     desc: str = "is a link to a .png"

#     def __call__(self, url: link):
#         if url.endswith(".png"):
#             return True
#         return False


@dataclass
class IsExternal:
    """Filter accepting links that start with ``https://``."""

    desc: str = "is a link to an external file"

    def __call__(self, url: link):
        return url.startswith("https://")


@dataclass
class WellFormatted:
    """Filter accepting relative links to ``.md`` / ``.png`` files.

    Accepted shapes are ``[../]*[word/]*word.(md|png)[#fragment]`` and a bare
    ``#fragment``, where a word is made of lowercase alphanumeric parts that
    each start with a letter and are joined by ``-``. ``ok_url_pat`` is
    compiled once, when the class is created, from the snippet defaults.
    """

    desc: str = "is a link which is well formatted"

    word: str = r"""
        (?: [a-z][a-z0-9]* - )*
        [a-z][a-z0-9]*
    """

    path: str = rf"""
        (?: [.][.]/ )*
        (?: {word}/ )*
    """

    filename: str = rf"""
        {word}[.](?: md|png )
    """

    hash: str = r"""
        (?: [#][^#]+ )
    """

    ok_url_pat: re.Pattern[str] = re.compile(
        rf"""
        ^
        (?:
            (?: {path} {filename} {hash}? )
            |
            {hash}
        )
        $
        """,
        re.VERBOSE,
    )

    def __call__(self, url: link):
        return self.ok_url_pat.match(url) is not None


def main() -> None:
    """Check all links below ``SEARCH_HERE`` and print the findings."""
    filters = [IsExternal(), WellFormatted()]
    found_any = False
    for file_matches in iter_file_matches(SEARCH_HERE, ignore=filters):
        report = str(file_matches)
        if report:
            print(report)
            found_any = True
    if not found_any:
        print(f"All links under {SEARCH_HERE} match one of those criteria:")
        for link_filter in filters:
            print(f" * {link_filter.desc}")

    # if EXTERNAL_URLS:
    #     print("\nExternal URLs:")
    #     for url in sorted(EXTERNAL_URLS):
    #         print(f" {url}")

    if NONEXISTING_TARGETS:
        print("\nThese non-existing Targets are referenced by some links:")
        for target in sorted(NONEXISTING_TARGETS):
            print(f" {target}")


if __name__ == "__main__":
    main()