nilslindemann · October 31, 2023 08:27 · Oct 31, 2023 · Oct 30, 2023 · Oct 30, 2023 · Oct 29, 2023
diff --git a/linkchecker.py b/linkchecker.py
@@ -77,7 +77,7 @@ def iter_lines_and_context(
 
 def iter_file_search_results(
     root: Path,
-    ignore: list[Callable[[Url], IfFilterMatches]] = [],
+    ignore: list[Callable[[Url], IfFilterMatches]],
     linkpattern: re.Pattern[str] = re.compile(
         r"""
         \[ [^\]]* \]
@@ -108,7 +108,7 @@ def iter_file_search_results(
                     results.links[lnum] = line_results
                 line_results.append((link, True))  # type: ignore
         for filter in ignore:
-            for lnum, links in results.links.items():
+            for links in results.links.values():
                 for index, link in enumerate(links):
                     if link[1] and filter(link[0]):
                         links[index] = (link[0], False)
@@ -174,7 +174,7 @@ def __call__(self, url: Url):
 
 @dataclass
 class IsWellFormed:
-    desc: str = "a local relative link to a .md / .png / .py"
+    desc: str = "a well-formed local relative link to a .md / .png / .py"
 
     word: PatternSnippet = r"""
     [a-z][a-z0-9]*
@@ -192,7 +192,7 @@ class IsWellFormed:
     {word}[.](?: md|png|py )
     """
 
-    hash: PatternSnippet = rf"""
+    hash: PatternSnippet = r"""
     (?: [#][^#]+ )
     """
 
@@ -227,7 +227,10 @@ def __call__(self, url: Url):
         if searchresult:
             nasty_urls.append(searchresult)
     if nasty_urls:
-        print("\nThese links are not well-formed:\n")
+        print(f"\nThese links under {DOCS_ROOT} are not:\n")
+        for filter in filters:
+            print(f"* {filter.desc}")
+        print()
         print("\n".join(nasty_urls))
     else:
         print(f"\nEach link under {DOCS_ROOT} is:\n")

diff --git a/linkchecker.py b/linkchecker.py
@@ -9,7 +9,8 @@
 
 Line = str
 LineNumber = int
-Link = str
+Url = str
+SearchResult = str
 PatternSnippet = str
 IfNotFiltered = bool
 IfFilterMatches = bool
@@ -24,13 +25,13 @@
 CODE_EXAMPLES_REFERENCE_POINT = DOCS_ROOT / "en/docs"
 
 NONEXISTING_TARGETS: set[Path] = set()
-EXTERNAL_URLS: set[Link] = set()
+EXTERNAL_URLS: set[Url] = set()
 
 
 @dataclass
 class FileSearchResults:
     filepath: Path
-    links: dict[LineNumber, list[tuple[Link, IfNotFiltered]]] = field(
+    links: dict[LineNumber, list[tuple[Url, IfNotFiltered]]] = field(
         default_factory=dict
     )
 
@@ -76,7 +77,7 @@ def iter_lines_and_context(
 
 def iter_file_search_results(
     root: Path,
-    ignore: list[Callable[[Link], IfFilterMatches]] = [],
+    ignore: list[Callable[[Url], IfFilterMatches]] = [],
     linkpattern: re.Pattern[str] = re.compile(
         r"""
         \[ [^\]]* \]
@@ -117,7 +118,7 @@ def iter_file_search_results(
 
 def check_target_exists(
     file: Path,
-    link: Link,
+    url: Url,
     in_code: bool,
     without_hash_pattern: re.Pattern[str] = re.compile(
         r"""
@@ -137,19 +138,19 @@ def check_target_exists(
 ):
     if in_code:
         reference_point = CODE_EXAMPLES_REFERENCE_POINT
-        cleanlink = link
+        cleanurl = url
     else:
-        if link.startswith("https://"):
-            EXTERNAL_URLS.add(link)
+        if url.startswith("https://"):
+            EXTERNAL_URLS.add(url)
             return
         reference_point = file.parent
-        match = without_hash_pattern.match(link)
+        match = without_hash_pattern.match(url)
         if not match:
             raise Exception("could not match link, this should not happen")
-        cleanlink = match.group(1)
-        if not cleanlink:
+        cleanurl = match.group(1)
+        if not cleanurl:
             return
-    joined = reference_point / cleanlink
+    joined = reference_point / cleanurl
     try:
         joined.resolve(strict=True)
     except FileNotFoundError:
@@ -165,8 +166,8 @@ def check_target_exists(
 class IsExternal:
     desc: str = "a https link to an external url"
 
-    def __call__(self, link: Link):
-        if link.startswith("https://"):
+    def __call__(self, url: Url):
+        if url.startswith("https://"):
             return True
         return False
 
@@ -212,22 +213,22 @@ class IsWellFormed:
         re.VERBOSE,
     )
 
-    def __call__(self, link: Link):
-        if self.wellformed_pat.match(link):
+    def __call__(self, url: Url):
+        if self.wellformed_pat.match(url):
             return True
         return False
 
 
 if __name__ == "__main__":
     filters = [IsExternal(), IsWellFormed()]
-    nasty_links: list[str] = []
-    for result in iter_file_search_results(DOCS_ROOT, ignore=filters):
-        result = str(result)
-        if result:
-            nasty_links.append(result)
-    if nasty_links:
+    nasty_urls: list[SearchResult] = []
+    for searchresult in iter_file_search_results(DOCS_ROOT, ignore=filters):
+        searchresult = str(searchresult)
+        if searchresult:
+            nasty_urls.append(searchresult)
+    if nasty_urls:
         print("\nThese links are not well-formed:\n")
-        print("\n".join(nasty_links))
+        print("\n".join(nasty_urls))
     else:
         print(f"\nEach link under {DOCS_ROOT} is:\n")
         for filter in filters:

diff --git a/linkchecker.py b/linkchecker.py
@@ -7,9 +7,13 @@
 from pathlib import Path
 from typing import Any, Callable, Generator
 
-Link = str
-LineNumber = int
 Line = str
+LineNumber = int
+Link = str
+PatternSnippet = str
+IfNotFiltered = bool
+IfFilterMatches = bool
+IfInCode = bool
 
 # config
 
@@ -26,7 +30,9 @@
 @dataclass
 class FileSearchResults:
     filepath: Path
-    links: dict[LineNumber, list[tuple[Link, bool]]] = field(default_factory=dict)
+    links: dict[LineNumber, list[tuple[Link, IfNotFiltered]]] = field(
+        default_factory=dict
+    )
 
     def __str__(self):
         result = ["    ", str(self.filepath), "\n"]
@@ -58,7 +64,7 @@ def iter_markdowns(path: Path) -> Generator[Path, Any, None]:
 
 def iter_lines_and_context(
     filepath: Path,
-) -> Generator[tuple[LineNumber, Line, bool], Any, None]:
+) -> Generator[tuple[LineNumber, Line, IfInCode], Any, None]:
     with filepath.open("r", encoding="utf-8") as f:
         in_code = False
         for lnum, line in enumerate(f, start=1):
@@ -70,7 +76,7 @@ def iter_lines_and_context(
 
 def iter_file_search_results(
     root: Path,
-    ignore: list[Callable[[Link], bool]] = [],
+    ignore: list[Callable[[Link], IfFilterMatches]] = [],
     linkpattern: re.Pattern[str] = re.compile(
         r"""
         \[ [^\]]* \]
@@ -169,23 +175,23 @@ def __call__(self, link: Link):
 class IsWellFormed:
     desc: str = "a local relative link to a .md / .png / .py"
 
-    word: str = r"""
+    word: PatternSnippet = r"""
     [a-z][a-z0-9]*
     (?:
         [-_]
         [a-z0-9]+
     )*
     """
 
-    path: str = rf"""
+    path: PatternSnippet = rf"""
     (?: [.][.]/ )* (?: {word}/ )*
     """
 
-    filename: str = rf"""
+    filename: PatternSnippet = rf"""
     {word}[.](?: md|png|py )
     """
 
-    hash: str = rf"""
+    hash: PatternSnippet = rf"""
     (?: [#][^#]+ )
     """
 

diff --git a/linkchecker.py b/linkchecker.py
@@ -24,7 +24,7 @@
 
 
 @dataclass
-class FileLinks:
+class FileSearchResults:
     filepath: Path
     links: dict[LineNumber, list[tuple[Link, bool]]] = field(default_factory=dict)
 
@@ -68,7 +68,7 @@ def iter_lines_and_context(
             yield (lnum, line, in_code)
 
 
-def iter_file_results(
+def iter_file_search_results(
     root: Path,
     ignore: list[Callable[[Link], bool]] = [],
     linkpattern: re.Pattern[str] = re.compile(
@@ -88,9 +88,9 @@ def iter_file_results(
         """,
         re.VERBOSE,
     ),
-) -> Generator[FileLinks, Any, None]:
+) -> Generator[FileSearchResults, Any, None]:
     for filepath in iter_markdowns(root):
-        results = FileLinks(filepath=filepath.resolve(strict=True))
+        results = FileSearchResults(filepath=filepath.resolve(strict=True))
         for lnum, line, in_code in iter_lines_and_context(filepath):
             pattern = in_code_linkpattern if in_code else linkpattern
             for match in pattern.finditer(line):
@@ -102,9 +102,9 @@ def iter_file_results(
                 line_results.append((link, True))  # type: ignore
         for filter in ignore:
             for lnum, links in results.links.items():
-                for index, match in enumerate(links):
-                    if match[1] and filter(match[0]):
-                        links[index] = (match[0], False)
+                for index, link in enumerate(links):
+                    if link[1] and filter(link[0]):
+                        links[index] = (link[0], False)
         if results.links:
             yield results
 
@@ -116,8 +116,8 @@ def check_target_exists(
     without_hash_pattern: re.Pattern[str] = re.compile(
         r"""
         ^
-        ( [^#]* )     # path part before the hash
-        (?: [#].* )?  # hash
+        ( [^#]* )     # part before the hash
+        (?: [#].* )?  # the hash
         $
         """,
         re.VERBOSE,
@@ -140,7 +140,7 @@ def check_target_exists(
         match = without_hash_pattern.match(link)
         if not match:
             raise Exception("could not match link, this should not happen")
-        cleanlink = match.group(1).strip()
+        cleanlink = match.group(1)
         if not cleanlink:
             return
     joined = reference_point / cleanlink
@@ -166,7 +166,7 @@ def __call__(self, link: Link):
 
 
 @dataclass
-class WellFormed:
+class IsWellFormed:
     desc: str = "a local relative link to a .md / .png / .py"
 
     word: str = r"""
@@ -213,15 +213,15 @@ def __call__(self, link: Link):
 
 
 if __name__ == "__main__":
-    filters = [IsExternal(), WellFormed()]
-    all_results: list[str] = []
-    for file_result in iter_file_results(DOCS_ROOT, ignore=filters):
-        file_result = str(file_result)
-        if file_result:
-            all_results.append(file_result)
-    if all_results:
+    filters = [IsExternal(), IsWellFormed()]
+    nasty_links: list[str] = []
+    for result in iter_file_search_results(DOCS_ROOT, ignore=filters):
+        result = str(result)
+        if result:
+            nasty_links.append(result)
+    if nasty_links:
         print("\nThese links are not well-formed:\n")
-        print("\n".join(all_results))
+        print("\n".join(nasty_links))
     else:
         print(f"\nEach link under {DOCS_ROOT} is:\n")
         for filter in filters:

diff --git a/linkchecker.py b/linkchecker.py
@@ -167,7 +167,7 @@ def __call__(self, link: Link):
 
 @dataclass
 class WellFormed:
-    desc: str = "a local relative link to a .md or a .png"
+    desc: str = "a local relative link to a .md / .png / .py"
 
     word: str = r"""
     [a-z][a-z0-9]*

diff --git a/linkchecker.py b/linkchecker.py
@@ -8,7 +8,6 @@
 from typing import Any, Callable, Generator
 
 Link = str
-RawPath = str
 LineNumber = int
 Line = str
 

diff --git a/linkchecker.py b/linkchecker.py
@@ -1,72 +1,78 @@
+# Link checker script for https://github.com/tiangolo/fastapi
+# Place it under fastapi/scripts or configure DOCS_ROOT below
+
 import os
 import re
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable, Generator
 
-link = str
-raw_path = str
-line_number = int
-line = str
+Link = str
+RawPath = str
+LineNumber = int
+Line = str
 
 # config
 
-SEARCH_HERE = "../docs"
+DOCS_ROOT = Path("../docs").resolve(strict=True)
 
 # end config
 
+CODE_EXAMPLES_REFERENCE_POINT = DOCS_ROOT / "en/docs"
+
 NONEXISTING_TARGETS: set[Path] = set()
-EXTERNAL_URLS: set[link] = set()
+EXTERNAL_URLS: set[Link] = set()
 
 
 @dataclass
-class FileResults:
-    file: Path
-    matches: dict[line_number, list[tuple[link, bool]]] = field(default_factory=dict)
+class FileLinks:
+    filepath: Path
+    links: dict[LineNumber, list[tuple[Link, bool]]] = field(default_factory=dict)
 
     def __str__(self):
-        result = [str(self.file), "\n"]
-        for lnum, matches in self.matches.items():
-            minus_filter = [match[0] for match in matches if match[1]]
-            if minus_filter:
+        result = ["    ", str(self.filepath), "\n"]
+        for lnum, links in self.links.items():
+            not_filtered = [match[0] for match in links if match[1]]
+            if not_filtered:
                 result.extend(
                     [
                         "    ",
                         "[",
                         str(lnum),
                         "] ",
-                        ", ".join(minus_filter),
+                        ", ".join(not_filtered),
                         "\n",
                     ]
                 )
-        if len(result) > 2:
+        if len(result) > 3:
             return "".join(result)
         return ""
 
 
-def iter_markdowns(path: raw_path) -> Generator[Path, Any, None]:
+def iter_markdowns(path: Path) -> Generator[Path, Any, None]:
     for root, _, files in os.walk(path):
         root = Path(root)
         for file in files:
             if file.endswith(".md"):
                 yield root / file
 
 
-def iter_relevant_lines(file: Path) -> Generator[tuple[line_number, line], Any, None]:
-    with file.open("r", encoding="utf-8") as f:
+def iter_lines_and_context(
+    filepath: Path,
+) -> Generator[tuple[LineNumber, Line, bool], Any, None]:
+    with filepath.open("r", encoding="utf-8") as f:
         in_code = False
         for lnum, line in enumerate(f, start=1):
             if line.lstrip().startswith("```"):
                 in_code = not in_code
                 continue
-            if not in_code:
-                yield (lnum, line)
+            yield (lnum, line, in_code)
 
 
 def iter_file_results(
-    path: str,
-    ignore: list[Callable[[link], bool]] = [],
-    linkpat: re.Pattern[str] = re.compile(
+    root: Path,
+    ignore: list[Callable[[Link], bool]] = [],
+    linkpattern: re.Pattern[str] = re.compile(
         r"""
         \[ [^\]]* \]
         \(
@@ -75,111 +81,116 @@ def iter_file_results(
         """,
         re.VERBOSE,
     ),
-) -> Generator[FileResults, Any, None]:
-    for file in iter_markdowns(path):
-        file_results = FileResults(file=file)
-        for lnum, line in iter_relevant_lines(file):
-            for match in linkpat.finditer(line):
-                check_target_exists(file, match.group(1))
-                if lnum not in file_results.matches:
+    in_code_linkpattern: re.Pattern[str] = re.compile(
+        r"""
+        [{]!> [^\S\n]*
+        ( [\S]+?[.]py )
+        [^\S\n]* ![}]
+        """,
+        re.VERBOSE,
+    ),
+) -> Generator[FileLinks, Any, None]:
+    for filepath in iter_markdowns(root):
+        results = FileLinks(filepath=filepath.resolve(strict=True))
+        for lnum, line, in_code in iter_lines_and_context(filepath):
+            pattern = in_code_linkpattern if in_code else linkpattern
+            for match in pattern.finditer(line):
+                link = match.group(1)
+                check_target_exists(filepath, link, in_code)
+                if lnum not in results.links:
                     line_results = []
-                    file_results.matches[lnum] = line_results
-                line_results.append((match.group(1), True))  # type: ignore
+                    results.links[lnum] = line_results
+                line_results.append((link, True))  # type: ignore
         for filter in ignore:
-            for lnum, matches in file_results.matches.items():
-                for index, match in enumerate(matches):
+            for lnum, links in results.links.items():
+                for index, match in enumerate(links):
                     if match[1] and filter(match[0]):
-                        matches[index] = (match[0], False)
-        if file_results.matches:
-            yield file_results
+                        links[index] = (match[0], False)
+        if results.links:
+            yield results
 
 
 def check_target_exists(
     file: Path,
-    url: link,
-    strip_hash_pat: re.Pattern[str] = re.compile(
+    link: Link,
+    in_code: bool,
+    without_hash_pattern: re.Pattern[str] = re.compile(
         r"""
         ^
-        ( [^#]* )
-        (?: [#].* )?
+        ( [^#]* )     # path part before the hash
+        (?: [#].* )?  # hash
         $
         """,
         re.VERBOSE,
     ),
-    replace_pat: re.Pattern[str] = re.compile(
+    lang_id_pattern: re.Pattern[str] = re.compile(
         r"""
-        [a-z]{2}/docs/
+        /[a-z]{2}/docs/  # matches the two digit language identifier
         """,
         re.VERBOSE,
     ),
 ):
-    if url.startswith("https://"):
-        EXTERNAL_URLS.add(url)
-        return
-    match = strip_hash_pat.match(url)
-    if not match:
-        raise Exception("could not match url, this should not happen")
+    if in_code:
+        reference_point = CODE_EXAMPLES_REFERENCE_POINT
+        cleanlink = link
     else:
-        targetpath = match.group(1).strip()
-        if not targetpath:
+        if link.startswith("https://"):
+            EXTERNAL_URLS.add(link)
             return
-        hashless_url = file.parent / targetpath
+        reference_point = file.parent
+        match = without_hash_pattern.match(link)
+        if not match:
+            raise Exception("could not match link, this should not happen")
+        cleanlink = match.group(1).strip()
+        if not cleanlink:
+            return
+    joined = reference_point / cleanlink
+    try:
+        joined.resolve(strict=True)
+    except FileNotFoundError:
         try:
-            hashless_url.resolve(strict=True)
+            joined = Path(lang_id_pattern.sub("/en/docs/", str(joined))).resolve(
+                strict=True
+            )
         except FileNotFoundError:
-            try:
-                hashless_url = Path(
-                    replace_pat.sub("en/docs/", str(hashless_url))
-                ).resolve(strict=True)
-            except FileNotFoundError:
-                NONEXISTING_TARGETS.add(hashless_url.resolve(strict=False))
-
-
-# @dataclass
-# class IsPng:
-#     desc: str = "is a link to a .png"
-
-#     def __call__(self, url: link):
-#         if url.endswith(".png"):
-#             return True
-#         return False
+            NONEXISTING_TARGETS.add(joined.resolve(strict=False))
 
 
 @dataclass
 class IsExternal:
-    desc: str = "is a https link to an external url"
+    desc: str = "a https link to an external url"
 
-    def __call__(self, url: link):
-        if url.startswith("https://"):
+    def __call__(self, link: Link):
+        if link.startswith("https://"):
             return True
         return False
 
 
 @dataclass
 class WellFormed:
-    desc: str = "is a local relative link to a .md or a .png"
+    desc: str = "a local relative link to a .md or a .png"
 
     word: str = r"""
+    [a-z][a-z0-9]*
     (?:
-        [a-z][a-z0-9]*
-        -
+        [-_]
+        [a-z0-9]+
     )*
-    [a-z][a-z0-9]*
     """
 
     path: str = rf"""
     (?: [.][.]/ )* (?: {word}/ )*
     """
 
     filename: str = rf"""
-    {word}[.](?: md|png )
+    {word}[.](?: md|png|py )
     """
 
     hash: str = rf"""
     (?: [#][^#]+ )
     """
 
-    ok_url_pat: re.Pattern[str] = re.compile(
+    wellformed_pat: re.Pattern[str] = re.compile(
         rf"""
         ^
         (?:
@@ -196,31 +207,28 @@ class WellFormed:
         re.VERBOSE,
     )
 
-    def __call__(self, url: link):
-        if self.ok_url_pat.match(url):
+    def __call__(self, link: Link):
+        if self.wellformed_pat.match(link):
             return True
         return False
 
 
 if __name__ == "__main__":
-    counter = 0
     filters = [IsExternal(), WellFormed()]
-    for file_results in iter_file_results(SEARCH_HERE, ignore=filters):
-        file_results = str(file_results)
-        if file_results:
-            print(file_results)
-            counter += 1
-    if not counter:
-        print(f"All links under {SEARCH_HERE} match one of these criteria:")
+    all_results: list[str] = []
+    for file_result in iter_file_results(DOCS_ROOT, ignore=filters):
+        file_result = str(file_result)
+        if file_result:
+            all_results.append(file_result)
+    if all_results:
+        print("\nThese links are not well-formed:\n")
+        print("\n".join(all_results))
+    else:
+        print(f"\nEach link under {DOCS_ROOT} is:\n")
         for filter in filters:
-            print(f"    * {filter.desc}")
-
-    # if EXTERNAL_URLS:
-    #     print("\nExternal URLs:")
-    #     for url in sorted(EXTERNAL_URLS):
-    #         print(f"    {url}")
+            print(f"* {filter.desc}")
 
     if NONEXISTING_TARGETS:
-        print("\nThese non-existing targets are referenced by some links:")
+        print("\nThese files are referenced in links but do not exist:")
         for url in sorted(NONEXISTING_TARGETS):
             print(f"    {url}")
diff --git a/linkchecker.py b/linkchecker.py
@@ -63,6 +63,37 @@ def iter_relevant_lines(file: Path) -> Generator[tuple[line_number, line], Any,
                 yield (lnum, line)
 
 
+def iter_file_results(
+    path: str,
+    ignore: list[Callable[[link], bool]] = [],
+    linkpat: re.Pattern[str] = re.compile(
+        r"""
+        \[ [^\]]* \]
+        \(
+            ( [^)]* )
+        \)
+        """,
+        re.VERBOSE,
+    ),
+) -> Generator[FileResults, Any, None]:
+    for file in iter_markdowns(path):
+        file_results = FileResults(file=file)
+        for lnum, line in iter_relevant_lines(file):
+            for match in linkpat.finditer(line):
+                check_target_exists(file, match.group(1))
+                if lnum not in file_results.matches:
+                    line_results = []
+                    file_results.matches[lnum] = line_results
+                line_results.append((match.group(1), True))  # type: ignore
+        for filter in ignore:
+            for lnum, matches in file_results.matches.items():
+                for index, match in enumerate(matches):
+                    if match[1] and filter(match[0]):
+                        matches[index] = (match[0], False)
+        if file_results.matches:
+            yield file_results
+
+
 def check_target_exists(
     file: Path,
     url: link,
@@ -104,37 +135,6 @@ def check_target_exists(
                 NONEXISTING_TARGETS.add(hashless_url.resolve(strict=False))
 
 
-def iter_file_matches(
-    path: str,
-    ignore: list[Callable[[link], bool]] = [],
-    linkpat: re.Pattern[str] = re.compile(
-        r"""
-        \[ [^\]]* \]
-        \(
-            ( [^)]* )
-        \)
-        """,
-        re.VERBOSE,
-    ),
-) -> Generator[FileResults, Any, None]:
-    for file in iter_markdowns(path):
-        file_results = FileResults(file=file)
-        for lnum, line in iter_relevant_lines(file):
-            for match in linkpat.finditer(line):
-                check_target_exists(file, match.group(1))
-                if lnum not in file_results.matches:
-                    line_results = []
-                    file_results.matches[lnum] = line_results
-                line_results.append((match.group(1), True))  # type: ignore
-        for filter in ignore:
-            for lnum, matches in file_results.matches.items():
-                for index, match in enumerate(matches):
-                    if match[1] and filter(match[0]):
-                        matches[index] = (match[0], False)
-        if file_results.matches:
-            yield file_results
-
-
 # @dataclass
 # class IsPng:
 #     desc: str = "is a link to a .png"
@@ -205,10 +205,10 @@ def __call__(self, url: link):
 if __name__ == "__main__":
     counter = 0
     filters = [IsExternal(), WellFormed()]
-    for file_matches in iter_file_matches(SEARCH_HERE, ignore=filters):
-        file_matches = str(file_matches)
-        if file_matches:
-            print(file_matches)
+    for file_results in iter_file_results(SEARCH_HERE, ignore=filters):
+        file_results = str(file_results)
+        if file_results:
+            print(file_results)
             counter += 1
     if not counter:
         print(f"All links under {SEARCH_HERE} match one of these criteria:")

diff --git a/linkchecker.py b/linkchecker.py
@@ -147,7 +147,7 @@ def iter_file_matches(
 
 @dataclass
 class IsExternal:
-    desc: str = "is a link to an external file"
+    desc: str = "is a https link to an external url"
 
     def __call__(self, url: link):
         if url.startswith("https://"):
@@ -156,8 +156,8 @@ def __call__(self, url: link):
 
 
 @dataclass
-class WellFormatted:
-    desc: str = "is a link which is well formatted"
+class WellFormed:
+    desc: str = "is a local relative link to a .md or a .png"
 
     word: str = r"""
     (?:
@@ -204,14 +204,14 @@ def __call__(self, url: link):
 
 if __name__ == "__main__":
     counter = 0
-    filters = [IsExternal(), WellFormatted()]
+    filters = [IsExternal(), WellFormed()]
     for file_matches in iter_file_matches(SEARCH_HERE, ignore=filters):
         file_matches = str(file_matches)
         if file_matches:
             print(file_matches)
             counter += 1
     if not counter:
-        print(f"All links under {SEARCH_HERE} match one of those criteria:")
+        print(f"All links under {SEARCH_HERE} match one of these criteria:")
         for filter in filters:
             print(f"    * {filter.desc}")
 
@@ -221,6 +221,6 @@ def __call__(self, url: link):
     #         print(f"    {url}")
 
     if NONEXISTING_TARGETS:
-        print("\nThese non-existing Targets are referenced by some links:")
+        print("\nThese non-existing targets are referenced by some links:")
         for url in sorted(NONEXISTING_TARGETS):
             print(f"    {url}")
diff --git a/linkchecker.py b/linkchecker.py
@@ -0,0 +1,226 @@
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Callable, Generator
+
+link = str
+raw_path = str
+line_number = int
+line = str
+
+# config
+
+SEARCH_HERE = "../docs"
+
+# end config
+
+NONEXISTING_TARGETS: set[Path] = set()
+EXTERNAL_URLS: set[link] = set()
+
+
+@dataclass
+class FileResults:
+    file: Path
+    matches: dict[line_number, list[tuple[link, bool]]] = field(default_factory=dict)
+
+    def __str__(self):
+        result = [str(self.file), "\n"]
+        for lnum, matches in self.matches.items():
+            minus_filter = [match[0] for match in matches if match[1]]
+            if minus_filter:
+                result.extend(
+                    [
+                        "    ",
+                        "[",
+                        str(lnum),
+                        "] ",
+                        ", ".join(minus_filter),
+                        "\n",
+                    ]
+                )
+        if len(result) > 2:
+            return "".join(result)
+        return ""
+
+
+def iter_markdowns(path: raw_path) -> Generator[Path, Any, None]:
+    for root, _, files in os.walk(path):
+        root = Path(root)
+        for file in files:
+            if file.endswith(".md"):
+                yield root / file
+
+
+def iter_relevant_lines(file: Path) -> Generator[tuple[line_number, line], Any, None]:
+    with file.open("r", encoding="utf-8") as f:
+        in_code = False
+        for lnum, line in enumerate(f, start=1):
+            if line.lstrip().startswith("```"):
+                in_code = not in_code
+                continue
+            if not in_code:
+                yield (lnum, line)
+
+
+def check_target_exists(
+    file: Path,
+    url: link,
+    strip_hash_pat: re.Pattern[str] = re.compile(
+        r"""
+        ^
+        ( [^#]* )
+        (?: [#].* )?
+        $
+        """,
+        re.VERBOSE,
+    ),
+    replace_pat: re.Pattern[str] = re.compile(
+        r"""
+        [a-z]{2}/docs/
+        """,
+        re.VERBOSE,
+    ),
+):
+    if url.startswith("https://"):
+        EXTERNAL_URLS.add(url)
+        return
+    match = strip_hash_pat.match(url)
+    if not match:
+        raise Exception("could not match url, this should not happen")
+    else:
+        targetpath = match.group(1).strip()
+        if not targetpath:
+            return
+        hashless_url = file.parent / targetpath
+        try:
+            hashless_url.resolve(strict=True)
+        except FileNotFoundError:
+            try:
+                hashless_url = Path(
+                    replace_pat.sub("en/docs/", str(hashless_url))
+                ).resolve(strict=True)
+            except FileNotFoundError:
+                NONEXISTING_TARGETS.add(hashless_url.resolve(strict=False))
+
+
+def iter_file_matches(
+    path: str,
+    ignore: list[Callable[[link], bool]] = [],
+    linkpat: re.Pattern[str] = re.compile(
+        r"""
+        \[ [^\]]* \]
+        \(
+            ( [^)]* )
+        \)
+        """,
+        re.VERBOSE,
+    ),
+) -> Generator[FileResults, Any, None]:
+    for file in iter_markdowns(path):
+        file_results = FileResults(file=file)
+        for lnum, line in iter_relevant_lines(file):
+            for match in linkpat.finditer(line):
+                check_target_exists(file, match.group(1))
+                if lnum not in file_results.matches:
+                    line_results = []
+                    file_results.matches[lnum] = line_results
+                line_results.append((match.group(1), True))  # type: ignore
+        for filter in ignore:
+            for lnum, matches in file_results.matches.items():
+                for index, match in enumerate(matches):
+                    if match[1] and filter(match[0]):
+                        matches[index] = (match[0], False)
+        if file_results.matches:
+            yield file_results
+
+
+# @dataclass
+# class IsPng:
+#     desc: str = "is a link to a .png"
+
+#     def __call__(self, url: link):
+#         if url.endswith(".png"):
+#             return True
+#         return False
+
+
+@dataclass
+class IsExternal:
+    desc: str = "is a link to an external file"
+
+    def __call__(self, url: link):
+        if url.startswith("https://"):
+            return True
+        return False
+
+
+@dataclass
+class WellFormatted:
+    desc: str = "is a link which is well formatted"
+
+    word: str = r"""
+    (?:
+        [a-z][a-z0-9]*
+        -
+    )*
+    [a-z][a-z0-9]*
+    """
+
+    path: str = rf"""
+    (?: [.][.]/ )* (?: {word}/ )*
+    """
+
+    filename: str = rf"""
+    {word}[.](?: md|png )
+    """
+
+    hash: str = rf"""
+    (?: [#][^#]+ )
+    """
+
+    ok_url_pat: re.Pattern[str] = re.compile(
+        rf"""
+        ^
+        (?:
+            (?:
+                {path}
+                {filename}
+                {hash}?
+            )
+            |
+            {hash}
+        )
+        $
+        """,
+        re.VERBOSE,
+    )
+
+    def __call__(self, url: link):
+        if self.ok_url_pat.match(url):
+            return True
+        return False
+
+
+if __name__ == "__main__":
+    counter = 0
+    filters = [IsExternal(), WellFormatted()]
+    for file_matches in iter_file_matches(SEARCH_HERE, ignore=filters):
+        file_matches = str(file_matches)
+        if file_matches:
+            print(file_matches)
+            counter += 1
+    if not counter:
+        print(f"All links under {SEARCH_HERE} match one of those criteria:")
+        for filter in filters:
+            print(f"    * {filter.desc}")
+
+    # if EXTERNAL_URLS:
+    #     print("\nExternal URLs:")
+    #     for url in sorted(EXTERNAL_URLS):
+    #         print(f"    {url}")
+
+    if NONEXISTING_TARGETS:
+        print("\nThese non-existing Targets are referenced by some links:")
+        for url in sorted(NONEXISTING_TARGETS):
+            print(f"    {url}")
No results found