Skip to content

Instantly share code, notes, and snippets.

@meredoth
Last active August 3, 2025 13:33
Show Gist options
  • Save meredoth/a95398fdd5561d151bd5fd84d6ea2740 to your computer and use it in GitHub Desktop.
Save meredoth/a95398fdd5561d151bd5fd84d6ea2740 to your computer and use it in GitHub Desktop.

Revisions

  1. meredoth revised this gist Aug 3, 2025. 2 changed files with 54 additions and 8 deletions.
    14 changes: 14 additions & 0 deletions Check-Discourse-Topic-Links.ps1
    Original file line number Diff line number Diff line change
    @@ -70,5 +70,19 @@ while($True)
    }
    }
    }
    # Posts that contain quotes are scanned for raw links in their rendered body,
    # skipping any link that is followed on the same line by the excluded text.
    if($post.quote_count)
    {
        $excludeText = "discourse"
        $pattern = "\b(?:https?://|www\.)(?!.*$excludeText)\S+\b"

        # Use a dedicated variable: $matches is an automatic variable that the
        # -match operator overwrites, so it must not be assigned by hand.
        $quoteLinks = [regex]::Matches($post.cooked, $pattern)

        # Test the count explicitly. An empty MatchCollection is not reliably
        # falsy across PowerShell editions, which would print a header with no links.
        if($quoteLinks.Count -gt 0)
        {
            Write-Output "Links Embedded in quotes of post number $($post.post_number) by $($post.username) : `n"

            $quoteLinks | ForEach-Object { $_.Value }

            Write-Verbose "Post body: $($post.cooked) `n"
        }
    }
    }
    }
    48 changes: 40 additions & 8 deletions CheckDiscourseTopicLinks.cs
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,3 @@
    // A C# script with the same functionality as the above powershell script

    using System.Text.Json;
    using System.Text.RegularExpressions;

    @@ -29,13 +27,16 @@

    foreach (var post in allPosts.EnumerateArray())
    {
    if (!post.TryGetProperty("link_counts", out var allLinks) ||
    allLinks.ValueKind != JsonValueKind.Array) continue;

    foreach (var link in allLinks.EnumerateArray())
    if (post.TryGetProperty("link_counts", out var allLinks) &&
    allLinks.ValueKind == JsonValueKind.Array)
    {
    ProcessLink(link, post);
    foreach (var link in allLinks.EnumerateArray())
    {
    ProcessLink(link, post);
    }
    }

    CheckQuoteLinks(post);
    }
    }
    catch (HttpRequestException)
    @@ -79,6 +80,37 @@ static void ProcessLink(JsonElement link, JsonElement post)
    }
    else
    {
    Console.WriteLine("No link text found\n");
    Console.WriteLine("No link text found in MetaData\n");
    }
    }

    // Prints the links found in the rendered body ("cooked") of a post that contains quotes.
    //
    // post: one element of the topic's post_stream.posts JSON array.
    //
    // Posts whose quote_count is missing, not an integer, or zero are skipped silently,
    // mirroring the PowerShell script's `if($post.quote_count)` check.
    // Throws InvalidOperationException when a quoted post has a null username or cooked value.
    void CheckQuoteLinks(JsonElement post)
    {
        // Decide whether the post is relevant before validating anything else, so posts
        // without quotes never fail on fields this check does not need.
        if (!post.TryGetProperty("quote_count", out JsonElement quoteCountElement) ||
            quoteCountElement.ValueKind != JsonValueKind.Number ||
            !quoteCountElement.TryGetInt32(out int quoteCount) ||
            quoteCount == 0)
        {
            return;
        }

        int postNumber = post.GetProperty("post_number").GetInt32();

        string username = post.GetProperty("username").GetString()
            ?? throw new InvalidOperationException($"Username property from the parsed json post: {post} is null!");

        string postBody = post.GetProperty("cooked").GetString()
            ?? throw new InvalidOperationException($"Cooked property from the parsed json post: {post} is null!");

        const string excludeText = "discourse";

        // NOTE(review): the negative lookahead scans to the end of the line, so a link is also
        // dropped when the excluded text appears anywhere after it on the same line, not only
        // inside the link itself. Confirm whether that is intended.
        const string quotePattern = $"""\b(?:https?://|www\.)(?!.*{excludeText})\S+\b""";

        MatchCollection linksInQuote = Regex.Matches(postBody, quotePattern);

        if (linksInQuote.Count == 0)
        {
            return;
        }

        // "\n" is the C# newline escape; the PowerShell-style "`n" would be printed literally.
        Console.WriteLine($"Links Embedded in quotes of post number {postNumber} by {username} :\n");

        foreach (Match linkInQuote in linksInQuote)
        {
            Console.WriteLine(linkInQuote.Value);
        }
    }
  2. meredoth revised this gist Jun 24, 2025. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions CheckDiscourseTopicLinks.cs
    Original file line number Diff line number Diff line change
    @@ -1,3 +1,5 @@
    // A C# script with the same functionality as the above powershell script

    using System.Text.Json;
    using System.Text.RegularExpressions;

  3. meredoth revised this gist Jun 24, 2025. 1 changed file with 1 addition and 3 deletions.
    4 changes: 1 addition & 3 deletions CheckDiscourseTopicLinks.cs
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,3 @@
    // A C# script with the same functionality as the above powershell script

    using System.Text.Json;
    using System.Text.RegularExpressions;

    @@ -66,7 +64,7 @@ static void ProcessLink(JsonElement link, JsonElement post)
    throw new NullReferenceException($"link: {link} cooked property from the parsed json post: {post} is null!");

    string escapedLink = Regex.Escape(url);
    string pattern = $@"<a\s+[^>]*href=\""{escapedLink}\""[^>]*>(.*?)</a>";
    string pattern = $"""<a\s+[^>]*href=\"{escapedLink}\"[^>]*>(.*?)</a>""";

    Console.WriteLine($"External link found in post number: {postNumber} by {username}");
    Console.WriteLine(url);
  4. meredoth revised this gist Jun 24, 2025. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion CheckDiscourseTopicLinks.cs
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    # A C# script with the same functionality as the above powershell script
    // A C# script with the same functionality as the above powershell script

    using System.Text.Json;
    using System.Text.RegularExpressions;
  5. meredoth revised this gist Jun 24, 2025. 1 changed file with 84 additions and 0 deletions.
    84 changes: 84 additions & 0 deletions CheckDiscourseTopicLinks.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,84 @@
    # A C# script with the same functionality as the above powershell script

    using System.Text.Json;
    using System.Text.RegularExpressions;

    // Entry point: walks every page of a Discourse topic and reports the external links
    // recorded in each post's link_counts array.
    if (args.Length != 1)
    {
        Console.WriteLine("Usage: Program <DiscourseTopicUrl>");
        return;
    }

    // A trailing slash would turn the request into ".../.json", so strip it first.
    string discourseTopic = args[0].TrimEnd('/');
    int pageNumber = 0;

    // One client is reused for every page and disposed when the program ends.
    using HttpClient client = new();
    client.DefaultRequestHeaders.UserAgent.ParseAdd("Mozilla/5.0 (compatible; Chrome)");

    Console.WriteLine();

    while (true)
    {
        pageNumber++;
        string currentPage = $"{discourseTopic}.json?page={pageNumber}";

        try
        {
            string response = await client.GetStringAsync(currentPage);
            using JsonDocument document = JsonDocument.Parse(response);
            JsonElement allPosts = document.RootElement.GetProperty("post_stream").GetProperty("posts");

            // A page without posts means the end of the topic. Without this guard a server
            // that keeps answering successfully with an empty page would loop forever.
            if (allPosts.ValueKind != JsonValueKind.Array || allPosts.GetArrayLength() == 0)
            {
                break;
            }

            foreach (JsonElement post in allPosts.EnumerateArray())
            {
                if (!post.TryGetProperty("link_counts", out JsonElement allLinks) ||
                    allLinks.ValueKind != JsonValueKind.Array)
                {
                    continue;
                }

                foreach (JsonElement link in allLinks.EnumerateArray())
                {
                    ProcessLink(link, post);
                }
            }
        }
        catch (HttpRequestException)
        {
            // A failed request (for example a page past the last one) ends the scan.
            break;
        }
    }

    return;

    // Reports one external link of a post: the post number, the author, the URL and,
    // when the matching anchor can be found in the rendered body, the anchor's text.
    //
    // link: one element of the post's link_counts array; ignored unless "internal" is false.
    // post: the post the link belongs to.
    //
    // Throws InvalidOperationException when url, username or cooked is null in the JSON.
    static void ProcessLink(JsonElement link, JsonElement post)
    {
        if (!link.TryGetProperty("internal", out JsonElement internalLink) ||
            internalLink.ValueKind != JsonValueKind.False)
        {
            return;
        }

        int postNumber = post.GetProperty("post_number").GetInt32();

        string url = link.GetProperty("url").GetString()
            ?? throw new InvalidOperationException($"link: {link} url property from the parsed json post: {post} is null!");

        string username = post.GetProperty("username").GetString()
            ?? throw new InvalidOperationException($"link: {link} username property from the parsed json post: {post} is null!");

        string postBody = post.GetProperty("cooked").GetString()
            ?? throw new InvalidOperationException($"link: {link} cooked property from the parsed json post: {post} is null!");

        // The href inside the rendered HTML may be HTML-encoded (for example "&" written as
        // "&amp;"), so accept either the raw or the encoded form of the URL.
        string escapedLink = Regex.Escape(url);
        string escapedEncodedLink = Regex.Escape(System.Net.WebUtility.HtmlEncode(url));
        string pattern = $@"<a\s+[^>]*href=\""(?:{escapedLink}|{escapedEncodedLink})\""[^>]*>(.*?)</a>";

        Console.WriteLine($"External link found in post number: {postNumber} by {username}");
        Console.WriteLine(url);

        Match match = Regex.Match(postBody, pattern);
        if (match.Success)
        {
            string linkText = match.Groups[1].Value;
            Console.WriteLine($"Link Text: {linkText}\n");
        }
        else
        {
            Console.WriteLine("No link text found\n");
        }
    }
  6. meredoth revised this gist Jun 24, 2025. 1 changed file with 16 additions and 2 deletions.
    18 changes: 16 additions & 2 deletions Check-Discourse-Topic-Links.ps1
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    Takes a discourse topic url and prints all posts that contain external links. Useful for identifying spam posts that hide links in subtle places, such as within commas or periods.
    .DESCRIPTION
    The Check-Discourse-Topic-Links script takes a URL and uses the Invoke-RestMethod to check each post for the presence of a link_counts field. If the field exists, the script checks whether its internal property is set to false. If so, it prints the post number, the username of the post's author, and the external link.
    The Check-Discourse-Topic-Links script takes a URL and uses the Invoke-RestMethod to check each post for the presence of a link_counts field. If the field exists, the script checks whether its internal property is set to false. If so, it prints the post number, the username of the post's author, the external link and the link text.
    .PARAMETER discourseTopic
    The discourse topic url to check. Note: the url must begin with http:// or https://
    @@ -51,7 +51,21 @@ while($True)
    if($link.internal -eq $False)
    {
    Write-Output "External link found in post number: $($post.post_number) by $($post.username)"
    Write-Output "$($link.url) `n"
    Write-Output "$($link.url)"

    $escapedLink = [regex]::Escape($($link.url))
    $pattern = "<a\s+[^>]*href=`"$escapedLink`"[^>]*>(.*?)</a>"

    if($post.cooked -match $pattern)
    {
    $linkText = $matches[1]
    Write-Output "Link Text: $linkText `n"
    }
    else
    {
    Write-Output "No link text found `n"
    }

    Write-Verbose "Post body: $($post.cooked) `n"
    }
    }
  7. meredoth revised this gist Jun 24, 2025. No changes.
  8. meredoth created this gist Jun 24, 2025.
    60 changes: 60 additions & 0 deletions Check-Discourse-Topic-Links.ps1
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,60 @@
    <#
    .SYNOPSIS
    Takes a discourse topic url and prints all posts that contain external links. Useful for identifying spam posts that hide links in subtle places, such as within commas or periods.
    .DESCRIPTION
    The Check-Discourse-Topic-Links script takes a URL and uses the Invoke-RestMethod to check each post for the presence of a link_counts field. If the field exists, the script checks whether its internal property is set to false. If so, it prints the post number, the username of the post's author, and the external link.
    .PARAMETER discourseTopic
    The discourse topic url to check. Note: the url must begin with http:// or https://
    .EXAMPLE
    .\Check-Discourse-Topic-Links.ps1 https://discussions.unity.com/t/is-it-a-good-idea-to-put-my-game-on-itch-io-first/1657629
    #>

    [CmdletBinding()]
    param
    (
        [Parameter(Mandatory=$True)]
        [string]$discourseTopic
    )

    $pageNumber = 0
    Write-Host "`n"

    # Pages are requested one after another until a request fails or a page has no posts.
    while($True)
    {
        $pageNumber++
        $currentPage = $discourseTopic + ".json?page=" + $pageNumber

        try
        {
            Write-Verbose "Getting all posts from $discourseTopic page $pageNumber"
            $pageData = (Invoke-RestMethod -UserAgent ([Microsoft.PowerShell.Commands.PSUserAgent]::Chrome) -Method GET -Uri $currentPage)
        }
        catch
        {
            # A failed request ends the scan; the reason is shown only with -Verbose
            # so the normal output stays unchanged.
            Write-Verbose "Stopping at page ${pageNumber}: $($_.Exception.Message)"
            break
        }

        $allPosts = $pageData.post_stream.posts

        # Stop on an empty page so a server that keeps answering successfully
        # cannot keep the loop running forever.
        if(-not $allPosts)
        {
            break
        }

        foreach($post in $allPosts)
        {
            Write-Verbose "Checking post: $($post.post_number) by $($post.username) `n"
            if($post.link_counts)
            {
                Write-Verbose "Links found in post number $($post.post_number) `n"
                foreach($link in $post.link_counts)
                {
                    Write-Verbose "Link: $link `n"
                    if($link.internal -eq $False)
                    {
                        Write-Output "External link found in post number: $($post.post_number) by $($post.username)"
                        Write-Output "$($link.url) `n"
                        Write-Verbose "Post body: $($post.cooked) `n"
                    }
                }
            }
        }
    }