from bs4 import BeautifulSoup
from urllib.parse import urlparse
import argparse
import datetime
import pprint
import requests
import sys

from scripts.webmention.utils import send_webmention


def get_post_text(post):
    """Return the HTML text of an HN post.

    Prefers the story text, falls back to the comment text, and returns an
    empty string when neither field is present (or both are empty/None).
    """
    if post.get("story_text"):
        return post["story_text"]
    if post.get("comment_text"):
        return post["comment_text"]
    return ""


def main(domain, since_days=7):
    """Send webmentions for recent HN stories that link to *domain*.

    Queries the Algolia HN search API for stories created within the last
    *since_days* days that mention *domain*, and for each match sends a
    webmention from the HN item page to the linked URL on *domain*.
    Exits the process with status 1 on any request failure.
    """
    # Equivalent UI query, for reference:
    # https://hn.algolia.com/?dateRange=pastWeek&page=0&prefix=false&query=jeremykun.com&sort=byDate&type=story
    #
    # Algolia's created_at_i filter is a Unix epoch; use an aware UTC "now"
    # so the cutoff is correct regardless of the machine's local timezone.
    cutoff_epoch = (
        int(datetime.datetime.now(datetime.timezone.utc).timestamp())
        - since_days * 24 * 60 * 60
    )
    search_url = (
        "https://hn.algolia.com/api/v1/search"
        f"?query={domain}&tags=story&hitsPerPage=20"
        f"&numericFilters=created_at_i%3E{cutoff_epoch}"
    )
    try:
        # timeout: don't let a hung connection stall the script forever;
        # raise_for_status: fail loudly on HTTP errors instead of a later
        # KeyError when indexing the JSON body.
        r = requests.get(search_url, timeout=30)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(e)
        sys.exit(1)

    response = r.json()
    pprint.pp(response)
    num_hits = response["nbHits"]
    num_pages = response["nbPages"]
    print(f"Found {num_hits} posts across {num_pages} paginated search pages.")

    for page in range(num_pages):
        print(f"Querying page {page}")
        try:
            r = requests.get(f"{search_url}&page={page}", timeout=30)
            r.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(e)
            sys.exit(1)

        for post in r.json()["hits"]:
            # created_at is an ISO-8601 UTC timestamp (trailing "Z"); parse
            # it as timezone-aware UTC so the age comparison below is correct
            # on non-UTC machines. (The original compared a UTC-derived naive
            # datetime against local naive now(), skewing the age by the
            # local UTC offset.)
            if "created_at" in post:
                created_at = datetime.datetime.strptime(
                    post["created_at"], "%Y-%m-%dT%H:%M:%SZ"
                ).replace(tzinfo=datetime.timezone.utc)
            else:
                created_at = datetime.datetime.now(datetime.timezone.utc)
            now = datetime.datetime.now(datetime.timezone.utc)
            if (now - created_at).days > since_days:
                # we already manually handled this webmention with the initial
                # script run
                print(
                    f"Skipping post because its publication date ({created_at}) "
                    f"is older than the threshold of {since_days} days since "
                    f"today ({now})."
                )
                continue

            post_url = "https://news.ycombinator.com/item?id=" + str(post["objectID"])
            post_http_url = post.get("url")
            print(f"Post URL: {post_http_url}")
            # use 'domain in' because it may be www.jeremykun.com or jeremykun.com
            if post_http_url is not None and domain in urlparse(post_http_url).netloc:
                send_webmention(post_url, post_http_url)
                continue
            # Guard against urlparse(None), which raises for "Ask HN"-style
            # posts that have no external URL.
            parsed = urlparse(post_http_url).netloc if post_http_url else None
            print(f"doesn't match {domain} netloc was: {parsed}")

            # No (matching) external URL: scan the post body itself for links
            # back to our domain.
            story_text = get_post_text(post)
            content = BeautifulSoup(story_text, "html.parser")
            for link in content.find_all("a"):
                href = link.get("href")
                if href is None:
                    continue
                if urlparse(href).netloc == domain:
                    send_webmention(post_url, href)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--domain")
    parser.add_argument("-s", "--since_days", type=int, default=7)
    args = parser.parse_args()
    main(args.domain, args.since_days)