"""
kindle_highlights.py

This script provides functionality to parse and process Kindle highlights and notes.
It extracts data from a Kindle clippings file and organizes it into a structured format.

Usage:
    from kindle_highlights import KindleClippingCollection
    collection = KindleClippingCollection.extract_from_file("path/to/My Clippings.txt")
    collection.save_as_files("output_directory")
"""

from pydantic import BaseModel, field_validator, ValidationError
from typing import Optional, Literal, Dict, List
from datetime import datetime
import re
import logging
from os import path

class KindleClipping(BaseModel):
    """A single entry (highlight, note or bookmark) from a Kindle clippings file.

    Attributes:
        title: Title of the book the clipping belongs to.
        author: Author of the book.
        type: Kind of entry: "Highlight", "Note" or "Bookmark".
        page: Page as an int or a string, or None when the entry has no page.
        location_start: First Kindle location covered by the entry.
        location_end: Last Kindle location, or None for a single location.
        date: When the entry was added.
        content: Text of the highlight or note.
    """

    title: str
    author: str
    type: Literal["Highlight", "Note", "Bookmark"]
    page: Optional[int | str]
    location_start: int
    location_end: Optional[int]
    date: datetime
    content: str

    @field_validator("date", mode="before")
    @classmethod
    def parse_date(cls, value):
        """Convert a timestamp such as "Monday, 1 January 2024 10:00:00".

        Args:
            value: Raw input for the ``date`` field: either the timestamp
                string or an already-built ``datetime``.

        Returns:
            The corresponding ``datetime``.

        Raises:
            ValueError: If ``value`` is neither a ``datetime`` nor a string in
                the expected format. Pydantic reports this as a validation
                error.
        """
        # Already parsed (e.g. when a model is rebuilt from dumped data):
        # strptime would raise a TypeError here, so pass it through.
        if isinstance(value, datetime):
            return value
        if not isinstance(value, str):
            # Raise ValueError rather than letting strptime raise TypeError,
            # so the failure surfaces as a regular validation error.
            raise ValueError(
                f"date must be a str or datetime, got {type(value).__name__}"
            )
        return datetime.strptime(value, "%A, %d %B %Y %H:%M:%S")

    def to_location_range(self) -> str:
        """Return the location as "start-end", or just "start" without an end."""
        if self.location_end:
            return f"{self.location_start}-{self.location_end}"
        return str(self.location_start)

    def to_position(self) -> str:
        """Return a readable position, prefixed with the page when there is one."""
        location = f"loc. {self.to_location_range()}"
        if self.page:
            return f"page {self.page}, {location}"
        return location

    def to_markdown(self) -> str:
        """Render the clipping as a Markdown list item.

        Highlights and bookmarks become top-level bullets that include the
        position; notes become an indented sub-bullet without a position.

        Raises:
            ValueError: If ``type`` is not one of the three known kinds.
        """
        if self.type == "Highlight":
            return f"* {self.content}\n  - {self.to_position()}"
        if self.type == "Note":
            return f"  - **COMMENT**: {self.content}"
        if self.type == "Bookmark":
            return f"* **BOOKMARK**: {self.content} {self.to_position()}"
        raise ValueError(f"Unknown type: {self.type}")

class KindleClippingCollection:
    """Kindle clippings grouped by book title, with Markdown export helpers.

    Attributes:
        books: Mapping of book title to the list of clippings for that book.
    """

    def __init__(self, books: Dict[str, List[KindleClipping]]):
        """Store the given title-to-clippings mapping as ``books``."""
        self.books = books

    @classmethod
    def group_by_book_title(
        cls, clippings: List[KindleClipping]
    ) -> "KindleClippingCollection":
        """Build a collection by grouping clippings on their ``title``.

        Args:
            clippings: Clippings in any order; the relative order inside each
                book is preserved.

        Returns:
            A new collection whose ``books`` maps each title to its clippings.
        """
        books = {}
        for clipping in clippings:
            books.setdefault(clipping.title, []).append(clipping)
        return cls(books)

    @classmethod
    def extract_from_file(
        cls, file_path: str, clear_highlights: bool = True
    ) -> "KindleClippingCollection":
        """Parse a clippings file into a collection.

        Entries that do not match the expected layout, or that fail model
        validation, are logged as errors and skipped.

        Args:
            file_path: Path to the clippings text file (read as UTF-8, with an
                optional leading BOM).
            clear_highlights: When true, run ``clear_highlights_all`` on the
                result to drop superseded highlights.

        Returns:
            The parsed collection.
        """
        with open(file_path, "r", encoding="utf-8-sig") as file:
            text = file.read()
        pattern = re.compile(
            r"^(?P<title>.*) \((?P<author>.*)\)\n"
            r"- Your (?P<type>Highlight|Note|Bookmark)"
            r"(?: on page (?P<page>[\w\d]+) \|)?"
            r"(?: at)? location (?P<location_start>\d+)(?:-(?P<location_end>\d+))?"
            r" \| Added on (?P<date>.*)\n+"
            r"(?P<content>(?:.|\n)*)",
            re.MULTILINE,
        )
        clippings: List[KindleClipping] = []

        for part in text.split("\n==========\n"):
            # A file ending with the separator yields an empty trailing
            # fragment; that is not a malformed entry, so skip it silently.
            if not part.strip():
                continue
            match = pattern.match(part)
            if match is None:
                logging.error(f"Error parsing clipping (no match):\n{part}")
                continue
            try:
                clippings.append(KindleClipping(**match.groupdict()))
            except ValidationError as e:
                logging.error(f"Error parsing clipping ({e}):\n{part}")

        res = cls.group_by_book_title(clippings)
        if clear_highlights:
            res = res.clear_highlights_all()
        return res

    @staticmethod
    def clear_highlights(clippings: List[KindleClipping]) -> List[KindleClipping]:
        """Drop highlights that were superseded by the next highlight.

        A highlight that directly follows another highlight with the same
        ``location_start`` is treated as an updated version and replaces it.
        Notes and bookmarks are never replaced.

        Args:
            clippings: Clippings of one book, in file order.

        Returns:
            A new list without the superseded highlights; empty input gives
            an empty list.
        """
        filtered: List[KindleClipping] = []
        for clipping in clippings:
            previous = filtered[-1] if filtered else None
            if (
                previous is not None
                and clipping.type == "Highlight"
                # Only another highlight can be an older version; replacing a
                # note or bookmark at the same location would lose it.
                and previous.type == "Highlight"
                and clipping.location_start == previous.location_start
            ):
                filtered[-1] = clipping
                logging.info(
                    f"Highlight removed as it seems to be updated.\nOLD:{previous.content}\nNEW:{clipping.content}"
                )
            else:
                filtered.append(clipping)
        return filtered

    def clear_highlights_all(self) -> "KindleClippingCollection":
        """Return a new collection with ``clear_highlights`` applied per book."""
        return KindleClippingCollection(
            {
                title: self.clear_highlights(clippings)
                for title, clippings in self.books.items()
            }
        )

    @staticmethod
    def book_to_markdown(clippings: List[KindleClipping], as_file: bool = True) -> str:
        """Render one book's clippings as Markdown.

        Args:
            clippings: Clippings of a single book; title and author are taken
                from the first one.
            as_file: When true, emit a standalone document with a front-matter
                header; otherwise emit a section headed by title and author.

        Returns:
            The Markdown text.

        Raises:
            IndexError: If ``clippings`` is empty.
        """
        quotations = "\n".join([clipping.to_markdown() for clipping in clippings])
        title = clippings[0].title
        author = clippings[0].author
        dates = [clipping.date for clipping in clippings]
        date_first = min(dates).strftime("%Y-%m-%d")
        date_last = max(dates).strftime("%Y-%m-%d")
        if as_file:
            lines = [
                "---",
                f'title: "{title}"',
                f"author: {author}",
                f"date_first: {date_first}",
                f"date_last: {date_last}",
                "---",
                "## Quotations",
                "",
                quotations,
            ]
        else:
            lines = [
                f"## {title} by {author}",
                "",
                f"Highlights from {date_first} to {date_last}",
                "",
                quotations,
            ]
        return "\n".join(lines)

    def save_as_files(self, output_dir: str, min_highlights_for_separate_file: int = 5):
        """Write the collection as Markdown files into ``output_dir``.

        Books with at least ``min_highlights_for_separate_file`` clippings get
        their own ``<title>.md`` (characters ``<>:"/\\|?*`` removed from the
        name); all remaining books are concatenated into ``other.md``, which
        is always written.

        Args:
            output_dir: Target directory; created if it does not exist.
            min_highlights_for_separate_file: Minimum number of clippings for
                a book to get its own file.
        """
        # Imported here because the module only imports ``os.path``.
        from os import makedirs

        # An empty string means "current directory"; makedirs("") would fail.
        if output_dir:
            makedirs(output_dir, exist_ok=True)

        others = []
        for title, clippings in self.books.items():
            if len(clippings) >= min_highlights_for_separate_file:
                filename = re.sub(r'[<>:"/\\|?*]', "", title)
                with open(path.join(output_dir, f"{filename}.md"), "w", encoding="utf-8") as file:
                    file.write(self.book_to_markdown(clippings, as_file=True))
            else:
                others.append(self.book_to_markdown(clippings, as_file=False))
        with open(path.join(output_dir, "other.md"), "w", encoding="utf-8") as file:
            file.write("\n\n".join(others))