import os
import re
from datetime import datetime


# Finds the styled "Changelog:" paragraph and captures the markup inside its <ul>.
_CHANGELOG_BLOCK_RE = re.compile(
    r'<p style="color: rgb\(42, 165, 165\);">\s*Changelog:\s*<ul>(.*?)</ul>\s*</p>',
    re.DOTALL,
)
# Captures the body of one <li>; DOTALL so an item may span several lines.
_CHANGELOG_ITEM_RE = re.compile(r"<li>(.*?)</li>", re.DOTALL)
# Splits "M/D/YY: description" (or "M/D/YYYY: ...") into date text and description.
_DATED_ENTRY_RE = re.compile(r"(\d{1,2}/\d{1,2}/\d{2,4}):\s*(.*)")
# A line break inside an item together with the whitespace around it.
_LINE_BREAK_RE = re.compile(r"\s*\n\s*")


def _parse_changelog_date(date_str):
    """Parse an ``M/D/YY`` or ``M/D/YYYY`` string into a ``date``.

    Raises:
        ValueError: If the text is not a valid calendar date in either format.
    """
    # The year is whatever follows the last slash; two digits means %y.
    year_format = "%y" if len(date_str.rsplit("/", 1)[1]) == 2 else "%Y"
    return datetime.strptime(date_str, f"%m/%d/{year_format}").date()


def extract_and_format_changelogs(directory: str = ".") -> str:
    """
    Scans HTML files in a directory, extracts changelogs, and formats them.

    Every ``.htm``/``.html`` file directly inside ``directory`` is searched
    for the first styled ``Changelog:`` paragraph. Each ``<li>`` inside it is
    expected to look like ``M/D/YY: description`` (or ``M/D/YYYY``). Entries
    are grouped by date, newest date first; within a date, entries follow the
    sorted file-name order and then their order inside the file.

    Files that cannot be read, and items whose text or date cannot be parsed,
    are reported with ``print`` and skipped.

    Args:
        directory (str): The directory to scan for HTML files.

    Returns:
        str: A Markdown document starting with the ``# Theo's Changelog``
        heading, followed by one ``## M/D/YYYY`` section per date, each
        listing ``- [filename]: description`` bullets.
    """
    entries_by_date = {}

    # os.listdir() returns names in arbitrary order; sort so the output is
    # reproducible from run to run and across platforms.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith((".htm", ".html")):
            continue

        filepath = os.path.join(directory, filename)
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error processing file {filepath}: {e}")
            continue

        changelog_match = _CHANGELOG_BLOCK_RE.search(content)
        if not changelog_match:
            continue

        for raw_item in _CHANGELOG_ITEM_RE.findall(changelog_match.group(1)):
            # Trim padding and fold wrapped lines into one so that indented
            # or multi-line <li> bodies parse like single-line ones.
            item = _LINE_BREAK_RE.sub(" ", raw_item.strip())

            date_match = _DATED_ENTRY_RE.match(item)
            if not date_match:
                print(
                    f"Warning: Could not parse changelog item '{item}' in file '{filename}'. Skipping this entry."
                )
                continue

            date_str = date_match.group(1).strip()
            description = date_match.group(2).strip()
            try:
                entry_date = _parse_changelog_date(date_str)
            except ValueError:
                print(
                    f"Warning: Could not parse date '{date_str}' in file '{filename}'. Skipping this entry."
                )
                continue

            entries_by_date.setdefault(entry_date, []).append(
                f"[{filename}]: {description}"
            )

    output = ["# Theo's Changelog\n"]
    # date objects sort chronologically, so reverse order is newest first.
    for entry_date in sorted(entries_by_date, reverse=True):
        # Built from the numeric fields instead of strftime's "%-m"/"%#m"
        # flags, which are platform-specific.
        output.append(
            f"## {entry_date.month}/{entry_date.day}/{entry_date.year}\n"
        )
        output.extend(f"- {entry}\n" for entry in entries_by_date[entry_date])
        output.append("\n")

    return "".join(output)


if __name__ == "__main__":
    # Build the Markdown first, so a failure while scanning the current
    # directory happens before CHANGELOG.md is opened for writing.
    markdown_text = extract_and_format_changelogs()

    # Persist the result next to the scanned files.
    with open("CHANGELOG.md", mode="w", encoding="utf-8") as changelog_file:
        changelog_file.write(markdown_text)

    print("\nChangelog saved to CHANGELOG.md")
