"""Parse a sleeps.csv file, validate each row, and write an HTML report of REM sleep."""

import argparse
import json
from datetime import datetime
from typing import Optional

import pandas as pd
from pydantic import BaseModel, ValidationError, field_validator

# Attribute names assigned, by position, to the columns of the CSV file.
# The order must match the column order of the file being read.
_CSV_COLUMNS = (
    "cycle_start_time",
    "cycle_end_time",
    "cycle_timezone",
    "sleep_onset",
    "wake_onset",
    "sleep_performance",
    "respiratory_rate",
    "asleep_duration",
    "in_bed_duration",
    "light_sleep_duration",
    "deep_sws_duration",
    "rem_duration",
    "awake_duration",
    "sleep_need",
    "sleep_debt",
    "sleep_efficiency",
    "sleep_consistency",
    "nap",
)


class SleepData(BaseModel):
    """One validated row of the sleep CSV.

    Fields annotated ``| None`` are nullable: missing CSV cells (which pandas
    reads as NaN) are converted to ``None`` before validation.
    """

    cycle_start_time: datetime | None
    cycle_end_time: datetime | None
    cycle_timezone: str
    sleep_onset: datetime
    wake_onset: datetime
    sleep_performance: int
    respiratory_rate: float
    asleep_duration: int
    in_bed_duration: int
    light_sleep_duration: int
    deep_sws_duration: int
    rem_duration: int
    awake_duration: int
    sleep_need: int
    sleep_debt: int | None
    sleep_efficiency: int
    sleep_consistency: int | None
    nap: bool

    @field_validator("nap", mode="before")
    @classmethod
    def parse_boolean(cls, value):
        """Turn the CSV's 'true'/'false' strings (any case) into booleans."""
        if isinstance(value, str):
            return value.lower() == "true"
        # Booleans pass through unchanged.  Any other type (e.g. NaN from an
        # empty cell) is handed to pydantic's own bool validation so that a
        # bad cell rejects only its row; calling ``.lower()`` on it would
        # raise AttributeError, which is not reported as a ValidationError.
        return value

    @field_validator(
        "cycle_start_time",
        "cycle_end_time",
        "sleep_debt",
        "sleep_consistency",
        mode="before",
    )
    @classmethod
    def handle_optional_nan(cls, value):
        """Map missing values (NaN/NaT/None) in nullable fields to ``None``."""
        if pd.isna(value):
            return None
        return value

    def to_dict(self):
        """Convert the model instance to a dictionary."""
        # ``model_dump`` is the pydantic v2 replacement for the deprecated
        # ``dict`` method.
        return self.model_dump()


def read_and_parse_csv(file_path: str) -> list[SleepData]:
    """Read the sleeps.csv file and parse it using the SleepData model.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The rows that passed validation, in file order.  Rows that fail
        validation are reported on stdout and skipped.  If the file cannot be
        read or processed at all, the error is printed and an empty list is
        returned.
    """
    try:
        df = pd.read_csv(file_path)

        # Columns are renamed by position; a file with a different number of
        # columns raises here and is reported by the handler below.
        df.columns = list(_CSV_COLUMNS)

        sleep_data = []
        for row in df.to_dict(orient="records"):
            try:
                sleep_data.append(SleepData(**row))
            except ValidationError as e:
                print(f"Validation error for row {row}: {e}")
        return sleep_data
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on getting a list.
        print(f"Error parsing the CSV file: {e}")
        return []


def generate_html_report(sleep_data: list[SleepData], output_file: str) -> None:
    """Generate an HTML report with two graphs.

    The first graph plots the REM duration of every record, labelled by the
    date of ``sleep_onset``; the second plots the per-month mean of those
    durations.

    Args:
        sleep_data: Validated records.  They are plotted in reverse input
            order, i.e. the input is assumed to be newest-first.
        output_file: Path of the HTML file to write (overwritten if present).
    """
    # Reverse the records so the charts read chronologically.
    chronological = list(reversed(sleep_data))
    dates = [record.sleep_onset.strftime("%Y-%m-%d") for record in chronological]
    rem_durations = [record.rem_duration for record in chronological]

    # Calculate monthly averages.
    df = pd.DataFrame({"date": dates, "rem_duration": rem_durations})
    df["date"] = pd.to_datetime(df["date"])
    df["month"] = df["date"].dt.to_period("M")
    monthly_avg = df.groupby("month")["rem_duration"].mean()

    monthly_avg_labels = [str(month) for month in monthly_avg.index]
    # Convert np.float64 to float so the values serialize as plain numbers.
    monthly_avg_values = [float(value) for value in monthly_avg.values]

    # Serialize with json.dumps so the data is embedded as valid JavaScript
    # literals rather than Python reprs.
    daily_labels_js = json.dumps(dates)
    daily_values_js = json.dumps(rem_durations)
    monthly_labels_js = json.dumps(monthly_avg_labels)
    monthly_values_js = json.dumps(monthly_avg_values)

    # NOTE(review): the chart markup below (Chart.js loaded from a CDN) is a
    # reconstruction; confirm the charting library matches what is intended.
    # Literal JavaScript braces are doubled because this is an f-string.
    html_template = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Sleep Data Report</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
<h1>Sleep Data Report</h1>

<h2>Daily REM Durations</h2>
<canvas id="dailyRemChart"></canvas>

<h2>Monthly Average REM Durations</h2>
<canvas id="monthlyAvgRemChart"></canvas>

<script>
function drawChart(canvasId, chartType, labels, values, seriesLabel) {{
    new Chart(document.getElementById(canvasId), {{
        type: chartType,
        data: {{
            labels: labels,
            datasets: [{{ label: seriesLabel, data: values }}]
        }}
    }});
}}
drawChart("dailyRemChart", "line", {daily_labels_js}, {daily_values_js}, "REM duration");
drawChart("monthlyAvgRemChart", "bar", {monthly_labels_js}, {monthly_values_js}, "Average REM duration");
</script>
</body>
</html>
"""

    # Explicit encoding keeps the output independent of the platform default
    # and consistent with the declared meta charset.
    with open(output_file, "w", encoding="utf-8") as file:
        file.write(html_template)


def main() -> None:
    """Command-line entry point: parse the CSV and write the HTML report."""
    parser = argparse.ArgumentParser(
        description="Parse and validate a sleeps.csv file."
    )
    parser.add_argument("file", type=str, help="Path to the sleeps.csv file")
    parser.add_argument("output", type=str, help="Path to the output HTML file")
    args = parser.parse_args()

    sleep_data = read_and_parse_csv(args.file)
    if sleep_data:
        print("Generating HTML report...")
        generate_html_report(sleep_data, args.output)
        print(f"Report generated: {args.output}")
    else:
        print("No valid data found in the file.")


if __name__ == "__main__":
    main()