#!/usr/bin/env python3

"""
extract-police-blotter.py

Parses and extracts structured data from the screenshot at the given URL:

https://gist.github.com/user-attachments/assets/ceb6db99-e884-4566-bea8-c48b415a5703

This script assumes your API key is set up in the default way,
  i.e. environment variable: $OPENAI_API_KEY
  https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety

"""
import base64
import json
from openai import OpenAI
from pathlib import Path
from pydantic import BaseModel, Field

# URL of the screenshot to analyze; it is sent to the model further down as
# the `image_url` part of the user message.
INPUT_URL = "https://gist.github.com/user-attachments/assets/ceb6db99-e884-4566-bea8-c48b415a5703"


# OpenAI examples of Structured Output scripts and data definitions
# https://platform.openai.com/docs/guides/structured-outputs/examples?context=ex2


# Define the data structures in Pydantic:
# an Incident involves several Persons (victims, perpetrators)
class Person(BaseModel):
    # Schema for one person involved in an incident; used as the element type
    # of Incident.perpetrators and Incident.victims.
    # NOTE(review): documented with `#` comments rather than a docstring,
    # since Pydantic appears to copy a class docstring into the generated
    # JSON schema's description — confirm before converting.
    description: str  # free-text description of the person; no format is enforced
    gender: str  # free-form string; no fixed set of values is enforced
    is_student: bool  # presumably whether the person is a student — confirm intent


# Pydantic docs on field descriptions:
# https://docs.pydantic.dev/latest/concepts/fields/
class Incident(BaseModel):
    # Schema for a single blotter entry. Fields are declared in the same
    # order as before, since the declaration order carries into the schema.

    # When and where: plain strings, no particular format is enforced.
    date: str
    time: str
    location: str

    # Classification. The `description` texts travel with the schema and act
    # as per-field instructions for the model.
    summary: str = Field(description="Brief summary, less than 30 chars")
    category: str = Field(
        description=(
            'Type of report, broadly speaking: "violent" , "property", '
            '"traffic", "call for service", or "other" '
        ),
    )
    property_damage: str = Field(
        description=(
            "If a property crime, then a description of "
            "what was stolen/damaged/lost"
        ),
    )

    # Outcome and the people involved.
    arrest_made: bool
    perpetrators: list[Person]
    victims: list[Person]

    # Source text backing this entry, so the extraction can be checked
    # against the original input.
    incident_text: str = Field(
        description=(
            "Include the complete verbatim text from the input "
            "that pertains to the incident"
        ),
    )


class Blotter(BaseModel):
    # Top-level response schema: this is the model passed as `response_format`
    # to the completion call below, so the whole reply is one object holding
    # a list of Incident entries.
    incidents: list[Incident]


## done defining the data structures
##################################################


## initialize OpenAI client
# Constructed with no arguments, so credentials come from the client's
# defaults (the module docstring points at $OPENAI_API_KEY).
client = OpenAI()


# Example of message format for passing in an image via URL
# https://cookbook.openai.com/examples/gpt4o/introduction_to_gpt4o#url-image-processing
input_messages = [
    {"role": "system", "content": "Output the result in JSON format."},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract the text from this image"},
            {
                "type": "image_url",
                "image_url": {"url": INPUT_URL},
            },
        ],
    },
]

# gpt-4o-mini is cheap and fast and has vision capabilities.
# Passing the Pydantic model as `response_format` requests a reply that
# conforms to the Blotter schema.
response = client.beta.chat.completions.parse(
    response_format=Blotter,
    model="gpt-4o-mini",
    messages=input_messages,
)

message = response.choices[0].message

# If the model declines the request, the explanation arrives in
# `message.refusal` and `message.content` is None. Handing None to
# json.loads would fail with an unhelpful TypeError, so stop here with a
# readable message and a non-zero exit status instead.
if message.refusal:
    raise SystemExit(f"Model refused the request: {message.refusal}")
if message.content is None:
    raise SystemExit("Model returned no content to parse")

# Print it out in readable format
obj = json.loads(message.content)
print(json.dumps(obj, indent=2))