# Name under which the bounding-box function tool is registered; also used as
# the "name" field of BOUNDING_BOX_TOOL below.
BOUNDING_BOX_TOOL_NAME = "capture_bounding_boxes"

# Function-tool definition (JSON-Schema parameters) for reporting detections.
#
# The tool is declared with "strict": True. In strict mode every object schema
# must set "additionalProperties": False and list *every* declared property in
# "required"; an optional value is expressed as a union with "null" rather than
# by omitting the key. "notes" and "confidence" are therefore nullable and
# required, so callers should expect those keys to be present and possibly None.
BOUNDING_BOX_TOOL = {
    "type": "function",
    "name": BOUNDING_BOX_TOOL_NAME,
    "strict": True,
    "description": (
        "Capture normalized bounding boxes for salient regions in the provided image. "
        "Each box should map to a meaningful real-world entity or text block and use clockwise point order."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "boxes": {
                "type": "array",
                "description": "List of relevant detections within the image.",
                "items": {
                    "type": "object",
                    "properties": {
                        "label": {
                            "type": "string",
                            "description": "Concise human-readable label for the detected item.",
                        },
                        # Optional in spirit: nullable so it can still be listed in "required".
                        "notes": {
                            "type": ["string", "null"],
                            "description": (
                                "Optional sentence with clarifying details for the detection; "
                                "null if there is nothing to add."
                            ),
                        },
                        # Optional in spirit: nullable so it can still be listed in "required".
                        # The numeric bounds only constrain non-null values.
                        "confidence": {
                            "type": ["number", "null"],
                            "minimum": 0,
                            "maximum": 1,
                            "description": "Confidence value in the range [0,1]; null if not estimated.",
                        },
                        # Exactly four points, each an [x, y] pair of numbers.
                        "bbx_2d": {
                            "type": "array",
                            "description": "Polygon with four normalized points [[x,y], ...] ordered clockwise.",
                            "minItems": 4,
                            "maxItems": 4,
                            "items": {
                                "type": "array",
                                "items": {"type": "number"},
                                "minItems": 2,
                                "maxItems": 2,
                            },
                        },
                    },
                    # Strict mode: must name every key declared in "properties" above.
                    "required": ["label", "bbx_2d", "notes", "confidence"],
                    "additionalProperties": False,
                },
            }
        },
        "required": ["boxes"],
        "additionalProperties": False,
    },
}