#!/usr/bin/env python3
import datetime
import html
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import boto3
import humanize
# Configure the root logger at import time with INFO as the threshold level.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by every function below.
logger = logging.getLogger(__name__)
class S3Reporter:
    """Inventory the S3 buckets of the current AWS account as an HTML report.

    For each bucket the reporter resolves its region, lists its objects,
    totals their size, estimates a monthly storage cost and finally writes
    everything to ``s3-report-<account id>.html`` in the working directory.
    """

    # Value returned by get_bucket_region() when the lookup fails. It is a
    # runtime string that ends up in the report, so it keeps its original text.
    UNKNOWN_REGION = "desconocida"

    # get_bucket_location() may answer with the legacy constraint "EU" for
    # buckets that live in eu-west-1; it is not a usable region name.
    LEGACY_REGION_ALIASES = {"EU": "eu-west-1"}

    # Simplified price used for the estimate: USD per GB-month (first 50 TB).
    COST_PER_GB_MONTH = 0.023

    def __init__(self):
        """Create the default S3 client and look up the caller's account id."""
        # Default client, used for account-wide calls (list buckets, locations).
        self.default_client = boto3.client('s3')
        self.account_id = boto3.client('sts').get_caller_identity().get('Account')
        # Cache of region name -> S3 client, filled lazily by get_s3_client().
        self.region_clients = {}

    def get_s3_client(self, region: Optional[str] = None) -> Any:
        """Return an S3 client for ``region``, creating and caching it on demand.

        Args:
            region: Region name. ``None``, an empty string or the
                UNKNOWN_REGION sentinel select the default client.

        Returns:
            The boto3 S3 client to use for that region.
        """
        # The sentinel is not a real region: building a client for it would
        # point at a non-existent endpoint, so fall back to the default client.
        if not region or region == self.UNKNOWN_REGION:
            return self.default_client
        if region not in self.region_clients:
            self.region_clients[region] = boto3.client('s3', region_name=region)
        return self.region_clients[region]

    def _iter_objects(self, bucket_name: str, region: str):
        """Yield every object entry of the bucket, following pagination."""
        paginator = self.get_s3_client(region).get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket_name):
            # Pages of an empty bucket carry no 'Contents' key at all.
            yield from page.get('Contents', [])

    def _estimate_monthly_cost(self, total_size: int) -> float:
        """Return the approximate monthly storage cost in USD for ``total_size`` bytes."""
        return (total_size / (1024**3)) * self.COST_PER_GB_MONTH

    def get_bucket_size_and_cost(self, bucket_name: str, region: str) -> Tuple[int, float]:
        """Compute the total size and estimated monthly cost of a bucket.

        Args:
            bucket_name: Name of the bucket to measure.
            region: Region used to pick the S3 client.

        Returns:
            ``(size_in_bytes, cost_in_usd)``. When listing fails the error is
            logged and ``(0, 0.0)`` is returned.
        """
        try:
            total_size = sum(
                obj.get('Size', 0) for obj in self._iter_objects(bucket_name, region)
            )
        except Exception as e:
            # Best effort: one unreadable bucket must not abort the whole report.
            logger.error("Error al obtener tamaño del bucket %s: %s", bucket_name, e)
            return 0, 0.0
        return total_size, self._estimate_monthly_cost(total_size)

    def get_bucket_region(self, bucket_name: str) -> str:
        """Return the region of a bucket.

        Args:
            bucket_name: Name of the bucket to look up.

        Returns:
            The region name; ``'us-east-1'`` when the service reports an empty
            constraint; UNKNOWN_REGION when the lookup fails (error is logged).
        """
        try:
            location = self.default_client.get_bucket_location(Bucket=bucket_name)
            constraint = location['LocationConstraint']
        except Exception as e:
            logger.error("Error al obtener región del bucket %s: %s", bucket_name, e)
            return self.UNKNOWN_REGION
        # An empty constraint means us-east-1.
        if not constraint:
            return 'us-east-1'
        return self.LEGACY_REGION_ALIASES.get(constraint, constraint)

    def get_bucket_contents(self, bucket_name: str, region: str) -> List[Dict]:
        """List the objects stored in a bucket.

        Args:
            bucket_name: Name of the bucket to list.
            region: Region used to pick the S3 client.

        Returns:
            One dict per object with the keys ``'path'``, ``'size'`` and
            ``'storage_class'``. If listing fails part-way the error is logged
            and the entries collected so far are returned.
        """
        contents = []
        try:
            for obj in self._iter_objects(bucket_name, region):
                contents.append({
                    'path': obj['Key'],
                    'size': obj['Size'],
                    # The listing already carries the storage class, so no
                    # extra head_object request per object is needed.
                    'storage_class': obj.get('StorageClass', 'STANDARD'),
                })
        except Exception as e:
            logger.error("Error al listar contenido del bucket %s: %s", bucket_name, e)
        return contents

    def generate_html_report(self):
        """Collect data for every bucket and write the HTML report to disk.

        Returns ``None``. If the bucket list cannot be obtained the error is
        logged and no file is written.
        """
        try:
            buckets = self.default_client.list_buckets()['Buckets']
        except Exception as e:
            logger.error("Error al listar buckets: %s", e)
            return

        buckets_info = []
        buckets_contents = {}
        total_buckets = len(buckets)
        for i, bucket in enumerate(buckets, 1):
            bucket_name = bucket['Name']
            logger.info("Procesando bucket %s/%s: %s", i, total_buckets, bucket_name)

            region = self.get_bucket_region(bucket_name)
            logger.info("Región del bucket %s: %s", bucket_name, region)

            # List the bucket once and derive size and cost from that listing
            # instead of paginating through every object a second time.
            contents = self.get_bucket_contents(bucket_name, region)
            size = sum(item['size'] for item in contents)
            buckets_info.append({
                'name': bucket_name,
                'region': region,
                'size': size,
                'cost': self._estimate_monthly_cost(size),
            })
            buckets_contents[bucket_name] = contents

        report = self._generate_html_content(buckets_info, buckets_contents)

        filename = f"s3-report-{self.account_id}.html"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)
        logger.info("Informe generado: %s", filename)

    def _generate_html_content(self, buckets_info: List[Dict], buckets_contents: Dict) -> str:
        """Render the collected data as a complete HTML document.

        Args:
            buckets_info: One dict per bucket with the keys ``'name'``,
                ``'region'``, ``'size'`` and ``'cost'``.
            buckets_contents: Mapping of bucket name to the list of object
                dicts (``'path'``, ``'size'``, ``'storage_class'``).

        Returns:
            The HTML text of the report.
        """
        def esc(value) -> str:
            # Bucket names and object keys are external data: escape them so
            # they cannot inject markup into the page.
            return html.escape(str(value))

        account = esc(self.account_id)
        parts = [
            "<!DOCTYPE html>",
            '<html lang="es">',
            "<head>",
            '<meta charset="utf-8">',
            f"<title>Informe S3 - Cuenta {account}</title>",
            "</head>",
            "<body>",
            f"<h1>Informe S3 - Cuenta {account}</h1>",
            "<h2>Listado de Buckets</h2>",
            "<table>",
            "<thead><tr>"
            "<th>Nombre del Bucket</th>"
            "<th>Región</th>"
            "<th>Tamaño Total</th>"
            "<th>Coste Mensual Estimado</th>"
            "</tr></thead>",
            "<tbody>",
        ]

        # Summary table: one row per bucket.
        for bucket in buckets_info:
            parts.append(
                "<tr>"
                f"<td>{esc(bucket['name'])}</td>"
                f"<td>{esc(bucket['region'])}</td>"
                f"<td>{esc(humanize.naturalsize(bucket['size']))}</td>"
                f"<td>${bucket['cost']:.2f} USD/mes</td>"
                "</tr>"
            )
        parts.extend([
            "</tbody>",
            "</table>",
            "<h2>Contenido de los Buckets</h2>",
        ])

        # Detail section: one table per bucket, one row per object.
        for bucket_name, contents in buckets_contents.items():
            parts.extend([
                f"<h3>Bucket: {esc(bucket_name)}</h3>",
                "<table>",
                "<thead><tr>"
                "<th>Ruta</th>"
                "<th>Tamaño</th>"
                "<th>Tipo de Almacenamiento</th>"
                "</tr></thead>",
                "<tbody>",
            ])
            for item in contents:
                parts.append(
                    "<tr>"
                    f"<td>{esc(item['path'])}</td>"
                    f"<td>{esc(humanize.naturalsize(item['size']))}</td>"
                    f"<td>{esc(item['storage_class'])}</td>"
                    "</tr>"
                )
            parts.extend(["</tbody>", "</table>"])

        parts.extend(["</body>", "</html>"])
        return "\n".join(parts) + "\n"
def main():
    """Run the S3 report for the current AWS account.

    Returns:
        ``0`` when the run finished without raising, ``1`` when an
        unexpected exception aborted it. The exception is logged together
        with its traceback instead of being propagated.
    """
    try:
        reporter = S3Reporter()
        reporter.generate_html_report()
    except Exception as e:
        # Top-level boundary: keep the traceback so the failure can be diagnosed.
        logger.exception("Error: %s", e)
        return 1
    return 0
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit
    # status; a None return still exits with status 0.
    raise SystemExit(main())