# /// script
# requires-python = ">=3.12"
# dependencies = [
#     "torch",
#     "transformers",
#     "Pillow",
# ]
# ///
"""Caption every jpg/png image in the current directory with several models.

For each image, runs each captioning model in MODELS and prints the
generated caption (or the error, if that model fails on that image).
"""

import functools
from pathlib import Path

import torch
from transformers import pipeline

# List of models to test
MODELS = [
    "ydshieh/vit-gpt2-coco-en",
    "Salesforce/blip-image-captioning-large",
    "microsoft/git-base-coco",
]


@functools.cache
def _get_captioner(model_name: str):
    """Build (once per process) and cache the captioning pipeline for a model.

    Loading a transformers pipeline downloads/initializes the full model, so
    constructing it per-image — as the original code did — repeats that cost
    for every image. Caching makes the load happen once per model.
    """
    return pipeline(
        model=model_name,
        device=0 if torch.cuda.is_available() else -1,
    )


def process_image(image_path: Path, models) -> None:
    """Process a single image through multiple models.

    Args:
        image_path: Path to the image file to caption.
        models: Iterable of model identifiers understood by
            ``transformers.pipeline``.

    Prints one line per model: either the caption or the error message.
    Never raises — each model's failure is caught and reported.
    """
    print(f"\n{image_path.name}")

    for model_name in models:
        try:
            captioner = _get_captioner(model_name)

            # Generate caption; image-to-text pipelines typically return a
            # list of dicts like [{"generated_text": ...}].
            caption = captioner(str(image_path))

            # Get caption text (fall back to the raw result if the pipeline
            # returned something other than a list).
            caption_text = caption[0]['generated_text'] if isinstance(caption, list) else caption

            print(f"{model_name}: {caption_text}")

        except Exception as e:
            # Best-effort per model: report and continue with the next one.
            print(f"{model_name}: Error: {str(e)}")


def main() -> None:
    """Process all images in current directory."""
    # Get all image files in current directory; sorted() gives a
    # deterministic processing order across runs/filesystems.
    image_files = sorted(Path().glob('*.jpg')) + sorted(Path().glob('*.png'))

    if not image_files:
        print("No image files (jpg/png) found in current directory!")
        return

    # Process each image
    for img_path in image_files:
        process_image(img_path, MODELS)


if __name__ == '__main__':
    main()