@abfo
Created March 11, 2025 23:49
Program.cs
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Formats.Jpeg;
using FaceAiSharp;
using System.Numerics.Tensors;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

const string InputFolder = @"D:\photo-frame\input";
const string OutputFolder = @"D:\photo-frame\output";
const int OutputWidth = 1366;
const int OutputHeight = 768;
const float OutputAspectRatio = (float)OutputHeight / OutputWidth;

int fileNumber = 0;

Console.WriteLine("photo-frame-process");

ImageEmbedder imageEmbedder = new ImageEmbedder();
var faceDetector = FaceAiSharpBundleFactory.CreateFaceDetectorWithLandmarks();
Random rng = new Random();

List<MetaImage> metaImages = new List<MetaImage>();

DirectoryInfo di = new DirectoryInfo(InputFolder);
string[] extensions = { "*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp" };
List<FileInfo> files = new List<FileInfo>();
foreach (string ext in extensions)
{
    files.AddRange(di.GetFiles(ext, SearchOption.AllDirectories));
}
foreach (FileInfo file in files)
{
    // debug single image
    //if (file.Name != "IMG_1023.JPG") { continue; }

    Console.Write(file.Name);

    using (Image<Rgb24> image = Image.Load<Rgb24>(file.FullName))
    {
        image.Mutate(i => i.AutoOrient());

        float aspect = (float)image.Height / image.Width;

        // happiest path - same aspect ratio
        if (aspect == OutputAspectRatio)
        {
            Console.WriteLine(" - same aspect ratio");
            image.Mutate(x => x.Resize(OutputWidth, OutputHeight));
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
            continue;
        }

        // vertical image
        if (image.Width < image.Height)
        {
            Console.WriteLine(" - vertical image");
            metaImages.Add(new MetaImage(file.FullName, imageEmbedder.Embed(file.FullName)));
            continue;
        }

        // horizontal image
        Console.WriteLine(" - horizontal image");
        try
        {
            FaceAwareResize(image, OutputWidth, OutputHeight);
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error processing {file.FullName}: {ex.Message}");
        }
    }
}

while (metaImages.Count > 0)
{
    if (metaImages.Count == 1)
    {
        // one left over image, just resize as best as possible...
        MetaImage metaImage = metaImages[0];
        metaImages.Remove(metaImage);

        Console.WriteLine($"{Path.GetFileName(metaImage.ImagePath)} - single vertical image");

        using (Image<Rgb24> image = Image.Load<Rgb24>(metaImage.ImagePath))
        {
            try
            {
                image.Mutate(i => i.AutoOrient());
                FaceAwareResize(image, OutputWidth, OutputHeight);
                image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error processing {metaImage.ImagePath}: {ex.Message}");
            }
        }

        break;
    }

    // find two vertical images to combine
    MetaImage meta1 = metaImages[rng.Next(metaImages.Count)];
    MetaImage? meta2 = null;

    float bestSimilarity = float.MinValue;

    // find the second image that is closest to the first image based on cosine similarity
    foreach (MetaImage candidate in metaImages)
    {
        if (candidate == meta1)
        {
            continue;
        }

        float similarity = TensorPrimitives.CosineSimilarity(meta1.Embedding, candidate.Embedding);
        if (similarity > bestSimilarity)
        {
            bestSimilarity = similarity;
            meta2 = candidate;
        }
    }

    if (meta2 == null)
    {
        throw new Exception("No second image found");
    }

    metaImages.Remove(meta1);
    metaImages.Remove(meta2);

    Console.WriteLine($"{Path.GetFileName(meta1.ImagePath)} - vertical image paired with {Path.GetFileName(meta2.ImagePath)}");

    try
    {
        using (Image<Rgb24> image1 = Image.Load<Rgb24>(meta1.ImagePath))
        {
            image1.Mutate(i => i.AutoOrient());

            using (Image<Rgb24> image2 = Image.Load<Rgb24>(meta2.ImagePath))
            {
                image2.Mutate(i => i.AutoOrient());

                FaceAwareResize(image1, OutputWidth / 2, OutputHeight);
                FaceAwareResize(image2, OutputWidth / 2, OutputHeight);

                // create a new image with the two images combined side by side
                using (Image<Rgb24> combinedImage = new Image<Rgb24>(OutputWidth, OutputHeight))
                {
                    combinedImage.Mutate(x => x.DrawImage(image1, new Point(0, 0), 1f));
                    combinedImage.Mutate(x => x.DrawImage(image2, new Point(OutputWidth / 2, 0), 1f));
                    combinedImage.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error processing {meta1.ImagePath} and {meta2.ImagePath}: {ex.Message}");
    }
}

Console.WriteLine("photo-frame-process done!");


// utility below


void FaceAwareResize(Image<Rgb24> image, int width, int height)
{
    RectangleF? detectRect = null;

    // union of all detected face boxes (stays null if no faces are found)
    var faces = faceDetector.DetectFaces(image);
    foreach (var face in faces)
    {
        if (detectRect.HasValue)
        {
            detectRect = RectangleF.Union(detectRect.Value, face.Box);
        }
        else
        {
            detectRect = face.Box;
        }
    }

    // fall back to a small rectangle at the image center when no faces are detected
    RectangleF coreRect = detectRect ?? new RectangleF(image.Width / 2.0f, image.Height / 2.0f, 0.1f, 0.1f);

    // get the center of coreRect as PointF
    PointF center = new PointF(coreRect.X + coreRect.Width / 2.0f, coreRect.Y + coreRect.Height / 2.0f);

    float targetAspectRatio = (float)width / height;
    float imageAspectRatio = (float)image.Width / image.Height;

    if (targetAspectRatio >= imageAspectRatio)
    {
        // keep the full width and crop the height - figure out the best Y position
        float targetHeight = image.Width * ((float)height / width);
        float y = center.Y - targetHeight / 2.0f;
        if (y < 0)
        {
            y = 0;
        }
        else if (y + targetHeight > image.Height)
        {
            y = image.Height - targetHeight;
        }

        int intY = (int)y;
        int intTargetHeight = (int)targetHeight;
        int extra = image.Height - (intY + intTargetHeight);
        if (extra < 0)
        {
            intTargetHeight += extra;
        }

        Rectangle targetRect = new Rectangle(0, intY, image.Width, intTargetHeight);

        // crop to targetRect
        image.Mutate(x => x.Crop(targetRect));
    }
    else
    {
        // keep the full height and crop the width - figure out the best X position
        float targetWidth = image.Height * ((float)width / height);
        float x = center.X - targetWidth / 2.0f;
        if (x < 0)
        {
            x = 0;
        }
        else if (x + targetWidth > image.Width)
        {
            x = image.Width - targetWidth;
        }

        int intX = (int)x;
        int intTargetWidth = (int)targetWidth;
        int extra = image.Width - (intX + intTargetWidth);
        if (extra < 0)
        {
            intTargetWidth += extra;
        }

        Rectangle targetRect = new Rectangle(intX, 0, intTargetWidth, image.Height);

        // crop to targetRect
        image.Mutate(x => x.Crop(targetRect));
    }

    // Resize the image to the target dimensions while keeping the face in the center
    image.Mutate(x => x.Resize(new ResizeOptions
    {
        Size = new Size(width, height)
    }));
}

string GetNextFileName()
{
    fileNumber++;
    return Path.Combine(OutputFolder, $"{fileNumber:00000000}.jpg");
}

internal class MetaImage
{
    public string ImagePath { get; set; }
    public float[] Embedding { get; set; }

    public MetaImage(string imagePath, float[] embedding)
    {
        ImagePath = imagePath;
        Embedding = embedding;
    }
}

/// <summary>
/// Embeds an image using the OpenAI CLIP model.
/// See https://github.com/bartbroere/clip.dll/blob/master/Program.cs
/// </summary>
internal class ImageEmbedder
{
    private InferenceSession _model;

    public ImageEmbedder()
    {
        // download the ONNX CLIP image model on first run
        if (!File.Exists("clip-image-vit-32-float32.onnx"))
        {
            using (HttpClient httpClient = new HttpClient())
            {
                var response = httpClient.GetAsync("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx").Result;
                using (var fs = new FileStream("clip-image-vit-32-float32.onnx", FileMode.CreateNew))
                {
                    response.Content.CopyToAsync(fs).Wait();
                }
            }
        }

        _model = new InferenceSession("clip-image-vit-32-float32.onnx");
    }

    public float[] Embed(string imagePath)
    {
        // load the file, embed it, and dispose the image when done
        using (Image<Rgb24> image = Image.Load<Rgb24>(File.ReadAllBytes(imagePath)))
        {
            return Embed(image);
        }
    }

    public float[] Embed(Image<Rgb24> image)
    {
        // center-crop to a square, then resize to the 224x224 input CLIP expects
        var smallestSide = Math.Min(image.Width, image.Height);
        image.Mutate(x => x.Crop(
            new Rectangle(
                (image.Width - smallestSide) / 2,
                (image.Height - smallestSide) / 2,
                smallestSide,
                smallestSide
            )));

        image.Mutate(x => x.Resize(224, 224));

        var inputTensor = new DenseTensor<float>(new[] { 1, 3, 224, 224 });

        for (var x = 0; x < 224; x++)
        {
            for (var y = 0; y < 224; y++)
            {
                // Normalize from bytes (0-255) to floats (constants borrowed from CLIP repository)
                inputTensor[0, 0, y, x] = Convert.ToSingle((((float)image[x, y].R / 255) - 0.48145466) / 0.26862954);
                inputTensor[0, 1, y, x] = Convert.ToSingle((((float)image[x, y].G / 255) - 0.4578275) / 0.26130258);
                inputTensor[0, 2, y, x] = Convert.ToSingle((((float)image[x, y].B / 255) - 0.40821073) / 0.27577711);
            }
        }

        var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor("input", inputTensor) };

        var outputData = _model.Run(inputs).ToList().Last().AsTensor<float>().ToArray();

        return outputData;
    }
}
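
// A minimal sketch of how ImageEmbedder pairs with TensorPrimitives.CosineSimilarity
// to score two photos, the same two calls the pairing loop above relies on.
// The file paths here are hypothetical placeholders:
//
//   var embedder = new ImageEmbedder();
//   float[] a = embedder.Embed(@"D:\photo-frame\input\photo-a.jpg");   // placeholder path
//   float[] b = embedder.Embed(@"D:\photo-frame\input\photo-b.jpg");   // placeholder path
//   float similarity = TensorPrimitives.CosineSimilarity(a, b);        // closer to 1 means more alike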