@abfo
Created March 11, 2025 23:49
Program.cs
using SixLabors.ImageSharp;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Formats.Jpeg;
using FaceAiSharp;
using System.Numerics.Tensors;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

const string InputFolder = @"D:\photo-frame\input";
const string OutputFolder = @"D:\photo-frame\output";
const int OutputWidth = 1366;
const int OutputHeight = 768;
const float OutputAspectRatio = (float)OutputHeight / OutputWidth;

int fileNumber = 0;

Console.WriteLine("photo-frame-process");

ImageEmbedder imageEmbedder = new ImageEmbedder();
var faceDetector = FaceAiSharpBundleFactory.CreateFaceDetectorWithLandmarks();
Random rng = new Random();

List<MetaImage> metaImages = new List<MetaImage>();

DirectoryInfo di = new DirectoryInfo(InputFolder);
string[] extensions = { "*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp" };
List<FileInfo> files = new List<FileInfo>();
foreach (string ext in extensions)
{
    files.AddRange(di.GetFiles(ext, SearchOption.AllDirectories));
}
foreach (FileInfo file in files)
{
    // debug single image
    //if (file.Name != "IMG_1023.JPG") { continue; }

    Console.Write(file.Name);

    using (Image<Rgb24> image = Image.Load<Rgb24>(file.FullName))
    {
        image.Mutate(i => i.AutoOrient());

        float aspect = (float)image.Height / image.Width;

        // happiest path - same aspect ratio
        if (aspect == OutputAspectRatio)
        {
            Console.WriteLine(" - same aspect ratio");
            image.Mutate(x => x.Resize(OutputWidth, OutputHeight));
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
            continue;
        }

        // vertical image
        if (image.Width < image.Height)
        {
            Console.WriteLine(" - vertical image");
            metaImages.Add(new MetaImage(file.FullName, imageEmbedder.Embed(file.FullName)));
            continue;
        }

        // horizontal image
        Console.WriteLine(" - horizontal image");
        try
        {
            FaceAwareResize(image, OutputWidth, OutputHeight);
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error processing {file.FullName}: {ex.Message}");
        }
    }
}

while (metaImages.Count > 0)
{
    if (metaImages.Count == 1)
    {
        // one left over image, just resize as best as possible...
        MetaImage metaImage = metaImages[0];
        metaImages.Remove(metaImage);

        Console.WriteLine($"{Path.GetFileName(metaImage.ImagePath)} - single vertical image");

        using (Image<Rgb24> image = Image.Load<Rgb24>(metaImage.ImagePath))
        {
            try
            {
                image.Mutate(i => i.AutoOrient());
                FaceAwareResize(image, OutputWidth, OutputHeight);
                image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error processing {metaImage.ImagePath}: {ex.Message}");
            }
        }

        break;
    }

    // find two vertical images to combine
    MetaImage meta1 = metaImages[rng.Next(metaImages.Count)];
    MetaImage? meta2 = null;

    float bestSimilarity = float.MinValue;

    // find the second image that is closest to the first image based on cosine similarity
    foreach (MetaImage candidate in metaImages)
    {
        if (candidate == meta1)
        {
            continue;
        }

        float similarity = TensorPrimitives.CosineSimilarity(meta1.Embedding, candidate.Embedding);
        if (similarity > bestSimilarity)
        {
            bestSimilarity = similarity;
            meta2 = candidate;
        }
    }

    if (meta2 == null)
    {
        throw new Exception("No second image found");
    }

    metaImages.Remove(meta1);
    metaImages.Remove(meta2);

    Console.WriteLine($"{Path.GetFileName(meta1.ImagePath)} - vertical image paired with {Path.GetFileName(meta2.ImagePath)}");

    try
    {
        using (Image<Rgb24> image1 = Image.Load<Rgb24>(meta1.ImagePath))
        {
            image1.Mutate(i => i.AutoOrient());

            using (Image<Rgb24> image2 = Image.Load<Rgb24>(meta2.ImagePath))
            {
                image2.Mutate(i => i.AutoOrient());

                FaceAwareResize(image1, OutputWidth / 2, OutputHeight);
                FaceAwareResize(image2, OutputWidth / 2, OutputHeight);

                // create a new image with the two images combined side by side
                using (Image<Rgb24> combinedImage = new Image<Rgb24>(OutputWidth, OutputHeight))
                {
                    combinedImage.Mutate(x => x.DrawImage(image1, new Point(0, 0), 1f));
                    combinedImage.Mutate(x => x.DrawImage(image2, new Point(OutputWidth / 2, 0), 1f));
                    combinedImage.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error processing {meta1.ImagePath} and {meta2.ImagePath}: {ex.Message}");
    }
}

Console.WriteLine("photo-frame-process done!");


// utility below


void FaceAwareResize(Image<Rgb24> image, int width, int height)
{
    RectangleF? detectRect = null;

    // union of all detected face boxes (stays null if no faces are found)
    var faces = faceDetector.DetectFaces(image);
    foreach (var face in faces)
    {
        if (detectRect.HasValue)
        {
            detectRect = RectangleF.Union(detectRect.Value, face.Box);
        }
        else
        {
            detectRect = face.Box;
        }
    }

    // fall back to a small rectangle at the image center when no faces are detected
    RectangleF coreRect = detectRect ?? new RectangleF(image.Width / 2.0f, image.Height / 2.0f, 0.1f, 0.1f);

    // get the center of coreRect as PointF
    PointF center = new PointF(coreRect.X + coreRect.Width / 2.0f, coreRect.Y + coreRect.Height / 2.0f);

    float targetAspectRatio = (float)width / height;
    float imageAspectRatio = (float)image.Width / image.Height;

    if (targetAspectRatio >= imageAspectRatio)
    {
        // keep the full width and crop the height - figure out the best Y position
        float targetHeight = image.Width * ((float)height / width);
        float y = center.Y - targetHeight / 2.0f;
        if (y < 0)
        {
            y = 0;
        }
        else if (y + targetHeight > image.Height)
        {
            y = image.Height - targetHeight;
        }

        int intY = (int)y;
        int intTargetHeight = (int)targetHeight;
        int extra = image.Height - (intY + intTargetHeight);
        if (extra < 0)
        {
            intTargetHeight += extra;
        }

        Rectangle targetRect = new Rectangle(0, intY, image.Width, intTargetHeight);

        // crop to targetRect
        image.Mutate(x => x.Crop(targetRect));
    }
    else
    {
        // keep the full height and crop the width - figure out the best X position
        float targetWidth = image.Height * ((float)width / height);
        float x = center.X - targetWidth / 2.0f;
        if (x < 0)
        {
            x = 0;
        }
        else if (x + targetWidth > image.Width)
        {
            x = image.Width - targetWidth;
        }

        int intX = (int)x;
        int intTargetWidth = (int)targetWidth;
        int extra = image.Width - (intX + intTargetWidth);
        if (extra < 0)
        {
            intTargetWidth += extra;
        }

        Rectangle targetRect = new Rectangle(intX, 0, intTargetWidth, image.Height);

        // crop to targetRect
        image.Mutate(x => x.Crop(targetRect));
    }

    // Resize the image to the target dimensions while keeping the face in the center
    image.Mutate(x => x.Resize(new ResizeOptions
    {
        Size = new Size(width, height)
    }));
}

string GetNextFileName()
{
    fileNumber++;
    return Path.Combine(OutputFolder, $"{fileNumber:00000000}.jpg");
}

internal class MetaImage
{
    public string ImagePath { get; set; }
    public float[] Embedding { get; set; }

    public MetaImage(string imagePath, float[] embedding)
    {
        ImagePath = imagePath;
        Embedding = embedding;
    }
}

/// <summary>
/// Embeds an image using the OpenAI CLIP model.
/// See https://github.com/bartbroere/clip.dll/blob/master/Program.cs
/// </summary>
internal class ImageEmbedder
{
    private InferenceSession _model;

    public ImageEmbedder()
    {
        // download the ONNX CLIP image model on first run
        if (!File.Exists("clip-image-vit-32-float32.onnx"))
        {
            using (HttpClient httpClient = new HttpClient())
            {
                var response = httpClient.GetAsync("https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx").Result;
                using (var fs = new FileStream("clip-image-vit-32-float32.onnx", FileMode.CreateNew))
                {
                    response.Content.CopyToAsync(fs).Wait();
                }
            }
        }

        _model = new InferenceSession("clip-image-vit-32-float32.onnx");
    }

    public float[] Embed(string imagePath)
    {
        // load the file, embed it, and dispose the image when done
        using (Image<Rgb24> image = Image.Load<Rgb24>(File.ReadAllBytes(imagePath)))
        {
            return Embed(image);
        }
    }

    public float[] Embed(Image<Rgb24> image)
    {
        // center-crop to a square, then resize to the 224x224 input CLIP expects
        var smallestSide = Math.Min(image.Width, image.Height);
        image.Mutate(x => x.Crop(
            new Rectangle(
                (image.Width - smallestSide) / 2,
                (image.Height - smallestSide) / 2,
                smallestSide,
                smallestSide
            )));

        image.Mutate(x => x.Resize(224, 224));

        var inputTensor = new DenseTensor<float>(new[] { 1, 3, 224, 224 });

        for (var x = 0; x < 224; x++)
        {
            for (var y = 0; y < 224; y++)
            {
                // Normalize from bytes (0-255) to floats (constants borrowed from CLIP repository)
                inputTensor[0, 0, y, x] = Convert.ToSingle((((float)image[x, y].R / 255) - 0.48145466) / 0.26862954);
                inputTensor[0, 1, y, x] = Convert.ToSingle((((float)image[x, y].G / 255) - 0.4578275) / 0.26130258);
                inputTensor[0, 2, y, x] = Convert.ToSingle((((float)image[x, y].B / 255) - 0.40821073) / 0.27577711);
            }
        }

        var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor("input", inputTensor) };

        var outputData = _model.Run(inputs).ToList().Last().AsTensor<float>().ToArray();

        return outputData;
    }
}
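
// A minimal sketch of how ImageEmbedder pairs with TensorPrimitives.CosineSimilarity
// to score two photos, the same two calls the pairing loop above relies on.
// The file paths here are hypothetical placeholders:
//
//   var embedder = new ImageEmbedder();
//   float[] a = embedder.Embed(@"D:\photo-frame\input\photo-a.jpg");   // placeholder path
//   float[] b = embedder.Embed(@"D:\photo-frame\input\photo-b.jpg");   // placeholder path
//   float similarity = TensorPrimitives.CosineSimilarity(a, b);        // closer to 1 means more alike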