using SixLabors.ImageSharp;
using SixLabors.ImageSharp.Processing;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Formats.Jpeg;
using FaceAiSharp;
using System.Numerics.Tensors;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

// photo-frame-process: walks InputFolder recursively, crops/resizes every photo
// to the frame's resolution, and defers vertical shots so they can later be
// paired side-by-side by visual similarity.
const string InputFolder = @"D:\photo-frame\input";
const string OutputFolder = @"D:\photo-frame\output";
const int OutputWidth = 1366;
const int OutputHeight = 768;
const float OutputAspectRatio = (float)OutputHeight / OutputWidth;

// Monotonically increasing counter behind GetNextFileName().
int fileNumber = 0;

Console.WriteLine("photo-frame-process");

// Make sure the destination exists before the first SaveAsJpeg call.
Directory.CreateDirectory(OutputFolder);

ImageEmbedder imageEmbedder = new ImageEmbedder();
var faceDetector = FaceAiSharpBundleFactory.CreateFaceDetectorWithLandmarks();
Random rng = Random.Shared;

// Vertical images are not written immediately; they are collected here with a
// CLIP embedding and paired up two-across after the main pass.
List<MetaImage> metaImages = new List<MetaImage>();

DirectoryInfo di = new DirectoryInfo(InputFolder);
string[] extensions = { "*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp" };
List<FileInfo> files = new List<FileInfo>();
foreach (string ext in extensions)
{
    files.AddRange(di.GetFiles(ext, SearchOption.AllDirectories));
}

foreach (FileInfo file in files)
{
    // debug single image
    //if (file.Name != "IMG_1023.JPG") { continue; }

    Console.Write(file.Name);
    using (Image image = Image.Load(file.FullName))
    {
        // Respect EXIF orientation before measuring width/height.
        image.Mutate(i => i.AutoOrient());
        float aspect = (float)image.Height / image.Width;

        // happiest path - (nearly) the same aspect ratio.
        // Exact float equality almost never holds for ratios computed from
        // integer pixel dimensions, so compare with a small tolerance.
        if (Math.Abs(aspect - OutputAspectRatio) < 0.001f)
        {
            Console.WriteLine(" - same aspect ratio");
            image.Mutate(x => x.Resize(OutputWidth, OutputHeight));
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
            continue;
        }

        // vertical image: defer, to be paired with a similar one later
        if (image.Width < image.Height)
        {
            Console.WriteLine(" - vertical image");
            metaImages.Add(new MetaImage(file.FullName, imageEmbedder.Embed(file.FullName)));
            continue;
        }

        // horizontal image: crop around detected faces, then resize
        Console.WriteLine(" - horizontal image");
        try
        {
            FaceAwareResize(image, OutputWidth, OutputHeight);
            image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
        }
        catch (Exception ex)
        {
            Console.WriteLine($"Error processing {file.FullName}: {ex.Message}");
        }
    }
}
// Pair up the deferred vertical images two-across by CLIP cosine similarity.
while (metaImages.Count > 0)
{
    if (metaImages.Count == 1)
    {
        // one left over image, just resize as best as possible...
        MetaImage metaImage = metaImages[0];
        metaImages.Remove(metaImage);
        Console.WriteLine($"{Path.GetFileName(metaImage.ImagePath)} - single vertical image");
        using (Image image = Image.Load(metaImage.ImagePath))
        {
            try
            {
                image.Mutate(i => i.AutoOrient());
                FaceAwareResize(image, OutputWidth, OutputHeight);
                image.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Error processing {metaImage.ImagePath}: {ex.Message}");
            }
        }
        break;
    }

    // find two vertical images to combine: pick one at random, then...
    MetaImage meta1 = metaImages[rng.Next(metaImages.Count)];
    MetaImage? meta2 = null;
    float bestSimilarity = float.MinValue;

    // ...find the second image that is closest to the first based on cosine similarity
    foreach (MetaImage candidate in metaImages)
    {
        if (candidate == meta1) { continue; }
        float similarity = TensorPrimitives.CosineSimilarity(meta1.Embedding, candidate.Embedding);
        if (similarity > bestSimilarity)
        {
            bestSimilarity = similarity;
            meta2 = candidate;
        }
    }

    if (meta2 == null)
    {
        // Unreachable while Count >= 2, but guards the nullable flow below.
        throw new InvalidOperationException("No second image found");
    }

    metaImages.Remove(meta1);
    metaImages.Remove(meta2);
    Console.WriteLine($"{Path.GetFileName(meta1.ImagePath)} - vertical image paired with {Path.GetFileName(meta2.ImagePath)}");
    try
    {
        using (Image image1 = Image.Load(meta1.ImagePath))
        {
            image1.Mutate(i => i.AutoOrient());
            using (Image image2 = Image.Load(meta2.ImagePath))
            {
                image2.Mutate(i => i.AutoOrient());
                FaceAwareResize(image1, OutputWidth / 2, OutputHeight);
                FaceAwareResize(image2, OutputWidth / 2, OutputHeight);

                // create a new canvas with the two halves drawn side by side
                using (Image<Rgba32> combinedImage = new Image<Rgba32>(OutputWidth, OutputHeight))
                {
                    combinedImage.Mutate(x => x.DrawImage(image1, new Point(0, 0), 1f));
                    combinedImage.Mutate(x => x.DrawImage(image2, new Point(OutputWidth / 2, 0), 1f));
                    combinedImage.SaveAsJpeg(GetNextFileName(), new JpegEncoder { Quality = 90 });
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Error processing {meta1.ImagePath} and {meta2.ImagePath}: {ex.Message}");
    }
}

Console.WriteLine("photo-frame-process done!");

// utility below

/// <summary>
/// Crops <paramref name="image"/> in place to the width:height aspect ratio,
/// positioning the crop window so detected faces stay inside it, then resizes
/// to exactly width x height.
/// </summary>
void FaceAwareResize(Image image, int width, int height)
{
    // Union of all detected face boxes; stays null when no face was found.
    RectangleF? detectRect = null;
    var faces = faceDetector.DetectFaces(image);
    foreach (var face in faces)
    {
        detectRect = detectRect.HasValue
            ? RectangleF.Union(detectRect.Value, face.Box)
            : face.Box;
    }

    // No faces: fall back to a tiny rectangle at the image center.
    RectangleF coreRect = detectRect ?? new RectangleF(image.Width / 2.0f, image.Height / 2.0f, 0.1f, 0.1f);

    // get the center of coreRect as PointF
    PointF center = new PointF(coreRect.X + coreRect.Width / 2.0f, coreRect.Y + coreRect.Height / 2.0f);

    float targetAspectRatio = (float)width / height;
    float imageAspectRatio = (float)image.Width / image.Height;

    if (targetAspectRatio >= imageAspectRatio)
    {
        // Target is wider than the image: keep full width, choose the best Y band.
        float targetHeight = image.Width * ((float)height / width);
        float y = center.Y - targetHeight / 2.0f;
        if (y < 0)
        {
            y = 0;
        }
        else if (y + targetHeight > image.Height)
        {
            y = image.Height - targetHeight;
        }

        int intY = (int)y;
        int intTargetHeight = (int)targetHeight;
        // Clamp against integer-rounding spill past the bottom edge.
        int extra = image.Height - (intY + intTargetHeight);
        if (extra < 0) { intTargetHeight += extra; }

        Rectangle targetRect = new Rectangle(0, intY, image.Width, intTargetHeight);
        // crop to targetRect
        image.Mutate(ctx => ctx.Crop(targetRect));
    }
    else
    {
        // Target is taller than the image: keep full height, choose the best X band.
        float targetWidth = image.Height * ((float)width / height);
        float x = center.X - targetWidth / 2.0f;
        if (x < 0)
        {
            x = 0;
        }
        else if (x + targetWidth > image.Width)
        {
            x = image.Width - targetWidth;
        }

        int intX = (int)x;
        int intTargetWidth = (int)targetWidth;
        // Clamp against integer-rounding spill past the right edge.
        int extra = image.Width - (intX + intTargetWidth);
        if (extra < 0) { intTargetWidth += extra; }

        Rectangle targetRect = new Rectangle(intX, 0, intTargetWidth, image.Height);
        // crop to targetRect (lambda parameter must not be 'x' here: it would
        // collide with the local 'float x' above and fail to compile, CS0136)
        image.Mutate(ctx => ctx.Crop(targetRect));
    }

    // Resize the cropped image to the exact target dimensions.
    image.Mutate(ctx => ctx.Resize(new ResizeOptions { Size = new Size(width, height) }));
}

/// <summary>Returns the next sequentially numbered output path, e.g. 00000001.jpg.</summary>
string GetNextFileName()
{
    fileNumber++;
    return Path.Combine(OutputFolder, $"{fileNumber:00000000}.jpg");
}

/// <summary>A deferred vertical image: its path plus its CLIP embedding.</summary>
internal class MetaImage
{
    public string ImagePath { get; set; }
    public float[] Embedding { get; set; }

    public MetaImage(string imagePath, float[] embedding)
    {
        ImagePath = imagePath;
        Embedding = embedding;
    }
}

/// <summary>
/// Embeds an image using the OpenAI CLIP model.
/// See https://github.com/bartbroere/clip.dll/blob/master/Program.cs
/// </summary>
internal class ImageEmbedder
{
    private const string ModelFileName = "clip-image-vit-32-float32.onnx";
    private const string ModelUrl = "https://huggingface.co/rocca/openai-clip-js/resolve/main/clip-image-vit-32-float32.onnx";

    private InferenceSession _model;

    public ImageEmbedder()
    {
        // Download the ONNX model on first run, next to the executable.
        if (!File.Exists(ModelFileName))
        {
            using (HttpClient httpClient = new HttpClient())
            using (var response = httpClient.GetAsync(ModelUrl).Result)
            {
                // Fail loudly on a bad download instead of caching an error
                // page under the model's file name.
                response.EnsureSuccessStatusCode();
                using (var fs = new FileStream(ModelFileName, FileMode.CreateNew))
                {
                    response.Content.CopyToAsync(fs).Wait();
                }
            }
        }
        _model = new InferenceSession(ModelFileName);
    }

    /// <summary>Loads the image file and returns its CLIP embedding.</summary>
    public float[] Embed(string imagePath)
    {
        // Dispose the decoded image when done (the original leaked it).
        using (var image = Image.Load<Rgba32>(File.ReadAllBytes(imagePath)))
        {
            return Embed(image);
        }
    }

    /// <summary>
    /// Returns the CLIP embedding for <paramref name="image"/>.
    /// NOTE: mutates the argument (center-crops and resizes it to 224x224).
    /// </summary>
    public float[] Embed(Image<Rgba32> image)
    {
        // Center-crop to a square, then scale to CLIP's 224x224 input size.
        var smallestSide = Math.Min(image.Width, image.Height);
        image.Mutate(x => x.Crop(new Rectangle(
            (image.Width - smallestSide) / 2,
            (image.Height - smallestSide) / 2,
            smallestSide,
            smallestSide)));
        image.Mutate(x => x.Resize(224, 224));

        // NCHW tensor: batch 1, channels RGB, 224x224.
        var inputTensor = new DenseTensor<float>(new[] { 1, 3, 224, 224 });
        for (var x = 0; x < 224; x++)
        {
            for (var y = 0; y < 224; y++)
            {
                // Normalize from bytes (0-255) to floats (constants borrowed from CLIP repository)
                inputTensor[0, 0, y, x] = Convert.ToSingle((((float)image[x, y].R / 255) - 0.48145466) / 0.26862954);
                inputTensor[0, 1, y, x] = Convert.ToSingle((((float)image[x, y].G / 255) - 0.4578275) / 0.26130258);
                inputTensor[0, 2, y, x] = Convert.ToSingle((((float)image[x, y].B / 255) - 0.40821073) / 0.27577711);
            }
        }

        var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor("input", inputTensor) };
        var outputData = _model.Run(inputs).ToList().Last().AsTensor<float>().ToArray();
        return outputData;
    }
}