Created
February 6, 2025 18:41
-
-
Save isaaccorley/9424f71996b18e4b00b3ac54824d8e77 to your computer and use it in GitHub Desktop.
Revisions
-
isaaccorley created this gist
Feb 6, 2025 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,208 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Random-forest baseline on InfraredSolarModules\n",
    "\n",
    "Downloads the RaptorMaps `InfraredSolarModules` archive, flattens every image into a feature vector and fits a `RandomForestClassifier` on stratified splits (10% held out) for two tasks:\n",
    "\n",
    "- multi-class: predict the `anomaly_class` label;\n",
    "- binary: `No-Anomaly` (0) versus any other class (1).\n",
    "\n",
    "Each task is repeated for seeds 0-9 and the mean and standard deviation of the train/test accuracy are printed."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `wget -nc` skips the download when the archive already exists and\n",
    "# `unzip -n -q` never overwrites extracted files (and stays quiet), so this\n",
    "# cell can be re-run without re-downloading or hitting overwrite prompts.\n",
    "!wget -nc https://raw.githubusercontent.com/RaptorMaps/InfraredSolarModules/master/2020-02-14_InfraredSolarModules.zip\n",
    "!unzip -n -q 2020-02-14_InfraredSolarModules.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install numpy pillow tqdm scikit-learn"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20000/20000 [00:03<00:00, 5425.94it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(20000, 960) (20000,)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "import numpy as np\n",
    "from PIL import Image\n",
    "from tqdm import tqdm\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "\n",
    "def load_image(path):\n",
    "    \"\"\"Read one image file into a NumPy array and close the file afterwards.\"\"\"\n",
    "    with Image.open(path) as img:\n",
    "        return np.array(img)\n",
    "\n",
    "\n",
    "root = \"InfraredSolarModules\"\n",
    "with open(os.path.join(root, \"module_metadata.json\"), \"r\") as f:\n",
    "    data = json.load(f)\n",
    "\n",
    "# Materialise the metadata records once so that the image paths and both\n",
    "# label arrays are built from the same sequence, in the same order.\n",
    "records = list(data.values())\n",
    "\n",
    "classes = sorted({rec[\"anomaly_class\"] for rec in records})\n",
    "cls2idx = {cls: i for i, cls in enumerate(classes)}\n",
    "images = [os.path.join(root, rec[\"image_filepath\"]) for rec in records]\n",
    "\n",
    "x = np.stack([load_image(image) for image in tqdm(images)])\n",
    "x = x.reshape(x.shape[0], -1)  # flatten each image into a single feature row\n",
    "y = np.array([cls2idx[rec[\"anomaly_class\"]] for rec in records])\n",
    "# Binary target: 0 for \"No-Anomaly\", 1 for every other class.\n",
    "y_binary = np.array([0 if rec[\"anomaly_class\"] == \"No-Anomaly\" else 1 for rec in records])\n",
    "print(x.shape, y.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model and evaluation helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train(x, y, test_size=0.1, seed=0):\n",
    "    \"\"\"Fit a random forest on a stratified split and return its accuracies.\n",
    "\n",
    "    Args:\n",
    "        x: feature matrix with one row per sample.\n",
    "        y: label for each row of ``x``; also used to stratify the split.\n",
    "        test_size: forwarded to ``train_test_split`` as the held-out size.\n",
    "        seed: random state shared by the split and the classifier.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(train_acc, test_acc)``: accuracy on the training split and\n",
    "        on the held-out split.\n",
    "    \"\"\"\n",
    "    X_train, X_test, y_train, y_test = train_test_split(\n",
    "        x, y, test_size=test_size, random_state=seed, stratify=y\n",
    "    )\n",
    "    clf = RandomForestClassifier(random_state=seed, n_jobs=-1)\n",
    "    clf.fit(X_train, y_train)\n",
    "    train_acc = (clf.predict(X_train) == y_train).mean()\n",
    "    test_acc = (clf.predict(X_test) == y_test).mean()\n",
    "    return train_acc, test_acc\n",
    "\n",
    "\n",
    "def evaluate_across_seeds(x, y, seeds=range(10), test_size=0.1):\n",
    "    \"\"\"Call ``train`` once per seed and print per-seed and aggregate accuracy.\n",
    "\n",
    "    Args:\n",
    "        x: feature matrix passed through to ``train``.\n",
    "        y: labels passed through to ``train``.\n",
    "        seeds: iterable of seeds; one split and one model is built per seed.\n",
    "        test_size: forwarded to ``train``.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(train_acc, test_acc)`` of lists holding one accuracy per seed.\n",
    "    \"\"\"\n",
    "    train_acc, test_acc = [], []\n",
    "\n",
    "    for seed in seeds:\n",
    "        train_acc_, test_acc_ = train(x, y, test_size=test_size, seed=seed)\n",
    "        train_acc.append(train_acc_)\n",
    "        test_acc.append(test_acc_)\n",
    "        print(\"Train accuracy:\", train_acc_)\n",
    "        print(\"Test accuracy:\", test_acc_)\n",
    "\n",
    "    # Report mean and standard deviation over the seeds.\n",
    "    print(\"Train accuracy (averaged across seeds):\", np.mean(train_acc), np.std(train_acc))\n",
    "    print(\"Test accuracy: (averaged across seeds)\", np.mean(test_acc), np.std(test_acc))\n",
    "    return train_acc, test_acc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multi-class results (`anomaly_class`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train accuracy: 0.9997222222222222\n",
      "Test accuracy: 0.6725\n",
      "Train accuracy: 0.9998333333333334\n",
      "Test accuracy: 0.657\n",
      "Train accuracy: 0.9997222222222222\n",
      "Test accuracy: 0.6765\n",
      "Train accuracy: 0.9996666666666667\n",
      "Test accuracy: 0.6595\n",
      "Train accuracy: 0.9996111111111111\n",
      "Test accuracy: 0.6775\n",
      "Train accuracy: 0.9997222222222222\n",
      "Test accuracy: 0.6565\n",
      "Train accuracy: 0.9997222222222222\n",
      "Test accuracy: 0.6575\n",
      "Train accuracy: 0.9997222222222222\n",
      "Test accuracy: 0.6675\n",
      "Train accuracy: 0.9996666666666667\n",
      "Test accuracy: 0.668\n",
      "Train accuracy: 0.9997222222222222\n",
      "Test accuracy: 0.6695\n",
      "Train accuracy (averaged across seeds): 0.999711111111111 5.4433105395173477e-05\n",
      "Test accuracy: (averaged across seeds) 0.6662 0.007672027111526655\n"
     ]
    }
   ],
   "source": [
    "train_acc, test_acc = evaluate_across_seeds(x, y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Binary results (`No-Anomaly` vs. anomaly)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.8445\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.8405\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.8495\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.8505\n",
      "Train accuracy: 0.9999444444444444\n",
      "Test accuracy: 0.8315\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.841\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.828\n",
      "Train accuracy: 0.9999444444444444\n",
      "Test accuracy: 0.8385\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.8415\n",
      "Train accuracy: 1.0\n",
      "Test accuracy: 0.84\n",
      "Train accuracy (averaged across seeds): 0.999988888888889 2.2222222222234576e-05\n",
      "Test accuracy: (averaged across seeds) 0.84055 0.006631176366226449\n"
     ]
    }
   ],
   "source": [
    "train_acc, test_acc = evaluate_across_seeds(x, y_binary)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "torchenv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}