Created
March 22, 2025 04:31
-
-
Save Dev-Dipesh/9977202b70d8c14e9c6d1adf543be207 to your computer and use it in GitHub Desktop.
Revisions
-
Dev-Dipesh created this gist
Mar 22, 2025 .There are no files selected for viewing
# Simple Test Code for Creating Blockchain + Basic AI Fraud Detection

import hashlib
import json
import time


class Block:
    """A single block: an index, a timestamp, a payload and two hashes.

    Attributes:
        index: Position of the block in the chain (0 for the genesis block).
        previous_hash: ``hash`` of the block that precedes this one.
        timestamp: Creation time supplied by the caller.
        transactions: Payload stored in the block (a list of transaction
            dicts, or the string "Genesis Block" for the first block).
        hash: SHA-256 hex digest computed from the four fields above at
            construction time.
    """

    def __init__(self, index, previous_hash, timestamp, transactions):
        self.index = index  # Block number
        self.previous_hash = previous_hash  # Hash of the previous block
        self.timestamp = timestamp  # Time of block creation
        self.transactions = transactions  # List of transactions in the block
        self.hash = self.calculate_hash()  # Unique hash for the block

    def calculate_hash(self):
        """Return the SHA-256 hex digest of this block's contents.

        The fields are serialized as one JSON array so that field boundaries
        are unambiguous. Plain concatenation would hash index=1 /
        previous_hash="23" and index=12 / previous_hash="3" to the same
        value. ``sort_keys=True`` makes the digest independent of the key
        order inside transaction dicts.
        """
        fields = [self.index, self.previous_hash, self.timestamp, self.transactions]
        try:
            # default=str keeps non-JSON values (e.g. Decimal) hashable, as
            # the original f-string formatting did.
            serialized = json.dumps(fields, sort_keys=True, default=str)
        except (TypeError, ValueError):
            # Payloads JSON cannot express (unsortable / non-string keys,
            # circular references) still get a delimiter-safe encoding.
            serialized = repr(tuple(fields))
        return hashlib.sha256(serialized.encode()).hexdigest()


class Blockchain:
    """An append-only list of ``Block`` objects linked by their hashes."""

    def __init__(self):
        self.chain = [self.create_genesis_block()]  # Start with a genesis block

    def create_genesis_block(self):
        """Return the first block of the chain (index 0, previous hash "0")."""
        return Block(0, "0", time.time(), "Genesis Block")

    def add_block(self, transactions):
        """Append a new block holding ``transactions`` to the end of the chain.

        The new block's ``previous_hash`` is the current last block's hash.
        """
        prev_block = self.chain[-1]  # Get the last block in the chain
        new_block = Block(len(self.chain), prev_block.hash, time.time(), transactions)
        self.chain.append(new_block)  # Add the new block to the chain

    def is_valid(self):
        """Return True if no block has been altered and every link is intact.

        A block fails the check when its stored hash no longer matches its
        contents, when its index does not match its position, or when its
        ``previous_hash`` differs from the hash of the block before it.
        """
        for position, block in enumerate(self.chain):
            if block.index != position:
                return False
            if block.hash != block.calculate_hash():
                return False
            if position > 0 and block.previous_hash != self.chain[position - 1].hash:
                return False
        return True


# Creating Blockchain & Adding Transactions
my_chain = Blockchain()
transactions = [
    {"sender": "Mary", "receiver": "Daniel", "amount": 28000},
    {"sender": "Daniel", "receiver": "Lisa", "amount": 20000},
]
my_chain.add_block(transactions)
my_chain.add_block([{"sender": "Justin", "receiver": "Daniel", "amount": 20000}])

# Print the Blockchain
for block in my_chain.chain:
    print(f"Index: {block.index}, Hash: {block.hash}, Previous Hash: {block.previous_hash}, Transactions: {block.transactions}")
``` Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}] Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}] Install necessary packages ```python %pip install pandas numpy scikit-learn ``` Requirement already satisfied: pandas in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.5.3) Requirement already satisfied: numpy in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.24.3) Requirement already satisfied: scikit-learn in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.3.0) Requirement already satisfied: python-dateutil>=2.8.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2.8.2) Requirement already satisfied: pytz>=2020.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2022.7) Requirement already satisfied: scipy>=1.5.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.10.1) Requirement already satisfied: joblib>=1.1.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.2.0) Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (2.2.0) Requirement already satisfied: six>=1.5 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0) Note: you may need to restart the kernel to use updated packages. 
# Code Explanation - Step 1: Importing Required Libraries
#   pandas                 - used for handling tabular data (like an Excel sheet).
#   numpy                  - used for numerical operations (not used directly in
#                            this cell).
#   train_test_split       - splits data into training and testing sets.
#   RandomForestClassifier - a machine learning algorithm used for classification.
#   accuracy_score         - measures how well the model performs.
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Sample dataset of past transactions: one feature column ("amount") and one
# label column ("is_fraud", 0 = normal, 1 = fraudulent).
data = pd.DataFrame([
    {"amount": 100, "is_fraud": 0},  # Normal transaction
    {"amount": 5000, "is_fraud": 1},  # Fraudulent transaction
    {"amount": 200, "is_fraud": 0},  # Normal transaction
    {"amount": 8000, "is_fraud": 1},  # Fraudulent transaction
    {"amount": 150, "is_fraud": 0},  # Normal transaction
    {"amount": 7500, "is_fraud": 1}  # Fraudulent transaction
])

# Split data into features (X) and labels (y).
# X is kept as a one-column DataFrame (double brackets), so the model is
# fitted with the feature name "amount".
X = data[["amount"]]  # Features (Transaction Amount)
y = data["is_fraud"]  # Labels (0: Legit, 1: Fraud)

# Train-Test Split (nominally 80% training, 20% testing).
# NOTE(review): with only 6 rows the 0.2 fraction cannot be met exactly;
# scikit-learn should round the test set up to 2 rows (4 train / 2 test) - confirm.
# random_state fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model: a forest of 10 decision trees with a fixed seed.
model = RandomForestClassifier(n_estimators=10, random_state=42)
model.fit(X_train, y_train)

# Evaluate model accuracy on the held-out rows.
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))

# Recorded output of the run above:
#   Model Accuracy: 1.0
---

### **Step 2: Creating a Sample Dataset**

```python
data = pd.DataFrame([
    {"amount": 100, "is_fraud": 0},  # Normal transaction
    {"amount": 5000, "is_fraud": 1},  # Fraudulent transaction
    {"amount": 200, "is_fraud": 0},  # Normal transaction
    {"amount": 8000, "is_fraud": 1},  # Fraudulent transaction
    {"amount": 150, "is_fraud": 0},  # Normal transaction
    {"amount": 7500, "is_fraud": 1}  # Fraudulent transaction
])
```

- This creates a **pandas DataFrame** with two columns:
  - **`amount`** (transaction amount in dollars)
  - **`is_fraud`** (0 = normal, 1 = fraudulent)
- The dataset is **very small** (only 6 rows) and is used for demonstration.

---

### **Step 3: Splitting Data into Features (`X`) and Labels (`y`)**

```python
X = data[["amount"]]  # Features (Transaction Amount)
y = data["is_fraud"]  # Labels (0: Legit, 1: Fraud)
```

- **`X`** – Contains **features** (the data we use to predict fraud). Here, it's just the **transaction amount**.
- **`y`** – Contains the **target labels** (what we want to predict – fraud or not fraud).

---

### **Step 4: Splitting the Dataset into Training and Testing Sets**

```python
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
```

- **Purpose:** Splits data into **training (80%)** and **testing (20%)** sets. With only 6 rows an exact 80/20 split is impossible; scikit-learn rounds the test set up, giving 4 training rows and 2 test rows.
- **Why?**
  - The **training set** is used to teach the model.
  - The **testing set** is used to evaluate performance.
- **`random_state=42`** – Ensures reproducibility (so the split is the same every time you run the code).

---

### **Step 5: Training the Machine Learning Model**

```python
model = RandomForestClassifier(n_estimators=10, random_state=42)
model.fit(X_train, y_train)
```

- **What is `RandomForestClassifier`?**
  - It’s an **ensemble machine learning algorithm**.
  - It creates multiple **decision trees** and combines their outputs.
  - It's good for classification tasks like fraud detection.
- **`n_estimators=10`**
  - Uses **10 decision trees** in the forest.
- **`.fit(X_train, y_train)`**
  - Trains the model using the **training data**.

---

### **Step 6: Making Predictions**

```python
y_pred = model.predict(X_test)
```

- **Predicts** whether the transactions in the test set are fraudulent or not.

---

### **Step 7: Evaluating the Model**

```python
print("Model Accuracy:", accuracy_score(y_test, y_pred))
```

- **`accuracy_score(y_test, y_pred)`**
  - Compares the model’s predictions (`y_pred`) with the actual labels (`y_test`).
  - Outputs a **score** between 0 and 1 (1 = perfect prediction, 0 = all wrong).

---

## **Summary**

1. Load necessary libraries.
2. Create a small **dataset** of transactions.
3. Separate the data into **features (`X`)** and **labels (`y`)**.
4. Split the data into **training (80%)** and **testing (20%)**.
5. Train a **Random Forest Classifier** model on the training data.
6. Use the model to **predict fraud** in the test data.
7. Measure the model’s **accuracy**.

### **Key Takeaways**

- **Supervised Learning:** The model learns from labeled data (`is_fraud` values).
- **Feature Selection:** The **only feature** used here is **transaction amount**.
- **Limitations of this Example:**
  - The dataset is **tiny** and unrealistic; it is for educational purposes only.
  - A real-world fraud detection model would use more **features** (like transaction location, time, user behavior, etc.).
  - A real-world model must also handle **imbalanced data** (real-world fraud is much rarer than normal transactions).
def is_fraudulent(transaction):
    """Return True if the trained model classifies ``transaction`` as fraud.

    Args:
        transaction: Mapping with at least an "amount" key.

    Returns:
        bool: True when the model predicts label 1 (fraud), otherwise False.

    Raises:
        KeyError: If ``transaction`` has no "amount" entry.
    """
    # The model was fitted on a DataFrame whose only column is "amount".
    # Predicting on a bare NumPy array makes scikit-learn emit
    # "X does not have valid feature names", so build a one-row frame with
    # the same column name instead. `pd` and `model` come from the training
    # cell earlier in this file.
    features = pd.DataFrame({"amount": [transaction["amount"]]})
    # bool() converts numpy.bool_ into a plain Python bool for callers.
    return bool(model.predict(features)[0] == 1)  # 1 means fraud


# New Transactions (AI validates before adding to blockchain)
new_transactions = [
    {"sender": "George", "receiver": "Henry", "amount": 900},  # Likely legit
    {"sender": "Ivy", "receiver": "Jack", "amount": 10000}  # Likely fraud
]

# Validate transactions with AI before adding to blockchain
valid_transactions = [tx for tx in new_transactions if not is_fraudulent(tx)]

if valid_transactions:
    my_chain.add_block(valid_transactions)
    print("Safe transactions added to the blockchain!")
else:
    print("No valid transactions found.")

# Recorded output of the run above:
#   Safe transactions added to the blockchain!

# Print the Blockchain
for block in my_chain.chain:
    print(f"Index: {block.index}, Hash: {block.hash}, Previous Hash: {block.previous_hash}, Transactions: {block.transactions}")

# In the recorded run the chain then held four blocks (index 0-3); block 3
# contained only the George -> Henry transaction (amount 900), i.e. the
# 10000 transfer was rejected. Hash values differ between runs because each
# block's timestamp comes from time.time().