Dev-Dipesh · March 22, 2025 04:31 · Mar 22, 2025
diff --git a/blockchain.md b/blockchain.md
@@ -0,0 +1,258 @@
+## Simple Test Code for Creating a Blockchain + Basic AI Fraud Detection
+
+```python
+import hashlib
+import time
+
+class Block:
+  def __init__(self, index, previous_hash, timestamp, transactions):
+    self.index = index # Block number
+    self.previous_hash = previous_hash # Hash of the previous block
+    self.timestamp = timestamp # Time of block creation
+    self.transactions = transactions # List of transactions in the block
+    self.hash = self.calculate_hash() # Unique hash for the block
+
+  def calculate_hash(self):
+    # Create a unique fingerprint (SHA-256 hash) for the block
+    block_string = f"{self.index}{self.previous_hash}{self.timestamp}{self.transactions}".encode()
+    return hashlib.sha256(block_string).hexdigest()
+
+class Blockchain:
+  def __init__(self):
+    self.chain = [self.create_genesis_block()] # Start with a gensis block
+
+  def create_genesis_block(self):
+    return Block(0, "0", time.time(), "Genesis Block") # First block in the chain
+
+  def add_block(self, transactions):
+    prev_block = self.chain[-1] # Get the last block in the chain
+    new_block = Block(len(self.chain), prev_block.hash, time.time(), transactions)
+    self.chain.append(new_block) # Add the new block to the chain
+
+# Creating Blockchain & Adding Transactions
+my_chain = Blockchain()
+transactions = [
+  {"sender": "Mary", "receiver": "Daniel", "amount": 28000},
+  {"sender": "Daniel", "receiver": "Lisa", "amount": 20000},
+]
+
+my_chain.add_block(transactions)
+my_chain.add_block([{"sender": "Justin", "receiver": "Daniel", "amount": 20000}])
+```
+
+
+```python
+# Print the Blockchain
+for block in my_chain.chain:
+  print(f"Index: {block.index}, Hash: {block.hash}, Previous Hash: {block.previous_hash}, Transactions: {block.transactions}")
+```
+
+    Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block
+    Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}]
+    Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}]
+
+
+Install necessary packages
+
+
+```python
+%pip install pandas numpy scikit-learn
+```
+
+    Requirement already satisfied: pandas in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.5.3)
+    Requirement already satisfied: numpy in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.24.3)
+    Requirement already satisfied: scikit-learn in /Users/dipesh/anaconda3/lib/python3.11/site-packages (1.3.0)
+    Requirement already satisfied: python-dateutil>=2.8.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2.8.2)
+    Requirement already satisfied: pytz>=2020.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from pandas) (2022.7)
+    Requirement already satisfied: scipy>=1.5.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.10.1)
+    Requirement already satisfied: joblib>=1.1.1 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (1.2.0)
+    Requirement already satisfied: threadpoolctl>=2.0.0 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from scikit-learn) (2.2.0)
+    Requirement already satisfied: six>=1.5 in /Users/dipesh/anaconda3/lib/python3.11/site-packages (from python-dateutil>=2.8.1->pandas) (1.16.0)
+    Note: you may need to restart the kernel to use updated packages.
+
+
+
+```python
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
+
+# Sample dataset of past transactions (one feature: amount; label: is_fraud)
+data = pd.DataFrame([
+    {"amount": 100, "is_fraud": 0},   # Normal transaction
+    {"amount": 5000, "is_fraud": 1},  # Fraudulent transaction
+    {"amount": 200, "is_fraud": 0},   # Normal transaction
+    {"amount": 8000, "is_fraud": 1},  # Fraudulent transaction
+    {"amount": 150, "is_fraud": 0},   # Normal transaction
+    {"amount": 7500, "is_fraud": 1}   # Fraudulent transaction
+])
+
+# Split data into features (X) and labels (y)
+X = data[["amount"]]   # Features: single-column DataFrame (transaction amount)
+y = data["is_fraud"]   # Labels (0: Legit, 1: Fraud)
+
+# Train-test split with test_size=0.2; on 6 rows scikit-learn rounds the test set up to 2 rows (4 train / 2 test)
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train the model (10 trees; random_state fixed so reruns give the same forest)
+model = RandomForestClassifier(n_estimators=10, random_state=42)
+model.fit(X_train, y_train)
+
+# Evaluate model accuracy on the held-out test rows
+y_pred = model.predict(X_test)
+print("Model Accuracy:", accuracy_score(y_test, y_pred))
+
+```
+
+    Model Accuracy: 1.0
+
+
+## Code Explanation
+
+---
+
+### **Step 1: Importing Required Libraries**
+- **`pandas`** – Used for handling tabular data (like an Excel sheet).
+- **`numpy`** – Used for numerical operations (though it's not directly used in this code).
+- **`train_test_split`** – Splits data into training and testing sets.
+- **`RandomForestClassifier`** – A machine learning algorithm used for classification.
+- **`accuracy_score`** – Measures how well the model performs.
+
+---
+
+### **Step 2: Creating a Sample Dataset**
+```python
+data = pd.DataFrame([
+    {"amount": 100, "is_fraud": 0},   # Normal transaction
+    {"amount": 5000, "is_fraud": 1},  # Fraudulent transaction
+    {"amount": 200, "is_fraud": 0},   # Normal transaction
+    {"amount": 8000, "is_fraud": 1},  # Fraudulent transaction
+    {"amount": 150, "is_fraud": 0},   # Normal transaction
+    {"amount": 7500, "is_fraud": 1}   # Fraudulent transaction
+])
+```
+- This creates a **pandas DataFrame** with two columns:
+  - **`amount`** (transaction amount in dollars)
+  - **`is_fraud`** (0 = normal, 1 = fraudulent)
+- The dataset is **very small** (only 6 rows) and is used for demonstration.
+
+---
+
+### **Step 3: Splitting Data into Features (`X`) and Labels (`y`)**
+```python
+X = data[["amount"]]   # Features (Transaction Amount)
+y = data["is_fraud"]   # Labels (0: Legit, 1: Fraud)
+```
+- **`X`** – Contains **features** (the data we use to predict fraud). Here, it's just the **transaction amount**.
+- **`y`** – Contains the **target labels** (what we want to predict – fraud or not fraud).
+
+---
+
+### **Step 4: Splitting the Dataset into Training and Testing Sets**
+```python
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+```
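+- **Purpose:** Splits data into **training (80%)** and **testing (20%)** sets. With only 6 rows, scikit-learn rounds the test set up to 2 rows, so the actual split here is 4 training rows and 2 testing rows.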
+- **Why?** 
+  - The **training set** is used to teach the model.
+  - The **testing set** is used to evaluate performance.
+- **`random_state=42`** – Ensures reproducibility (so the split is the same every time you run the code).
+
+---
+
+### **Step 5: Training the Machine Learning Model**
+```python
+model = RandomForestClassifier(n_estimators=10, random_state=42)
+model.fit(X_train, y_train)
+```
+- **What is `RandomForestClassifier`?**  
+  - It’s an **ensemble machine learning algorithm**.
+  - It creates multiple **decision trees** and combines their outputs.
+  - It's good for classification tasks like fraud detection.
+- **`n_estimators=10`**  
+  - Uses **10 decision trees** in the forest.
+- **`.fit(X_train, y_train)`**  
+  - Trains the model using the **training data**.
+
+---
+
+### **Step 6: Making Predictions**
+```python
+y_pred = model.predict(X_test)
+```
+- **Predicts** whether the transactions in the test set are fraudulent or not.
+
+---
+
+### **Step 7: Evaluating the Model**
+```python
+print("Model Accuracy:", accuracy_score(y_test, y_pred))
+```
+- **`accuracy_score(y_test, y_pred)`**  
+  - Compares the model’s predictions (`y_pred`) with the actual labels (`y_test`).
+  - Outputs a **score** between 0 and 1 (1 = perfect prediction, 0 = all wrong).
+
+---
+
+## **Summary**
+1. Load necessary libraries.
+2. Create a small **dataset** of transactions.
+3. Separate the data into **features (`X`)** and **labels (`y`)**.
+4. Split the data into **training (80%)** and **testing (20%)**.
+5. Train a **Random Forest Classifier** model on the training data.
+6. Use the model to **predict fraud** in the test data.
+7. Measure the model’s **accuracy**.
+
+### **Key Takeaways**
+- **Supervised Learning:** The model learns from labeled data (`is_fraud` values).
+- **Feature Selection:** The **only feature** used here is **transaction amount**.
+- **Limitations of this Example:**
+  - The dataset is **tiny** and unrealistic; it is for educational purposes only.
+  - A real-world fraud detection model would use more **features** (like transaction location, time, user behavior, etc.).
+  - We should handle **imbalanced data** (real-world fraud is much rarer than normal transactions).
+
+
+```python
+def is_fraudulent(transaction):
+  amount = np.array (transaction["amount"]).reshape (1, -1)
+  return model.predict(amount)[0] == 1 # 1 means fraud
+
+# New Transactions (AI validates before adding to blockchain)
+new_transactions = [
+  {"sender": "George", "receiver": "Henry", "amount": 900}, # Likely legit
+  {"sender": "Ivy", "receiver": "Jack", "amount": 10000} # Likely fraud
+]
+
+# Validate transactions with AI before adding to blockchain
+valid_transactions = [tx for tx in new_transactions if not is_fraudulent(tx)]
+
+if valid_transactions:
+  my_chain.add_block(valid_transactions)
+  print("Safe transactions added to the blockchain!")
+else:
+  print("No valid transactions found.")
+```
+
+    Safe transactions added to the blockchain!
+
+
+    /Users/dipesh/anaconda3/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
+      warnings.warn(
+    /Users/dipesh/anaconda3/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but RandomForestClassifier was fitted with feature names
+      warnings.warn(
+
+
+
+```python
+# Print the Blockchain
+for block in my_chain.chain:
+  print(f"Index: {block.index}, Hash: {block.hash}, Previous Hash: {block.previous_hash}, Transactions: {block.transactions}")
+```
+
+    Index: 0, Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Previous Hash: 0, Transactions: Genesis Block
+    Index: 1, Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Previous Hash: 7a24a51b3590a3da51ceab1cd32475d0d88e0a8c714821ecf846efb87df6c76c, Transactions: [{'sender': 'Mary', 'receiver': 'Daniel', 'amount': 28000}, {'sender': 'Daniel', 'receiver': 'Lisa', 'amount': 20000}]
+    Index: 2, Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Previous Hash: 4dcdbd40bf3178072f72c8dbe295c37e65f1647dd1e8ffc015aa1feb7b8fa316, Transactions: [{'sender': 'Justin', 'receiver': 'Daniel', 'amount': 20000}]
+    Index: 3, Hash: 40bb6a72df826ae077b3cb8ef9ff5be3d278fcf45968e84b1ff7b43f2328b6d3, Previous Hash: 026cea2c954570c300de9f3fee6d96d37dcb23085237974d80844e95b6a7b054, Transactions: [{'sender': 'George', 'receiver': 'Henry', 'amount': 900}]
+
No results found