18
18
19
19
class AdaBoost:
    """Binary AdaBoost classifier built from single-feature decision stumps.

    Labels are expected as 0/1; internally they are mapped to {-1, 1}
    for the boosting updates.
    """

    def __init__(self, n_estimators: int = 50) -> None:
        """
        Initialize AdaBoost classifier.

        Args:
            n_estimators: Number of boosting rounds (weak learners).
        """
        self.n_estimators: int = n_estimators
        self.alphas: list[float] = []  # Weights assigned to each weak learner
        self.models: list[dict[str, Any]] = []  # Stores each decision stump

    def fit(self, feature_matrix: np.ndarray, target: np.ndarray) -> None:
        """
        Train AdaBoost model using decision stumps.

        Args:
            feature_matrix: 2D array of shape (n_samples, n_features)
            target: 1D array of binary labels (0 or 1)
        """
        n_samples, _ = feature_matrix.shape

        # Initialize uniform sample weights
        sample_weights = np.ones(n_samples) / n_samples

        # Reset model state so fit() can be called more than once
        self.models = []
        self.alphas = []

        # Convert labels to {-1, 1} for boosting
        y_signed = np.where(target == 0, -1, 1)

        for _ in range(self.n_estimators):
            # Train a weighted decision stump
            stump = self._build_stump(feature_matrix, y_signed, sample_weights)
            pred = stump["pred"]
            err = stump["error"]

            # Compute alpha (learner weight); the epsilon guards against
            # division by zero when the stump is a perfect classifier
            alpha = 0.5 * np.log((1 - err) / (err + 1e-10))

            # Update sample weights to focus on misclassified points,
            # then renormalize so they remain a distribution
            sample_weights *= np.exp(-alpha * y_signed * pred)
            sample_weights /= np.sum(sample_weights)

            # Store the stump and its weight
            self.models.append(stump)
            self.alphas.append(alpha)

    def predict(self, feature_matrix: np.ndarray) -> np.ndarray:
        """
        Predict binary class labels for input samples.

        Args:
            feature_matrix: 2D array of shape (n_samples, n_features)

        Returns:
            1D array of predicted labels (0 or 1)

        >>> import numpy as np
        >>> features = np.array([[0, 0], [1, 1], [1, 0], [0, 1]])
        >>> labels = np.array([0, 1, 1, 0])
        >>> clf = AdaBoost(n_estimators=5)
        >>> clf.fit(features, labels)
        >>> clf.predict(np.array([[0, 0], [1, 1]]))
        array([0, 1])
        """
        clf_preds = np.zeros(feature_matrix.shape[0])

        # Aggregate weighted predictions from all stumps
        for alpha, stump in zip(self.alphas, self.models):
            pred = self._stump_predict(
                feature_matrix,
                stump["feature"],
                stump["threshold"],
                stump["polarity"],
            )
            clf_preds += alpha * pred

        # Final prediction: sign of the weighted sum, mapped back to {0, 1}
        return np.where(clf_preds >= 0, 1, 0)

    def _build_stump(
        self,
        feature_matrix: np.ndarray,
        target_signed: np.ndarray,
        sample_weights: np.ndarray,
    ) -> dict[str, Any]:
        """
        Build the best decision stump for current sample weights.

        Args:
            feature_matrix: 2D array of shape (n_samples, n_features)
            target_signed: 1D array of labels in {-1, 1}
            sample_weights: 1D array of per-sample weights (sums to 1)

        Returns:
            Dictionary containing stump parameters and predictions.
        """
        _, n_features = feature_matrix.shape
        min_error = float("inf")
        best_stump: dict[str, Any] = {}

        # Exhaustively search all features, thresholds, and polarities
        for feature in range(n_features):
            thresholds = np.unique(feature_matrix[:, feature])
            for threshold in thresholds:
                for polarity in [1, -1]:
                    pred = self._stump_predict(feature_matrix, feature, threshold, polarity)
                    error = np.sum(sample_weights * (pred != target_signed))

                    # Keep the stump with the lowest weighted error
                    if error < min_error:
                        min_error = error
                        best_stump = {
                            "feature": feature,
                            "threshold": threshold,
                            "polarity": polarity,
                            "error": error,
                            "pred": pred.copy(),
                        }

        return best_stump

    def _stump_predict(
        self,
        feature_matrix: np.ndarray,
        feature: int,
        threshold: float,
        polarity: int,
    ) -> np.ndarray:
        """
        Predict using a single decision stump.

        Args:
            feature_matrix: 2D array of shape (n_samples, n_features)
            feature: Index of the feature the stump splits on
            threshold: Split threshold for that feature
            polarity: 1 or -1; selects which side of the split is labeled -1

        Returns:
            1D array of predictions in {-1, 1}
        """
        pred = np.ones(feature_matrix.shape[0])

        # Polarity decides which side of the threshold gets the -1 label
        if polarity == 1:
            pred[feature_matrix[:, feature] < threshold] = -1
        else:
            pred[feature_matrix[:, feature] > threshold] = -1

        return pred
0 commit comments