GDGoC-GLAU · palakkhandelwal123 · Oct 13, 2025 · Oct 13, 2025 · Oct 13, 2025 · Oct 13, 2025
diff --git a/models/linear_regressuin.py b/models/linear_regressuin.py
@@ -1,23 +1,14 @@
-# Contributing Guide
-
-We ❤️ contributions! This project is part of **Hacktoberfest**.
-
-## Steps to Contribute
-1. Fork the repo
-2. Create a new branch (`git checkout -b feature-model`)
-3. Add your model/page under `/pages`
-4. Use helper functions from `/utils`
-5. Commit and push (`git push origin feature-model`)
-6. Open a Pull Request (PR)
-
-## What You Can Work On
-- Add a new ML model (e.g., Decision Tree, KNN, SVM, etc.)
-- Improve plotting helpers
-- Add more datasets to `data_helpers`
-- Enhance UI/UX in Streamlit
-
-## Labels
-- `good first issue` → beginner-friendly
-- `feature` → add a new model
-- `bug` → fix something broken
-- `documentation` → improve docs
+# models/linear_regression_model.py
+from sklearn.linear_model import LinearRegression
+import numpy as np
+
+# Train a simple model for demonstration
+model = LinearRegression()
+X = np.array([[1], [2], [3], [4], [5]])
+y = np.array([2, 4, 6, 8, 10])
+model.fit(X, y)
+
+def predict(features):
+    arr = np.array(features).reshape(1, -1)
+    prediction = model.predict(arr)
+    return prediction.tolist()
diff --git a/pages/Linear_Regression.md b/pages/Linear_Regression.md
@@ -0,0 +1,98 @@
+# pages/Logistic_Regression.py
+
+import streamlit as st
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, roc_auc_score
+import seaborn as sns
+import matplotlib.pyplot as plt
+import numpy as np
+
+# Import existing helpers
+from utils.data_helpers import generate_classification_dataset
+from utils.plot_helpers import plot_roc_curve
+
+# -------------------------------
+# 🏷️ Page Configuration
+# -------------------------------
+st.set_page_config(page_title="Logistic Regression Simulator", layout="wide")
+st.title("🔹 Logistic Regression Model")
+
+st.write("""
+This page trains a **Logistic Regression** model on a generated dataset,  
+displays **predictions**, a **confusion matrix**, and an **ROC curve**.
+""")
+
+# -------------------------------
+# ⚙️ Sidebar Controls
+# -------------------------------
+st.sidebar.header("Dataset Configuration")
+n_samples = st.sidebar.slider("Number of Samples", 50, 1000, 200, 50)
+n_features = st.sidebar.slider("Number of Features", 2, 20, 5)
+n_informative = st.sidebar.slider("Informative Features", 1, n_features, 3)
+n_classes = st.sidebar.slider("Number of Classes", 2, 5, 2)
+
+# Generate dataset
+data = generate_classification_dataset(
+    n_samples=n_samples,
+    n_features=n_features,
+    n_informative=n_informative,
+    n_classes=n_classes
+)
+
+st.subheader("📊 Sample of Generated Dataset")
+st.dataframe(data.head())
+
+# -------------------------------
+# 🧠 Model Training
+# -------------------------------
+X = data.drop("target", axis=1)
+y = data["target"]
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=42
+)
+
+st.subheader("⚙️ Model Training")
+model = LogisticRegression(max_iter=1000)
+model.fit(X_train, y_train)
+
+st.success("✅ Model trained successfully!")
+
+# -------------------------------
+# 🔮 Predictions
+# -------------------------------
+st.subheader("🔮 Predictions on Test Set")
+y_pred = model.predict(X_test)
+y_pred_prob = model.predict_proba(X_test)[:, 1] if n_classes == 2 else None
+
+st.write("**Sample Predictions:**")
+pred_df = X_test.copy()
+pred_df["Actual"] = y_test.values
+pred_df["Predicted"] = y_pred
+st.dataframe(pred_df.head(10))
+
+# -------------------------------
+# 📉 Confusion Matrix
+# -------------------------------
+st.subheader("📉 Confusion Matrix")
+cm = confusion_matrix(y_test, y_pred)
+fig_cm, ax = plt.subplots(figsize=(5, 4))
+sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
+ax.set_xlabel("Predicted Label")
+ax.set_ylabel("True Label")
+ax.set_title("Confusion Matrix")
+st.pyplot(fig_cm)
+
+# -------------------------------
+# 📈 ROC Curve (only for binary classification)
+# -------------------------------
+if n_classes == 2:
+    st.subheader("📈 ROC Curve")
+    roc_fig = plot_roc_curve(y_test, y_pred_prob)
+    roc_auc = roc_auc_score(y_test, y_pred_prob)
+    st.write(f"**ROC AUC Score:** {roc_auc:.2f}")
+    st.pyplot(roc_fig)
+else:
+    st.info("ROC Curve is only available for binary classification.")
+
diff --git a/utils/data_helpers.py b/utils/data_helpers.py
@@ -1,30 +1,47 @@
 # utils/data_helpers.py
-
-from sklearn.datasets import make_regression
+from sklearn.datasets import make_classification
 import pandas as pd
 
-def generate_sample_regression(n_samples=100, n_features=1, noise=0.0, random_state=None):
+def generate_classification_dataset(
+    n_samples: int = 100,
+    n_features: int = 10,
+    n_informative: int = 5,
+    n_classes: int = 2,
+    random_state: int = 42
+):
     """
-    Generate a sample regression dataset.
+    Generate a synthetic classification dataset.
 
-    Parameters:
-        n_samples (int): Number of data points.
-        n_features (int): Number of features.
-        noise (float): Standard deviation of Gaussian noise added to the output.
-        random_state (int or None): Random seed for reproducibility.
+    Parameters
+    ----------
+    n_samples : int, optional
+        Number of samples to generate (default=100).
+    n_features : int, optional
+        Total number of features (default=10).
+    n_informative : int, optional
+        Number of informative features (default=5).
+    n_classes : int, optional
+        Number of target classes (default=2).
+    random_state : int, optional
+        Random seed for reproducibility (default=42).
 
-    Returns:
-        X (pd.DataFrame): Feature dataframe of shape (n_samples, n_features)
-        y (pd.Series): Target variable of shape (n_samples,)
+    Returns
+    -------
+    data : pandas.DataFrame
+        A DataFrame containing the generated features and target column ('target').
     """
-    X, y = make_regression(
+
+    X, y = make_classification(
         n_samples=n_samples,
         n_features=n_features,
-        noise=noise,
+        n_informative=n_informative,
+        n_redundant=0,
+        n_classes=n_classes,
         random_state=random_state
     )
-    # Convert to pandas for convenience
-    X_df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(n_features)])
-    y_series = pd.Series(y, name='target')
-
-    return X_df, y_series
+
+    feature_names = [f"feature_{i}" for i in range(n_features)]
+    data = pd.DataFrame(X, columns=feature_names)
+    data["target"] = y
+
+    return data
diff --git a/utils/plot_helpers.py b/utils/plot_helpers.py
@@ -1,28 +1,40 @@
+# utils/plot_helpers.py
 import matplotlib.pyplot as plt
 import seaborn as sns
-from sklearn.metrics import confusion_matrix, roc_curve, auc
+from sklearn.metrics import roc_curve, auc
 
-def plot_regression_line(X, y, model):
-    plt.figure()
-    plt.scatter(X, y, color="blue", label="Data")
-    y_pred = model.predict(X)
-    plt.plot(X, y_pred, color="red", label="Prediction")
-    plt.legend()
-    return plt
+def plot_roc_curve(y_true, y_score):
+    """
+    Plot ROC curve for a classification model.
 
-def plot_confusion_matrix(y_true, y_pred, labels):
-    cm = confusion_matrix(y_true, y_pred)
-    plt.figure()
-    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
-    plt.xlabel("Predicted")
-    plt.ylabel("Actual")
-    return plt
+    Parameters
+    ----------
+    y_true : array-like
+        True class labels (0 or 1).
+    y_score : array-like
+        Predicted probabilities or scores for the positive class.
 
-def plot_roc_curve(y_true, y_scores):
-    fpr, tpr, _ = roc_curve(y_true, y_scores)
+    Returns
+    -------
+    fig : matplotlib.figure.Figure
+        ROC curve figure object (for Streamlit display).
+    """
+
+    # Compute ROC curve and AUC
+    fpr, tpr, _ = roc_curve(y_true, y_score)
     roc_auc = auc(fpr, tpr)
-    plt.figure()
-    plt.plot(fpr, tpr, label=f"AUC = {roc_auc:.2f}")
-    plt.plot([0, 1], [0, 1], linestyle="--")
-    plt.legend()
-    return plt
+
+    # Create figure
+    sns.set(style="whitegrid")
+    fig, ax = plt.subplots(figsize=(6, 5))
+
+    ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
+    ax.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Random Guess')
+
+    ax.set_title("ROC Curve", fontsize=14)
+    ax.set_xlabel("False Positive Rate")
+    ax.set_ylabel("True Positive Rate")
+    ax.legend(loc="lower right")
+
+    plt.tight_layout()
+    return fig