Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 14 additions & 23 deletions models/linear_regressuin.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,14 @@
# Contributing Guide

We ❤️ contributions! This project is part of **Hacktoberfest**.

## Steps to Contribute
1. Fork the repo
2. Create a new branch (`git checkout -b feature-model`)
3. Add your model/page under `/pages`
4. Use helper functions from `/utils`
5. Commit and push (`git push origin feature-model`)
6. Open a Pull Request (PR)

## What You Can Work On
- Add a new ML model (e.g., Decision Tree, KNN, or SVM)
- Improve plotting helpers
- Add more datasets to `data_helpers`
- Enhance UI/UX in Streamlit

## Labels
- `good first issue` → beginner-friendly
- `feature` → add a new model
- `bug` → fix something broken
- `documentation` → improve docs
# models/linear_regression_model.py
from sklearn.linear_model import LinearRegression
import numpy as np

# Fit a small demonstration model at import time on the exact relation y = 2x
_demo_inputs = [1, 2, 3, 4, 5]
X = np.array([[value] for value in _demo_inputs])  # one feature per sample
y = np.array([2 * value for value in _demo_inputs])  # targets 2, 4, 6, 8, 10
model = LinearRegression()
model.fit(X, y)

def predict(features):
    """
    Predict target values with the module-level demonstration model.

    Parameters
    ----------
    features : scalar or array-like
        One value, a flat sequence of values, or a 2-D array of samples.
        The input is reshaped to one row per sample, using the column
        count of the training matrix ``X``.

    Returns
    -------
    list
        The model's predictions, one entry per sample.
    """
    # Reshaping to (1, -1) would turn every value into a feature of a single
    # sample, which the model rejects as soon as more than one value is given.
    # One row per sample keeps single-value calls unchanged and also accepts
    # several samples at once.
    arr = np.array(features).reshape(-1, X.shape[1])
    prediction = model.predict(arr)
    return prediction.tolist()
98 changes: 98 additions & 0 deletions pages/Linear_Regression.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# pages/Logistic_Regression.py

import streamlit as st
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Import existing helpers
from utils.data_helpers import generate_classification_dataset
from utils.plot_helpers import plot_roc_curve

# -------------------------------
# 🏷️ Page Configuration
# -------------------------------
# Page-level settings come first, followed by the title and a short intro.
st.set_page_config(
    layout="wide",
    page_title="Logistic Regression Simulator",
)
st.title("🔹 Logistic Regression Model")

page_intro = """
This page trains a **Logistic Regression** model on a generated dataset,
displays **predictions**, a **confusion matrix**, and an **ROC curve**.
"""
st.write(page_intro)

# -------------------------------
# ⚙️ Sidebar Controls
# -------------------------------
st.sidebar.header("Dataset Configuration")
n_samples = st.sidebar.slider("Number of Samples", 50, 1000, 200, 50)
n_features = st.sidebar.slider("Number of Features", 2, 20, 5)
# The default has to lie inside [1, n_features]; a fixed default of 3 is out
# of range when only 2 features are selected, so clamp it.
n_informative = st.sidebar.slider(
    "Informative Features", 1, n_features, min(3, n_features)
)
n_classes = st.sidebar.slider("Number of Classes", 2, 5, 2)

# Generate dataset
try:
    data = generate_classification_dataset(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_classes=n_classes,
    )
except ValueError as exc:
    # Some slider combinations cannot be generated (for example, too few
    # informative features for the requested number of classes). Show the
    # reason and halt this run instead of crashing with a traceback.
    st.error(f"Cannot generate a dataset with these settings: {exc}")
    st.stop()

st.subheader("📊 Sample of Generated Dataset")
st.dataframe(data.head())

# -------------------------------
# 🧠 Model Training
# -------------------------------
# Separate the label column from the feature columns
y = data["target"]
X = data.drop(columns="target")

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# repeatable between reruns
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

st.subheader("⚙️ Model Training")
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

st.success("✅ Model trained successfully!")

# -------------------------------
# 🔮 Predictions
# -------------------------------
st.subheader("🔮 Predictions on Test Set")
y_pred = model.predict(X_test)

# Class probabilities are only kept for the binary case, where the ROC
# section further down needs the scores of the second class (column 1).
if n_classes == 2:
    y_pred_prob = model.predict_proba(X_test)[:, 1]
else:
    y_pred_prob = None

st.write("**Sample Predictions:**")
# Work on a copy so the test features are not modified by the extra columns
pred_df = X_test.copy()
pred_df["Actual"] = y_test.values
pred_df["Predicted"] = y_pred
preview_rows = pred_df.head(10)
st.dataframe(preview_rows)

# -------------------------------
# 📉 Confusion Matrix
# -------------------------------
st.subheader("📉 Confusion Matrix")
cm = confusion_matrix(y_test, y_pred)
fig_cm, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("Confusion Matrix")
st.pyplot(fig_cm)
# pyplot keeps every figure it creates registered until it is closed; the
# script reruns on each widget change, so release the figure once rendered.
plt.close(fig_cm)

# -------------------------------
# 📈 ROC Curve (only for binary classification)
# -------------------------------
if n_classes == 2:
    st.subheader("📈 ROC Curve")
    roc_fig = plot_roc_curve(y_test, y_pred_prob)
    roc_auc = roc_auc_score(y_test, y_pred_prob)
    st.write(f"**ROC AUC Score:** {roc_auc:.2f}")
    st.pyplot(roc_fig)
    # The helper returns a pyplot-managed Figure; close it after rendering so
    # figures do not pile up across reruns of the page.
    plt.close(roc_fig)
else:
    st.info("ROC Curve is only available for binary classification.")

55 changes: 36 additions & 19 deletions utils/data_helpers.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,47 @@
# utils/data_helpers.py

from sklearn.datasets import make_regression
from sklearn.datasets import make_classification
import pandas as pd

def generate_sample_regression(n_samples=100, n_features=1, noise=0.0, random_state=None):
    """
    Generate a sample regression dataset.

    Parameters
    ----------
    n_samples : int, optional
        Number of data points (default=100).
    n_features : int, optional
        Number of features (default=1).
    noise : float, optional
        Standard deviation of Gaussian noise added to the output (default=0.0).
    random_state : int or None, optional
        Random seed for reproducibility (default=None).

    Returns
    -------
    X : pandas.DataFrame
        Feature dataframe of shape (n_samples, n_features) with columns
        named ``feature_1`` .. ``feature_<n_features>``.
    y : pandas.Series
        Target variable of shape (n_samples,), named ``target``.
    """
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        noise=noise,
        random_state=random_state,
    )
    # Convert to pandas for convenience
    X_df = pd.DataFrame(X, columns=[f'feature_{i+1}' for i in range(n_features)])
    y_series = pd.Series(y, name='target')

    return X_df, y_series


def generate_classification_dataset(
    n_samples: int = 100,
    n_features: int = 10,
    n_informative: int = 5,
    n_classes: int = 2,
    random_state: int = 42
) -> pd.DataFrame:
    """
    Generate a synthetic classification dataset.

    Parameters
    ----------
    n_samples : int, optional
        Number of samples to generate (default=100).
    n_features : int, optional
        Total number of features (default=10).
    n_informative : int, optional
        Number of informative features (default=5).
    n_classes : int, optional
        Number of target classes (default=2).
    random_state : int, optional
        Random seed for reproducibility (default=42).

    Returns
    -------
    data : pandas.DataFrame
        A DataFrame containing the generated features (columns
        ``feature_0`` .. ``feature_<n_features - 1>``) and the target
        column ('target').
    """
    # n_redundant=0: every feature that is not informative is left to the
    # generator's remaining feature kinds instead of being a redundant copy.
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=0,
        n_classes=n_classes,
        random_state=random_state,
    )

    feature_names = [f"feature_{i}" for i in range(n_features)]
    data = pd.DataFrame(X, columns=feature_names)
    data["target"] = y

    return data
56 changes: 34 additions & 22 deletions utils/plot_helpers.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,40 @@
# utils/plot_helpers.py
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.metrics import roc_curve, auc

def plot_regression_line(X, y, model):
    """
    Scatter the data and overlay the model's predictions as a line.

    Parameters
    ----------
    X : array-like
        Input values, used for the x-axis and passed to ``model.predict``.
    y : array-like
        Observed target values.
    model : object
        Any object exposing ``predict(X)``.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, with the new figure as the
        current figure.
    """
    plt.figure()
    plt.scatter(X, y, color="blue", label="Data")
    y_pred = model.predict(X)
    plt.plot(X, y_pred, color="red", label="Prediction")
    plt.legend()
    return plt


def plot_confusion_matrix(y_true, y_pred, labels):
    """
    Draw a confusion-matrix heatmap on a new figure.

    Parameters
    ----------
    y_true : array-like
        True class labels.
    y_pred : array-like
        Predicted class labels.
    labels : sequence
        Tick labels used on both heatmap axes.

    Returns
    -------
    module
        The ``matplotlib.pyplot`` module, with the new figure as the
        current figure.
    """
    cm = confusion_matrix(y_true, y_pred)
    plt.figure()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    return plt


def plot_roc_curve(y_true, y_score):
    """
    Plot ROC curve for a classification model.

    Parameters
    ----------
    y_true : array-like
        True class labels (0 or 1).
    y_score : array-like
        Predicted probabilities or scores for the positive class.

    Returns
    -------
    fig : matplotlib.figure.Figure
        ROC curve figure object (for Streamlit display).
    """

    # Compute ROC curve and AUC
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)

    # Apply the whitegrid look only while this figure is built, so the style
    # does not leak into plots created elsewhere in the app.
    with sns.axes_style("whitegrid"):
        fig, ax = plt.subplots(figsize=(6, 5))

        ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
        ax.plot([0, 1], [0, 1], color='gray', linestyle='--', label='Random Guess')

        ax.set_title("ROC Curve", fontsize=14)
        ax.set_xlabel("False Positive Rate")
        ax.set_ylabel("True Positive Rate")
        ax.legend(loc="lower right")

    # Lay out this specific figure instead of whichever one is "current"
    fig.tight_layout()
    return fig