diff --git a/DAY 2/Jupyter Notebooks/ml-sig-scikit-learn.ipynb b/DAY 2/Jupyter Notebooks/ml-sig-scikit-learn.ipynb
new file mode 100644
index 0000000..39f5db1
--- /dev/null
+++ b/DAY 2/Jupyter Notebooks/ml-sig-scikit-learn.ipynb
@@ -0,0 +1 @@
+{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[],"dockerImageVersionId":30918,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"# Linear Regression with scikit-learn\n\nIn this notebook, we'll explore linear regression using scikit-learn. We'll cover both single-variable (simple) and multivariable (multiple) linear regression, including data generation, model fitting, evaluation, and visualization.","metadata":{"_uuid":"db19bda7-dc86-4042-932e-471c742fbcc1","_cell_guid":"02063fa2-f2fe-4e29-89fa-9e134a98a000","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"markdown","source":"## 1. 
Setup and Imports","metadata":{"_uuid":"4339a9f0-adcd-4ab0-8abb-5a02478f158c","_cell_guid":"12f49a67-bce3-4152-8782-8b0dd5aeee80","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"import numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nfrom sklearn.linear_model import LinearRegression\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import mean_squared_error, r2_score\nfrom sklearn.datasets import make_regression\n\n# Set plotting style\nsns.set(style=\"whitegrid\")","metadata":{"_uuid":"94f0644f-0cb1-4bfc-91a1-79f9a0e0d337","_cell_guid":"1792b567-daf8-4998-9304-cf78f2e2df69","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:26.474325Z","iopub.execute_input":"2025-04-06T05:29:26.474603Z","iopub.status.idle":"2025-04-06T05:29:29.461010Z","shell.execute_reply.started":"2025-04-06T05:29:26.474570Z","shell.execute_reply":"2025-04-06T05:29:29.459861Z"}},"outputs":[],"execution_count":1},{"cell_type":"markdown","source":"## 2. 
Single-Variable (Simple) Linear Regression","metadata":{"_uuid":"27eb3fa3-118e-4009-9e7a-2f3c06eddaab","_cell_guid":"b637b39b-425b-4f2d-ba22-f0766c8587b3","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"markdown","source":"**2.1 Generate Synthetic Data**","metadata":{"_uuid":"25ebf501-7eeb-467d-b118-dfbbeea79c6d","_cell_guid":"4d9199fb-62a0-4bca-8c1a-6fe281df4597","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"# Generate a linear relationship y = 1.5 + 2x + Gaussian noise (std 0.5)\ntheta0 = 1.5 # intercept\ntheta1 = 2.0 # slope\nn_samples = 100\nnp.random.seed(42)\nX_single = 2 * np.random.rand(n_samples, 1)\ny_single = theta0 + theta1 * X_single.flatten() + np.random.randn(n_samples) * 0.5\n\n# Convert to DataFrame for convenience\ndf_single = pd.DataFrame({\"x\": X_single.flatten(), \"y\": y_single})\ndf_single.head()","metadata":{"_uuid":"9b8f4d03-7572-41ab-962f-448f171c660b","_cell_guid":"5888ce7a-9cad-4057-b24d-886fad37d017","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:29.462305Z","iopub.execute_input":"2025-04-06T05:29:29.462840Z","iopub.status.idle":"2025-04-06T05:29:29.501652Z","shell.execute_reply.started":"2025-04-06T05:29:29.462801Z","shell.execute_reply":"2025-04-06T05:29:29.500346Z"}},"outputs":[{"execution_count":2,"output_type":"execute_result","data":{"text/plain":" x y\n0 0.749080 3.041684\n1 1.901429 5.153354\n2 1.463988 4.473856\n3 1.197317 2.900849\n4 0.312037 2.014239","text/html":"
\n\n
\n \n
\n
\n
x
\n
y
\n
\n \n \n
\n
0
\n
0.749080
\n
3.041684
\n
\n
\n
1
\n
1.901429
\n
5.153354
\n
\n
\n
2
\n
1.463988
\n
4.473856
\n
\n
\n
3
\n
1.197317
\n
2.900849
\n
\n
\n
4
\n
0.312037
\n
2.014239
\n
\n \n
\n
"},"metadata":{}}],"execution_count":2},{"cell_type":"markdown","source":"**2.2 Visualize Data**","metadata":{"_uuid":"020ab89b-d730-400c-aba9-93fa8af40838","_cell_guid":"e9d548e4-5507-4d74-9795-c93a875cb89f","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"plt.figure(figsize=(8, 5))\nplt.scatter(df_single['x'], df_single['y'], color='blue', alpha=0.6)\nplt.title('Synthetic Data for Simple Linear Regression')\nplt.xlabel('x')\nplt.ylabel('y')\nplt.show()","metadata":{"_uuid":"31ba00df-97c6-44bc-b0f3-5c165e01dd30","_cell_guid":"744f9bef-f655-43b1-84d7-57c053570660","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:29.504021Z","iopub.execute_input":"2025-04-06T05:29:29.504351Z","iopub.status.idle":"2025-04-06T05:29:29.889889Z","shell.execute_reply.started":"2025-04-06T05:29:29.504324Z","shell.execute_reply":"2025-04-06T05:29:29.888677Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"","image/png":"\n"},"metadata":{}}],"execution_count":3},{"cell_type":"markdown","source":"**2.3 Fit the Model**","metadata":{"_uuid":"5b980ac8-fffe-4245-8939-18604a6795de","_cell_guid":"eea0a510-9519-4aaa-87a7-e9ee78306583","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"# Initialize and fit the linear regression model\nmodel_simple = LinearRegression()\nmodel_simple.fit(X_single, y_single)\n\n# Extract parameters\nintercept_simple = model_simple.intercept_\nslope_simple = model_simple.coef_[0]\nprint(f\"Fitted intercept: {intercept_simple:.3f}\")\nprint(f\"Fitted slope: 
{slope_simple:.3f}\")","metadata":{"_uuid":"fdbb9a3d-612f-4557-a889-dc524e62ccfe","_cell_guid":"3ac537f0-2d0f-4a9b-9ed6-8c10378746a1","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:29.891782Z","iopub.execute_input":"2025-04-06T05:29:29.892072Z","iopub.status.idle":"2025-04-06T05:29:29.942792Z","shell.execute_reply.started":"2025-04-06T05:29:29.892047Z","shell.execute_reply":"2025-04-06T05:29:29.941792Z"}},"outputs":[{"name":"stdout","text":"Fitted intercept: 1.608\nFitted slope: 1.885\n","output_type":"stream"}],"execution_count":4},{"cell_type":"markdown","source":"**2.4 Plot Regression Line**","metadata":{"_uuid":"c6e5cb37-5f24-44cc-a5a0-57df8bf09b84","_cell_guid":"6d24cd0c-db30-4e66-9a6d-501267fc692d","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"# Generate predictions for plotting\ny_pred_line = model_simple.predict(X_single)\n\nplt.figure(figsize=(8, 5))\nplt.scatter(X_single, y_single, color='blue', alpha=0.6, label='Data')\nplt.plot(X_single, y_pred_line, color='red', linewidth=2, label='Fitted line')\nplt.title('Simple Linear Regression Fit')\nplt.xlabel('x')\nplt.ylabel('y')\nplt.legend()\nplt.show()","metadata":{"_uuid":"ed0bba75-3c9f-4d1b-a15f-60827b868b0c","_cell_guid":"d3bc4347-b3ad-463d-9f5e-13535dfb755d","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:29.943903Z","iopub.execute_input":"2025-04-06T05:29:29.944193Z","iopub.status.idle":"2025-04-06T05:29:30.215917Z","shell.execute_reply.started":"2025-04-06T05:29:29.944169Z","shell.execute_reply":"2025-04-06T05:29:30.214607Z"}},"outputs":[{"output_type":"display_data","data":{"text/plain":"","image/png":"\n"},"metadata":{}}],"execution_count":5},{"cell_type":"markdown","source":"**2.5 Model 
Evaluation**","metadata":{"_uuid":"e8305f74-9f2d-458b-8afa-19331e54fca0","_cell_guid":"a4cf7f2b-701f-4d97-9e7c-858f65227bc7","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"mse_simple = mean_squared_error(y_single, y_pred_line)\nr2_simple = r2_score(y_single, y_pred_line)\nprint(f\"Mean Squared Error: {mse_simple:.3f}\")\nprint(f\"R^2 Score: {r2_simple:.3f}\")","metadata":{"_uuid":"54171561-75dc-4ab9-b386-156cfece97aa","_cell_guid":"8f2104bd-7472-46bb-8a18-4a9d6d5859ba","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.217108Z","iopub.execute_input":"2025-04-06T05:29:30.217515Z","iopub.status.idle":"2025-04-06T05:29:30.225331Z","shell.execute_reply.started":"2025-04-06T05:29:30.217477Z","shell.execute_reply":"2025-04-06T05:29:30.224063Z"}},"outputs":[{"name":"stdout","text":"Mean Squared Error: 0.202\nR^2 Score: 0.861\n","output_type":"stream"}],"execution_count":6},{"cell_type":"markdown","source":"## 3. 
Multivariable (Multiple) Linear Regression","metadata":{"_uuid":"0168d86d-c92c-4600-a89c-3267dbfbb000","_cell_guid":"5dceae4f-03e2-4bb5-b79d-3bea9627c9d5","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"markdown","source":"**3.1 Generate Synthetic Data**","metadata":{"_uuid":"6d8dd02d-22fe-42ef-932f-ae5240adf0c0","_cell_guid":"c885a5d2-6ed2-4b5f-9696-892add72da84","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"# Create a dataset with 3 features\nX_multi, y_multi, coef_true = make_regression(\n n_samples=200,\n n_features=3,\n noise=10.0,\n coef=True,\n random_state=42\n)\n\n# Create DataFrame\nfeature_names = [f\"x{i+1}\" for i in range(X_multi.shape[1])]\ndf_multi = pd.DataFrame(X_multi, columns=feature_names)\ndf_multi['y'] = y_multi\n\ndf_multi.head()","metadata":{"_uuid":"02b5d062-3b69-4f22-b490-d50aef160fc7","_cell_guid":"64058309-b140-4112-8ffb-5db0073d1275","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.226677Z","iopub.execute_input":"2025-04-06T05:29:30.227054Z","iopub.status.idle":"2025-04-06T05:29:30.247316Z","shell.execute_reply.started":"2025-04-06T05:29:30.227018Z","shell.execute_reply":"2025-04-06T05:29:30.244805Z"}},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**3.2 Explore Feature 
Relationships**","metadata":{"_uuid":"d3d45173-f5e2-446f-a68b-14a2ead78fac","_cell_guid":"82b75349-1192-4bb0-a545-48e491104a6c","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"sns.pairplot(df_multi.sample(100), x_vars=feature_names, y_vars='y', height=3)\nplt.suptitle('Pairplot of Features vs. Target', y=1.02)\nplt.show()","metadata":{"_uuid":"d0376afb-baf5-4206-ba9c-e001b3887a77","_cell_guid":"b8aa7bc7-04a3-4e5c-903b-9cd027dea6cc","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.248352Z","iopub.status.idle":"2025-04-06T05:29:30.248908Z","shell.execute_reply":"2025-04-06T05:29:30.248692Z"}},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**3.3 Split into Train and Test Sets**","metadata":{"_uuid":"cfc96b7f-a276-491a-a153-ff9ab4e04564","_cell_guid":"5b4a7e26-4491-4d21-a21e-c30e632ec2b4","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"X_train, X_test, y_train, y_test = train_test_split(\n df_multi[feature_names], df_multi['y'], test_size=0.2, random_state=42\n)\n\nprint(f\"Training samples: {X_train.shape[0]}\")\nprint(f\"Test samples: {X_test.shape[0]}\")","metadata":{"_uuid":"21386997-68dc-4aa6-832a-bcc438782e09","_cell_guid":"cbd46203-ed60-4fec-b956-1e2030f1526f","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.249989Z","iopub.status.idle":"2025-04-06T05:29:30.250363Z","shell.execute_reply":"2025-04-06T05:29:30.250215Z"}},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**3.4 Fit the Multiple Linear Regression Model**","metadata":{"_uuid":"b3836cc7-8c12-475a-ae3a-28b01473de4b","_cell_guid":"3c7f8ff2-23a4-44c9-8454-794a2dae4af9","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"model_multi = 
LinearRegression()\nmodel_multi.fit(X_train, y_train)\n\n# Extract coefficients\nintercept_multi = model_multi.intercept_\ncoef_multi = model_multi.coef_\nprint(f\"Fitted intercept: {intercept_multi:.3f}\")\nfor name, coef in zip(feature_names, coef_multi):\n print(f\"Coefficient for {name}: {coef:.3f}\")","metadata":{"_uuid":"1bd9010c-fad5-482a-9966-a21d43f6b4c1","_cell_guid":"33617041-92e5-45b2-a483-2924e9bb74bc","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.251447Z","iopub.status.idle":"2025-04-06T05:29:30.251823Z","shell.execute_reply":"2025-04-06T05:29:30.251692Z"}},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**3.5 Evaluate on Test Set**","metadata":{"_uuid":"7da409c2-0f5b-4834-b7cf-f2727a5059bb","_cell_guid":"6768962b-41be-4a31-95a6-027aed099aea","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"# Predict\ny_pred_multi = model_multi.predict(X_test)\n\n# Metrics\nmse_multi = mean_squared_error(y_test, y_pred_multi)\nr2_multi = r2_score(y_test, y_pred_multi)\nprint(f\"Test Mean Squared Error: {mse_multi:.3f}\")\nprint(f\"Test R^2 Score: {r2_multi:.3f}\")","metadata":{"_uuid":"f053197f-4d86-4f0f-9f9a-b7045bc306fe","_cell_guid":"f43a8a41-eb73-4234-948c-da9d7d1a41a0","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.252569Z","iopub.status.idle":"2025-04-06T05:29:30.252939Z","shell.execute_reply":"2025-04-06T05:29:30.252803Z"}},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"**3.6 Residual Plot**","metadata":{"_uuid":"f7865ff0-f861-49e0-8a34-5d5b525c6e7e","_cell_guid":"eafa87f0-1ea7-4744-bec2-857e21257299","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}},{"cell_type":"code","source":"residuals = y_test - y_pred_multi\nplt.figure(figsize=(8, 5))\nplt.scatter(y_pred_multi, residuals, 
alpha=0.6)\nplt.axhline(0, color='red', linestyle='--')\nplt.title('Residuals vs. Predicted Values')\nplt.xlabel('Predicted Values')\nplt.ylabel('Residuals')\nplt.show()","metadata":{"_uuid":"d567ba0b-2166-4431-a40c-bab986f5eb9c","_cell_guid":"5e81d796-587f-46be-b1cc-10b87f3ab63e","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false},"execution":{"iopub.status.busy":"2025-04-06T05:29:30.253867Z","iopub.status.idle":"2025-04-06T05:29:30.254255Z","shell.execute_reply":"2025-04-06T05:29:30.254110Z"}},"outputs":[],"execution_count":null},{"cell_type":"markdown","source":"## 4. Conclusion\n\nIn this notebook, we covered:\n- Generating synthetic data for regression tasks\n- Fitting and visualizing simple linear regression models\n- Evaluating model performance using MSE and R²\n- Extending to multiple features with multiple linear regression\n- Examining model coefficients and residuals\n\nNext steps could include exploring regularized linear models (Ridge, Lasso), polynomial regression, and cross-validation techniques.","metadata":{"_uuid":"b3c99095-1632-49ae-82a3-65cdf9f18bc5","_cell_guid":"7ffc2c29-5ec9-47fb-83c0-4a632b695176","trusted":true,"collapsed":false,"jupyter":{"outputs_hidden":false}}}]}
\ No newline at end of file