-
Notifications
You must be signed in to change notification settings - Fork 0
/
dashboard.py
180 lines (148 loc) · 7.67 KB
/
dashboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import streamlit as st
import plotly.express as px
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')
st.set_page_config(page_title="Superstore", page_icon=":bar_chart:", layout="wide")
st.title(" :bar_chart: Sample SuperStore EDA")
st.markdown('<style>div.block-container{padding-top:2rem;}</style>', unsafe_allow_html=True)
# File uploader
fl = st.file_uploader(":file_folder: Upload a file", type=(["csv", "txt", "xlsx", "xls"]))
if fl is not None:
filename = fl.name
st.write(f"Filename: {filename}")
# Read the uploaded file based on its type
if filename.endswith('.csv') or filename.endswith('.txt'):
df = pd.read_csv(fl, encoding="ISO-8859-1")
elif filename.endswith('.xlsx') or filename.endswith('.xls'):
df = pd.read_excel(fl, engine='openpyxl')
else:
# Default file path (relative to the script location)
default_file_path = "SampleSuperstore.csv"
if os.path.exists(default_file_path):
df = pd.read_csv(default_file_path, encoding="ISO-8859-1")
else:
st.error("No file uploaded and the default file does not exist.")
st.stop()
# Ensure the data frame has been loaded
if df is not None:
# Convert "Order Date" column to datetime
df["Order Date"] = pd.to_datetime(df["Order Date"])
# Display the first few rows of the dataframe
st.write("First few rows of the data:")
st.write(df.head())
# Create columns layout
col1, col2 = st.columns((2))
# Getting the min and max date
startDate = pd.to_datetime(df["Order Date"]).min()
endDate = pd.to_datetime(df["Order Date"]).max()
with col1:
date1 = pd.to_datetime(st.date_input("Start Date", startDate))
with col2:
date2 = pd.to_datetime(st.date_input("End Date", endDate))
df = df[(df["Order Date"] >= date1) & (df["Order Date"] <= date2)].copy()
st.sidebar.header("Choose your filter: ")
# Create for Region
region = st.sidebar.multiselect("Pick your Region", df["Region"].unique())
if not region:
df2 = df.copy()
else:
df2 = df[df["Region"].isin(region)]
# Create for State
state = st.sidebar.multiselect("Pick the State", df2["State"].unique())
if not state:
df3 = df2.copy()
else:
df3 = df2[df2["State"].isin(state)]
# Create for City
city = st.sidebar.multiselect("Pick the City", df3["City"].unique())
# Filter the data based on Region, State and City
if not region and not state and not city:
filtered_df = df
elif not state and not city:
filtered_df = df[df["Region"].isin(region)]
elif not region and not city:
filtered_df = df[df["State"].isin(state)]
elif state and city:
filtered_df = df3[df["State"].isin(state) & df3["City"].isin(city)]
elif region and city:
filtered_df = df3[df["Region"].isin(region) & df3["City"].isin(city)]
elif region and state:
filtered_df = df3[df["Region"].isin(region) & df3["State"].isin(state)]
elif city:
filtered_df = df3[df3["City"].isin(city)]
else:
filtered_df = df3[df3["Region"].isin(region) & df3["State"].isin(state) & df3["City"].isin(city)]
category_df = filtered_df.groupby(by=["Category"], as_index=False)["Sales"].sum()
with col1:
st.subheader("Category wise Sales")
fig = px.bar(category_df, x="Category", y="Sales", text=['${:,.2f}'.format(x) for x in category_df["Sales"]],
template="seaborn")
st.plotly_chart(fig, use_container_width=True, height=200)
with col2:
st.subheader("Region wise Sales")
fig = px.pie(filtered_df, values="Sales", names="Region", hole=0.5)
fig.update_traces(text=filtered_df["Region"], textposition="outside")
st.plotly_chart(fig, use_container_width=True)
cl1, cl2 = st.columns((2))
with cl1:
with st.expander("Category_ViewData"):
st.write(category_df.style.background_gradient(cmap="Blues"))
csv = category_df.to_csv(index=False).encode('utf-8')
st.download_button("Download Data", data=csv, file_name="Category.csv", mime="text/csv",
help='Click here to download the data as a CSV file')
with cl2:
with st.expander("Region_ViewData"):
region = filtered_df.groupby(by="Region", as_index=False)["Sales"].sum()
st.write(region.style.background_gradient(cmap="Oranges"))
csv = region.to_csv(index=False).encode('utf-8')
st.download_button("Download Data", data=csv, file_name="Region.csv", mime="text/csv",
help='Click here to download the data as a CSV file')
filtered_df["month_year"] = filtered_df["Order Date"].dt.to_period("M")
st.subheader('Time Series Analysis')
linechart = pd.DataFrame(filtered_df.groupby(filtered_df["month_year"].dt.strftime("%Y : %b"))["Sales"].sum()).reset_index()
fig2 = px.line(linechart, x="month_year", y="Sales", labels={"Sales": "Amount"}, height=500, width=1000, template="gridon")
st.plotly_chart(fig2, use_container_width=True)
with st.expander("View Data of TimeSeries:"):
st.write(linechart.T.style.background_gradient(cmap="Blues"))
csv = linechart.to_csv(index=False).encode("utf-8")
st.download_button('Download Data', data=csv, file_name="TimeSeries.csv", mime='text/csv')
# Create a treemap based on Region, category, sub-Category
st.subheader("Hierarchical view of Sales using TreeMap")
fig3 = px.treemap(filtered_df, path=["Region", "Category", "Sub-Category"], values="Sales", hover_data=["Sales"],
color="Sub-Category")
fig3.update_layout(width=800, height=650)
st.plotly_chart(fig3, use_container_width=True)
chart1, chart2 = st.columns((2))
with chart1:
st.subheader('Segment wise Sales')
fig = px.pie(filtered_df, values="Sales", names="Segment", template="plotly_dark")
fig.update_traces(text=filtered_df["Segment"], textposition="inside")
st.plotly_chart(fig, use_container_width=True)
with chart2:
st.subheader('Category wise Sales')
fig = px.pie(filtered_df, values="Sales", names="Category", template="gridon")
fig.update_traces(text=filtered_df["Category"], textposition="inside")
st.plotly_chart(fig, use_container_width=True)
import plotly.figure_factory as ff
st.subheader(":point_right: Month wise Sub-Category Sales Summary")
with st.expander("Summary_Table"):
df_sample = df[0:5][["Region", "State", "City", "Category", "Sales", "Profit", "Quantity"]]
fig = ff.create_table(df_sample, colorscale="Cividis")
st.plotly_chart(fig, use_container_width=True)
st.markdown("Month wise sub-Category Table")
filtered_df["month"] = filtered_df["Order Date"].dt.month_name()
sub_category_Year = pd.pivot_table(data=filtered_df, values="Sales", index=["Sub-Category"], columns="month")
st.write(sub_category_Year.style.background_gradient(cmap="Blues"))
# Create a scatter plot
data1 = px.scatter(filtered_df, x="Sales", y="Profit", size="Quantity")
data1['layout'].update(title="Relationship between Sales and Profits using Scatter Plot.",
titlefont=dict(size=20), xaxis=dict(title="Sales", titlefont=dict(size=19)),
yaxis=dict(title="Profit", titlefont=dict(size=19)))
st.plotly_chart(data1, use_container_width=True)
with st.expander("View Data"):
st.write(filtered_df.iloc[:500, 1:20:2].style.background_gradient(cmap="Oranges"))
# Download original DataSet
csv = df.to_csv(index=False).encode('utf-8')
st.download_button('Download Data', data=csv, file_name="Data.csv", mime="text/csv")