Skip to content

Commit 027aceb

Browse files
committed
Add Z-score normalization algorithm for data standardization
1 parent 02680c9 commit 027aceb

File tree

1 file changed

+68
-0
lines changed

1 file changed

+68
-0
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
"""
2+
Z-Score Normalization: Standardizes data by converting each value to the number
3+
of standard deviations it is from the mean. The result has a mean of 0 and a
4+
standard deviation of 1.
5+
6+
Formula: z = (x - mean) / standard_deviation
7+
8+
Z-score normalization is widely used in machine learning preprocessing,
9+
statistics, and data analysis to bring features to the same scale.
10+
11+
Reference: https://en.wikipedia.org/wiki/Standard_score
12+
"""
13+
14+
15+
def z_score_normalization(data: list[float]) -> list[float]:
    """
    Normalize a list of numbers using Z-score normalization.

    Each value ``x`` is mapped to ``(x - mean) / std_dev``, where ``std_dev``
    is the population standard deviation (the variance divides by
    ``len(data)``, not ``len(data) - 1``). Every z-score is rounded to
    10 decimal places.

    Parameters
    ----------
    data: list[float], the input list of numbers

    Returns
    -------
    list[float]: list of z-scores for each element

    Raises
    ------
    ValueError: if data is empty, has fewer than two elements, or has zero
        spread (all elements identical)

    >>> z_score_normalization([2, 4, 4, 4, 5, 5, 7, 9])
    [-1.5, -0.5, -0.5, -0.5, 0.0, 0.0, 1.0, 2.0]
    >>> z_score_normalization([1, 1, 1, 1])
    Traceback (most recent call last):
        ...
    ValueError: standard deviation is zero — all elements are identical
    >>> z_score_normalization([0.1, 0.1, 0.1])
    Traceback (most recent call last):
        ...
    ValueError: standard deviation is zero — all elements are identical
    >>> z_score_normalization([])
    Traceback (most recent call last):
        ...
    ValueError: data cannot be empty
    >>> z_score_normalization([10])
    Traceback (most recent call last):
        ...
    ValueError: data must contain at least two elements
    >>> z_score_normalization([0, 0, 1, 1])
    [-1.0, -1.0, 1.0, 1.0]
    >>> z_score_normalization([-5, 0, 5])
    [-1.2247448714, 0.0, 1.2247448714]
    """
    if not data:
        raise ValueError("data cannot be empty")
    if len(data) < 2:
        raise ValueError("data must contain at least two elements")

    zero_spread_message = "standard deviation is zero — all elements are identical"

    # Detect identical elements by comparing the values themselves. Relying on
    # ``std_dev == 0`` alone is not enough: for inputs such as [0.1, 0.1, 0.1]
    # the floating-point mean differs from the elements by one rounding step,
    # which yields a tiny non-zero deviation and meaningless z-scores of -1.0.
    if min(data) == max(data):
        raise ValueError(zero_spread_message)

    mean = sum(data) / len(data)
    variance = sum((x - mean) ** 2 for x in data) / len(data)
    std_dev = variance**0.5

    # Distinct but extremely close values can still underflow to a zero
    # variance; guard the division in that case as well.
    if std_dev == 0:
        raise ValueError(zero_spread_message)

    return [round((x - mean) / std_dev, 10) for x in data]
59+
60+
61+
if __name__ == "__main__":
    # Run the embedded doctests first, then show a small worked example.
    from doctest import testmod

    testmod()

    sample = [2, 4, 4, 4, 5, 5, 7, 9]
    print(f"Original data: {sample}")
    z_scores = z_score_normalization(sample)
    print(f"Z-score normalized: {z_scores}")

0 commit comments

Comments
 (0)