Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions unstructured/metrics/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import math
import os
import re
import statistics
Expand Down Expand Up @@ -196,14 +197,35 @@ def _stdev(scores: List[Optional[float]], rounding: Optional[int] = 3) -> Union[
Args:
rounding (int): optional number of decimal places to round the result to. Defaults to 3.
"""
# Filter out None values
scores = [score for score in scores if score is not None]
# Use Welford's algorithm for one-pass computation
count = 0
mean = 0.0
M2 = 0.0

for score in scores:
if score is None:
continue
if math.isnan(score):
# Fall back to original behavior for NaN (raises ValueError)
scores = [score for score in scores if score is not None]
if len(scores) <= 1:
return None
if not rounding:
return statistics.stdev(scores)
return round(statistics.stdev(scores), rounding)
count += 1
delta = score - mean
mean += delta / count
M2 += delta * (score - mean)

# Proceed only if there is more than one value
if len(scores) <= 1:
if count <= 1:
return None

stdev = math.sqrt(M2 / (count - 1))
if not rounding:
return statistics.stdev(scores)
return round(statistics.stdev(scores), rounding)
return stdev
return round(stdev, rounding)


def _pstdev(scores: List[Optional[float]], rounding: Optional[int] = 3) -> Union[float, None]:
Expand Down