Skip to content

Commit 6029a2f

Browse files
committed
initial git commit
0 parents  commit 6029a2f

File tree

5 files changed

+234
-0
lines changed

5 files changed

+234
-0
lines changed

LICENSE

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
* Copyright (c) 2007, SFTank
2+
* All rights reserved.
3+
* Redistribution and use in source and binary forms, with or without
4+
* modification, are permitted provided that the following conditions are met:
5+
*
6+
* * Redistributions of source code must retain the above copyright
7+
* notice, this list of conditions and the following disclaimer.
8+
* * Redistributions in binary form must reproduce the above copyright
9+
* notice, this list of conditions and the following disclaimer in the
10+
* documentation and/or other materials provided with the distribution.
11+
* * Neither the name of SFTank nor the
12+
* names of its contributors may be used to endorse or promote products
13+
* derived from this software without specific prior written permission.
14+
*
15+
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
16+
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17+
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
19+
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22+
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Setup.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import Distribution.Simple
2+
main = defaultMain

hstats.cabal

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
Name: hstats
2+
Version: 0.2a
3+
License: BSD3
4+
License-file: LICENSE
5+
Author: Marshall Beddoe
6+
Copyright: Copyright (c) 2008, Marshall Beddoe
7+
category: Math
8+
synopsis: Statistical Computing in Haskell
9+
description: A library of commonly used statistical functions
10+
maintainer: mbeddoe@<nospam>gmail.com
11+
homepage: http://github.com/unmarshal/hstats/
12+
hs-source-dirs: src
13+
ghc-options: -O -XBangPatterns
14+
exposed-Modules: Math.Statistics
15+
build-depends: base>=2.0, haskell98

src/Math/Statistics.hs

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
{-# OPTIONS_GHC -XBangPatterns #-}
2+
3+
-----------------------------------------------------------------------------
4+
-- Module : Math.Statistics
5+
-- Copyright : (c) 2007 SFTank
6+
-- License : BSD3
7+
--
8+
-- Maintainer : mbeddoe@<nospam>sftank.net
9+
-- Stability : experimental
10+
-- Portability : portable
11+
--
12+
-- Description :
13+
-- A collection of commonly used statistical functions.
14+
-----------------------------------------------------------------------------
15+
16+
module Math.Statistics where
17+
18+
import Data.List
19+
import Data.Ord (comparing)
20+
21+
-- Numerically stable mean
22+
-- Thanks dmwit and ddarius for help on strictness issues
23+
mean :: Floating a => [a] -> a
24+
mean x = fst $ foldl' (\(!m, !n) x -> (m+(x-m)/(n+1),n+1)) (0,0) x
25+
26+
-- Harmonic mean
27+
hmean :: (Floating a) => [a] -> a
28+
hmean xs = fromIntegral (length xs) / (sum $ map (1/) xs)
29+
30+
-- Geometric mean
31+
gmean :: (Floating a) => [a] -> a
32+
gmean xs = (foldr1 (*) xs)**(1 / fromIntegral (length xs))
33+
34+
-- Median
35+
median :: (Floating a, Ord a) => [a] -> a
36+
median x | odd n = head $ drop (n `div` 2) x'
37+
| even n = mean $ take 2 $ drop i x'
38+
where i = (length x' `div` 2) - 1
39+
x' = sort x
40+
n = length x
41+
42+
-- Modes
43+
-- Returns a sorted list of modes in descending order
44+
modes :: (Ord a) => [a] -> [(Int, a)]
45+
modes xs = sortBy (comparing $ negate.fst) $ map (\x->(length x, head x)) $ (group.sort) xs
46+
47+
-- Central moments
48+
centralMoment xs 1 = 0
49+
centralMoment xs r = (sum (map (\x -> (x-m)^r) xs)) / n
50+
where
51+
m = mean xs
52+
n = fromIntegral $ length xs
53+
54+
-- Range
55+
range :: (Num a, Ord a) => [a] -> a
56+
range xs = maximum xs - minimum xs
57+
58+
-- Average deviation
59+
avgdev :: (Floating a) => [a] -> a
60+
avgdev xs = mean $ map (\x -> abs(x - m)) xs
61+
where
62+
m = mean xs
63+
64+
-- Standard deviation
65+
stddev :: (Floating a) => [a] -> a
66+
stddev xs = sqrt $ var xs
67+
68+
-- Population variance
69+
pvar :: (Floating a) => [a] -> a
70+
pvar xs = centralMoment xs 2
71+
72+
-- Numerically stable sample variance
73+
-- This crashes
74+
var xs = (var' 0 0 0 xs) / (fromIntegral $ length xs - 1)
75+
where
76+
var' _ _ s [] = s
77+
var' m n s (x:xs) = var' nm (n + 1) (s + delta * (x - nm)) xs
78+
where
79+
delta = x - m
80+
nm = m + delta/(fromIntegral $ n + 1)
81+
82+
-- Interquartile range
83+
-- XXX: Add case that takes into account even vs odd length
84+
iqr xs = take (length xs - 2*q) $ drop q xs
85+
where
86+
q = ((length xs) + 1) `div` 4
87+
88+
-- Kurtosis
89+
kurtosis xs = ((centralMoment xs 4) / (centralMoment xs 2)^2)-3
90+
91+
-- |Arbitrary quantile q of an unsorted list. The quantile /q/ of /N/
92+
-- |data points is the point whose (zero-based) index in the sorted
93+
-- |data set is closest to /q(N-1)/.
94+
quantile :: (Fractional b, Ord b) => Double -> [b] -> b
95+
quantile q = quantileAsc q . sort
96+
97+
-- |As 'quantile' specialized for sorted data
98+
quantileAsc :: (Fractional b, Ord b) => Double -> [b] -> b
99+
quantileAsc _ [] = error "quantile on empty list"
100+
quantileAsc q xs
101+
| q < 0 || q > 1 = error "quantile out of range"
102+
| otherwise = xs !! (quantIndex (length xs) q)
103+
where quantIndex :: Int -> Double -> Int
104+
quantIndex len q = case round $ q * (fromIntegral len - 1) of
105+
idx | idx < 0 -> error "Quantile index too small"
106+
| idx >= len -> error "Quantile index too large"
107+
| otherwise -> idx
108+
109+
-- Skew
110+
skew xs = (centralMoment xs 3) / (centralMoment xs 2)**(3/2)
111+
112+
pearsonSkew1 xs = 3 * (mean xs - mo) / stddev xs
113+
where
114+
mo = snd $ head $ modes xs
115+
116+
pearsonSkew2 xs = 3 * (mean xs - median xs) / stddev xs
117+
118+
-- Covariance
119+
cov :: (Floating a) => [a] -> [a] -> a
120+
cov xs ys = sum (zipWith (*) (map f1 xs) (map f2 ys)) / (n - 1)
121+
where
122+
n = fromIntegral $ length $ xs
123+
m1 = mean xs
124+
m2 = mean ys
125+
f1 = \x -> (x - m1)
126+
f2 = \x -> (x - m2)
127+
128+
-- Covariance matrix
129+
covMatrix :: (Floating a) => [[a]] -> [[a]]
130+
covMatrix xs = split' (length xs) cs
131+
where
132+
cs = [ cov a b | a <- xs, b <- xs]
133+
split' n = unfoldr (\y -> if null y then Nothing else Just $ splitAt n y)
134+
135+
-- Pearson's product-moment correlation coefficient
136+
corr :: (Floating a) => [a] -> [a] -> a
137+
corr x y = cov x y / (stddev x * stddev y)
138+
139+
-- |Least-squares linear regression of /y/ against /x/ for a
140+
-- |collection of (/x/, /y/) data, in the form of (/b0/, /b1/, /r/)
141+
-- |where the regression is /y/ = /b0/ + /b1/ * /x/ with Pearson
142+
-- |coefficient /r/
143+
144+
linreg :: (Floating b) => [(b, b)] -> (b, b, b)
145+
linreg xys = let !xs = map fst xys
146+
!ys = map snd xys
147+
!n = fromIntegral $ length xys
148+
!sX = sum xs
149+
!sY = sum ys
150+
!sXX = sum $ map (^ 2) xs
151+
!sXY = sum $ map (uncurry (*)) xys
152+
!sYY = sum $ map (^ 2) ys
153+
!alpha = (sY - beta * sX) / n
154+
!beta = (n * sXY - sX * sY) / (n * sXX - sX * sX)
155+
!r = (n * sXY - sX * sY) / (sqrt $ (n * sXX - sX^2) * (n * sYY - sY ^ 2))
156+
in (alpha, beta, r)

tests/QCStatistics.hs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
module QCStatistics where
2+
3+
import Control.Monad
4+
import Test.QuickCheck
5+
6+
import Math.Statistics
7+
8+
-- Quantiles
9+
10+
prop_Quantiles :: Property
11+
prop_Quantiles = forAll (choose (0, 1)) $
12+
\q -> forAll (sized $ \n -> replicateM (n + 1) (arbitrary :: Gen Double)) $
13+
\xs -> let nx = fromIntegral $ length xs
14+
qx = quantile q xs
15+
in collect (count (< qx) xs, count (<= qx) xs, count (> qx) xs, count (>= qx) xs) $
16+
and [count (< qx) xs <= ceiling (q * nx),
17+
count (<= qx) xs >= floor (q * nx),
18+
count (> qx) xs <= ceiling ((1 - q) * nx),
19+
count (>= qx) xs >= floor ((1 - q) * nx)]
20+
where count pred = length . filter pred
21+
22+
-- Linear regression
23+
24+
prop_LinReg :: (Double, Double) -> Property
25+
prop_LinReg (a0, a1) = forAll genXYs $
26+
\xys -> let (b0, b1, r) = linreg xys
27+
in and [b0 ~= a0, b1 ~= a1,
28+
if a1 ~= 0.0 then isNaN r else abs r ~= 1]
29+
where genXYs :: Gen [(Double, Double)]
30+
genXYs = liftM (map (\x -> (x, a0 + a1 * x))) $ genXs
31+
genXs :: Gen [Double]
32+
genXs = liftM (scanl1 (+)) $ sized $ \n -> replicateM (n + 2) $ genNonZero
33+
genNonZero :: Gen Double
34+
genNonZero = ap (elements [id,negate]) $ choose (1, 10)
35+
36+
(~=) :: (Fractional a, Ord a) => a -> a -> Bool
37+
x1 ~= x2 = abs(x1 - x2) < 1e-6

0 commit comments

Comments
 (0)