|
| 1 | +{-# OPTIONS_GHC -XBangPatterns #-} |
| 2 | + |
| 3 | +----------------------------------------------------------------------------- |
| 4 | +-- Module : Math.Statistics |
| 5 | +-- Copyright : (c) 2007 SFTank |
| 6 | +-- License : BSD3 |
| 7 | +-- |
| 8 | +-- Maintainer : mbeddoe@<nospam>sftank.net |
| 9 | +-- Stability : experimental |
| 10 | +-- Portability : portable |
| 11 | +-- |
| 12 | +-- Description : |
| 13 | +-- A collection of commonly used statistical functions. |
| 14 | +----------------------------------------------------------------------------- |
| 15 | + |
| 16 | +module Math.Statistics where |
| 17 | + |
| 18 | +import Data.List |
| 19 | +import Data.Ord (comparing) |
| 20 | + |
| 21 | +-- Numerically stable mean |
| 22 | +-- Thanks dmwit and ddarius for help on strictness issues |
| 23 | +mean :: Floating a => [a] -> a |
| 24 | +mean x = fst $ foldl' (\(!m, !n) x -> (m+(x-m)/(n+1),n+1)) (0,0) x |
| 25 | + |
| 26 | +-- Harmonic mean |
| 27 | +hmean :: (Floating a) => [a] -> a |
| 28 | +hmean xs = fromIntegral (length xs) / (sum $ map (1/) xs) |
| 29 | + |
| 30 | +-- Geometric mean |
| 31 | +gmean :: (Floating a) => [a] -> a |
| 32 | +gmean xs = (foldr1 (*) xs)**(1 / fromIntegral (length xs)) |
| 33 | + |
| 34 | +-- Median |
| 35 | +median :: (Floating a, Ord a) => [a] -> a |
| 36 | +median x | odd n = head $ drop (n `div` 2) x' |
| 37 | + | even n = mean $ take 2 $ drop i x' |
| 38 | + where i = (length x' `div` 2) - 1 |
| 39 | + x' = sort x |
| 40 | + n = length x |
| 41 | + |
| 42 | +-- Modes |
| 43 | +-- Returns a sorted list of modes in descending order |
| 44 | +modes :: (Ord a) => [a] -> [(Int, a)] |
| 45 | +modes xs = sortBy (comparing $ negate.fst) $ map (\x->(length x, head x)) $ (group.sort) xs |
| 46 | + |
| 47 | +-- Central moments |
| 48 | +centralMoment xs 1 = 0 |
| 49 | +centralMoment xs r = (sum (map (\x -> (x-m)^r) xs)) / n |
| 50 | + where |
| 51 | + m = mean xs |
| 52 | + n = fromIntegral $ length xs |
| 53 | + |
| 54 | +-- Range |
| 55 | +range :: (Num a, Ord a) => [a] -> a |
| 56 | +range xs = maximum xs - minimum xs |
| 57 | + |
| 58 | +-- Average deviation |
| 59 | +avgdev :: (Floating a) => [a] -> a |
| 60 | +avgdev xs = mean $ map (\x -> abs(x - m)) xs |
| 61 | + where |
| 62 | + m = mean xs |
| 63 | + |
| 64 | +-- Standard deviation |
| 65 | +stddev :: (Floating a) => [a] -> a |
| 66 | +stddev xs = sqrt $ var xs |
| 67 | + |
| 68 | +-- Population variance |
| 69 | +pvar :: (Floating a) => [a] -> a |
| 70 | +pvar xs = centralMoment xs 2 |
| 71 | + |
| 72 | +-- Numerically stable sample variance |
| 73 | +-- This crashes |
| 74 | +var xs = (var' 0 0 0 xs) / (fromIntegral $ length xs - 1) |
| 75 | + where |
| 76 | + var' _ _ s [] = s |
| 77 | + var' m n s (x:xs) = var' nm (n + 1) (s + delta * (x - nm)) xs |
| 78 | + where |
| 79 | + delta = x - m |
| 80 | + nm = m + delta/(fromIntegral $ n + 1) |
| 81 | + |
| 82 | +-- Interquartile range |
| 83 | +-- XXX: Add case that takes into account even vs odd length |
| 84 | +iqr xs = take (length xs - 2*q) $ drop q xs |
| 85 | + where |
| 86 | + q = ((length xs) + 1) `div` 4 |
| 87 | + |
| 88 | +-- Kurtosis |
| 89 | +kurtosis xs = ((centralMoment xs 4) / (centralMoment xs 2)^2)-3 |
| 90 | + |
| 91 | +-- |Arbitrary quantile q of an unsorted list. The quantile /q/ of /N/ |
| 92 | +-- |data points is the point whose (zero-based) index in the sorted |
| 93 | +-- |data set is closest to /q(N-1)/. |
| 94 | +quantile :: (Fractional b, Ord b) => Double -> [b] -> b |
| 95 | +quantile q = quantileAsc q . sort |
| 96 | + |
| 97 | +-- |As 'quantile' specialized for sorted data |
| 98 | +quantileAsc :: (Fractional b, Ord b) => Double -> [b] -> b |
| 99 | +quantileAsc _ [] = error "quantile on empty list" |
| 100 | +quantileAsc q xs |
| 101 | + | q < 0 || q > 1 = error "quantile out of range" |
| 102 | + | otherwise = xs !! (quantIndex (length xs) q) |
| 103 | + where quantIndex :: Int -> Double -> Int |
| 104 | + quantIndex len q = case round $ q * (fromIntegral len - 1) of |
| 105 | + idx | idx < 0 -> error "Quantile index too small" |
| 106 | + | idx >= len -> error "Quantile index too large" |
| 107 | + | otherwise -> idx |
| 108 | + |
| 109 | +-- Skew |
| 110 | +skew xs = (centralMoment xs 3) / (centralMoment xs 2)**(3/2) |
| 111 | + |
| 112 | +pearsonSkew1 xs = 3 * (mean xs - mo) / stddev xs |
| 113 | + where |
| 114 | + mo = snd $ head $ modes xs |
| 115 | + |
| 116 | +pearsonSkew2 xs = 3 * (mean xs - median xs) / stddev xs |
| 117 | + |
| 118 | +-- Covariance |
| 119 | +cov :: (Floating a) => [a] -> [a] -> a |
| 120 | +cov xs ys = sum (zipWith (*) (map f1 xs) (map f2 ys)) / (n - 1) |
| 121 | + where |
| 122 | + n = fromIntegral $ length $ xs |
| 123 | + m1 = mean xs |
| 124 | + m2 = mean ys |
| 125 | + f1 = \x -> (x - m1) |
| 126 | + f2 = \x -> (x - m2) |
| 127 | + |
| 128 | +-- Covariance matrix |
| 129 | +covMatrix :: (Floating a) => [[a]] -> [[a]] |
| 130 | +covMatrix xs = split' (length xs) cs |
| 131 | + where |
| 132 | + cs = [ cov a b | a <- xs, b <- xs] |
| 133 | + split' n = unfoldr (\y -> if null y then Nothing else Just $ splitAt n y) |
| 134 | + |
| 135 | +-- Pearson's product-moment correlation coefficient |
| 136 | +corr :: (Floating a) => [a] -> [a] -> a |
| 137 | +corr x y = cov x y / (stddev x * stddev y) |
| 138 | + |
| 139 | +-- |Least-squares linear regression of /y/ against /x/ for a |
| 140 | +-- |collection of (/x/, /y/) data, in the form of (/b0/, /b1/, /r/) |
| 141 | +-- |where the regression is /y/ = /b0/ + /b1/ * /x/ with Pearson |
| 142 | +-- |coefficient /r/ |
| 143 | + |
| 144 | +linreg :: (Floating b) => [(b, b)] -> (b, b, b) |
| 145 | +linreg xys = let !xs = map fst xys |
| 146 | + !ys = map snd xys |
| 147 | + !n = fromIntegral $ length xys |
| 148 | + !sX = sum xs |
| 149 | + !sY = sum ys |
| 150 | + !sXX = sum $ map (^ 2) xs |
| 151 | + !sXY = sum $ map (uncurry (*)) xys |
| 152 | + !sYY = sum $ map (^ 2) ys |
| 153 | + !alpha = (sY - beta * sX) / n |
| 154 | + !beta = (n * sXY - sX * sY) / (n * sXX - sX * sX) |
| 155 | + !r = (n * sXY - sX * sY) / (sqrt $ (n * sXX - sX^2) * (n * sYY - sY ^ 2)) |
| 156 | + in (alpha, beta, r) |
0 commit comments