module Statistics.Test.KruskalWallis
(
kruskalWallisTest
, kruskalWallisRank
, kruskalWallis
, module Statistics.Test.Types
) where
import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Statistics.Function (sort, sortBy, square)
import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared)
import Statistics.Types
import Statistics.Test.Types
import Statistics.Test.Internal (rank)
import Statistics.Sample
import qualified Statistics.Sample.Internal as Sample(sum)
kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double]
kruskalWallisRank :: [Vector a] -> [Vector Double]
kruskalWallisRank [Vector a]
samples = Vector (Int, Double) -> [Vector Double]
forall a. (Eq a, Unbox a) => Vector (a, Double) -> [Vector Double]
groupByTags
(Vector (Int, Double) -> [Vector Double])
-> (Vector Double -> Vector (Int, Double))
-> Vector Double
-> [Vector Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Comparison (Int, Double)
-> Vector (Int, Double) -> Vector (Int, Double)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((Int, Double) -> Int) -> Comparison (Int, Double)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, Double) -> Int
forall a b. (a, b) -> a
fst)
(Vector (Int, Double) -> Vector (Int, Double))
-> (Vector Double -> Vector (Int, Double))
-> Vector Double
-> Vector (Int, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> Vector Double -> Vector (Int, Double)
forall a b.
(Unbox a, Unbox b) =>
Vector a -> Vector b -> Vector (a, b)
U.zip Vector Int
tags
(Vector Double -> [Vector Double])
-> Vector Double -> [Vector Double]
forall a b. (a -> b) -> a -> b
$ (a -> a -> Bool) -> Vector a -> Vector Double
forall (v :: * -> *) a.
(Vector v a, Vector v Double) =>
(a -> a -> Bool) -> v a -> v Double
rank a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(==) Vector a
joinSample
where
(Vector Int
tags,Vector a
joinSample) = Vector (Int, a) -> (Vector Int, Vector a)
forall a b.
(Unbox a, Unbox b) =>
Vector (a, b) -> (Vector a, Vector b)
U.unzip
(Vector (Int, a) -> (Vector Int, Vector a))
-> (Vector (Int, a) -> Vector (Int, a))
-> Vector (Int, a)
-> (Vector Int, Vector a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Comparison (Int, a) -> Vector (Int, a) -> Vector (Int, a)
forall (v :: * -> *) e. Vector v e => Comparison e -> v e -> v e
sortBy (((Int, a) -> a) -> Comparison (Int, a)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, a) -> a
forall a b. (a, b) -> b
snd)
(Vector (Int, a) -> (Vector Int, Vector a))
-> Vector (Int, a) -> (Vector Int, Vector a)
forall a b. (a -> b) -> a -> b
$ ((Int, Vector a) -> Vector (Int, a))
-> [(Int, Vector a)] -> Vector (Int, a)
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap ((Int -> Vector a -> Vector (Int, a))
-> (Int, Vector a) -> Vector (Int, a)
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Int -> Vector a -> Vector (Int, a)
forall b a. (Unbox b, Unbox a) => a -> Vector b -> Vector (a, b)
tagSample) ([(Int, Vector a)] -> Vector (Int, a))
-> [(Int, Vector a)] -> Vector (Int, a)
forall a b. (a -> b) -> a -> b
$ [Int] -> [Vector a] -> [(Int, Vector a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [(Int
1::Int)..] [Vector a]
samples
tagSample :: a -> Vector b -> Vector (a, b)
tagSample a
t = (b -> (a, b)) -> Vector b -> Vector (a, b)
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\b
x -> (a
t,b
x))
groupByTags :: Vector (a, Double) -> [Vector Double]
groupByTags Vector (a, Double)
xs
| Vector (a, Double) -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector (a, Double)
xs = []
| Bool
otherwise = Vector Double -> Vector Double
sort (((a, Double) -> Double) -> Vector (a, Double) -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (a, Double) -> Double
forall a b. (a, b) -> b
snd Vector (a, Double)
ys) Vector Double -> [Vector Double] -> [Vector Double]
forall a. a -> [a] -> [a]
: Vector (a, Double) -> [Vector Double]
groupByTags Vector (a, Double)
zs
where
(Vector (a, Double)
ys,Vector (a, Double)
zs) = ((a, Double) -> Bool)
-> Vector (a, Double) -> (Vector (a, Double), Vector (a, Double))
forall a.
Unbox a =>
(a -> Bool) -> Vector a -> (Vector a, Vector a)
U.span (a -> a -> Bool
forall a. Eq a => a -> a -> Bool
(==) ((a, Double) -> a
forall a b. (a, b) -> a
fst ((a, Double) -> a) -> (a, Double) -> a
forall a b. (a -> b) -> a -> b
$ Vector (a, Double) -> (a, Double)
forall a. Unbox a => Vector a -> a
U.head Vector (a, Double)
xs) (a -> Bool) -> ((a, Double) -> a) -> (a, Double) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a, Double) -> a
forall a b. (a, b) -> a
fst) Vector (a, Double)
xs
kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double
kruskalWallis :: [Vector a] -> Double
kruskalWallis [Vector a]
samples = (Double
nTot Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
numerator Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
denominator
where
nTot :: Double
nTot = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> (Vector Double -> Int) -> Int
forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
rsamples Vector Double -> Int
forall a. Unbox a => Vector a -> Int
U.length
avgRank :: Double
avgRank = (Double
nTot Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
1) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
2
numerator :: Double
numerator = [Vector Double] -> (Vector Double -> Double) -> Double
forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
rsamples ((Vector Double -> Double) -> Double)
-> (Vector Double -> Double) -> Double
forall a b. (a -> b) -> a -> b
$ \Vector Double
sample ->
let n :: Double
n = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ Vector Double -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Double
sample
in Double
n Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
square (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
sample Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
avgRank)
denominator :: Double
denominator = [Vector Double] -> (Vector Double -> Double) -> Double
forall a. Num a => [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
rsamples ((Vector Double -> Double) -> Double)
-> (Vector Double -> Double) -> Double
forall a b. (a -> b) -> a -> b
$ \Vector Double
sample ->
Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
Sample.sum (Vector Double -> Double) -> Vector Double -> Double
forall a b. (a -> b) -> a -> b
$ (Double -> Double) -> Vector Double -> Vector Double
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\Double
r -> Double -> Double
square (Double
r Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
avgRank)) Vector Double
sample
rsamples :: [Vector Double]
rsamples = [Vector a] -> [Vector Double]
forall a. (Unbox a, Ord a) => [Vector a] -> [Vector Double]
kruskalWallisRank [Vector a]
samples
kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
kruskalWallisTest :: [Vector a] -> Maybe (Test ())
kruskalWallisTest [] = Maybe (Test ())
forall a. Maybe a
Nothing
kruskalWallisTest [Vector a]
samples
| (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
4) [Int]
ns = Test () -> Maybe (Test ())
forall a. a -> Maybe a
Just Test :: forall distr. PValue Double -> Double -> distr -> Test distr
Test { testSignificance :: PValue Double
testSignificance = Double -> PValue Double
forall a. (Ord a, Num a) => a -> PValue a
mkPValue (Double -> PValue Double) -> Double -> PValue Double
forall a b. (a -> b) -> a -> b
$ ChiSquared -> Double -> Double
forall d. Distribution d => d -> Double -> Double
complCumulative ChiSquared
d Double
k
, testStatistics :: Double
testStatistics = Double
k
, testDistribution :: ()
testDistribution = ()
}
| Bool
otherwise = Maybe (Test ())
forall a. Maybe a
Nothing
where
k :: Double
k = [Vector a] -> Double
forall a. (Unbox a, Ord a) => [Vector a] -> Double
kruskalWallis [Vector a]
samples
ns :: [Int]
ns = (Vector a -> Int) -> [Vector a] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map Vector a -> Int
forall a. Unbox a => Vector a -> Int
U.length [Vector a]
samples
d :: ChiSquared
d = Int -> ChiSquared
chiSquared ([Int] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
ns Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
sumWith :: Num a => [Sample] -> (Sample -> a) -> a
sumWith :: [Vector Double] -> (Vector Double -> a) -> a
sumWith [Vector Double]
samples Vector Double -> a
f = [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
Prelude.sum ([a] -> a) -> [a] -> a
forall a b. (a -> b) -> a -> b
$ (Vector Double -> a) -> [Vector Double] -> [a]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Vector Double -> a
f [Vector Double]
samples
{-# INLINE sumWith #-}