{-# LANGUAGE FlexibleContexts, BangPatterns #-}

-- |
-- Module    : Statistics.Sample.Histogram
-- Copyright : (c) 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Functions for computing histograms of sample data.

module Statistics.Sample.Histogram
    (
      histogram
    -- * Building blocks
    , histogram_
    , range
    ) where

import Numeric.MathFunctions.Constants (m_epsilon,m_tiny)
import Statistics.Function (minMax)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as GM

-- | /O(n)/ Compute a histogram over a data set.
--
-- The result consists of a pair of vectors:
--
-- * The lower bound of each interval.
--
-- * The number of samples within the interval.
--
-- Interval (bin) sizes are uniform, and the upper and lower bounds
-- are chosen automatically using the 'range' function.  To specify
-- these parameters directly, use the 'histogram_' function.
histogram :: (G.Vector v0 Double, G.Vector v1 Double, Num b, G.Vector v1 b) =>
             Int                -- ^ Number of bins (must be positive).
          -> v0 Double          -- ^ Sample data (cannot be empty).
          -> (v1 Double, v1 b)
histogram :: Int -> v0 Double -> (v1 Double, v1 b)
histogram Int
numBins v0 Double
xs = (Int -> (Int -> Double) -> v1 Double
forall (v :: * -> *) a. Vector v a => Int -> (Int -> a) -> v a
G.generate Int
numBins Int -> Double
forall a. Integral a => a -> Double
step, Int -> Double -> Double -> v0 Double -> v1 b
forall b a (v0 :: * -> *) (v1 :: * -> *).
(Num b, RealFrac a, Vector v0 a, Vector v1 b) =>
Int -> a -> a -> v0 a -> v1 b
histogram_ Int
numBins Double
lo Double
hi v0 Double
xs)
    where (Double
lo,Double
hi)    = Int -> v0 Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
Int -> v Double -> (Double, Double)
range Int
numBins v0 Double
xs
          step :: a -> Double
step a
i     = Double
lo Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
d Double -> Double -> Double
forall a. Num a => a -> a -> a
* a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
i
          d :: Double
d          = (Double
hi Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numBins
{-# INLINE histogram #-}

-- | /O(n)/ Compute a histogram over a data set.
--
-- Interval (bin) sizes are uniform, based on the supplied upper
-- and lower bounds.
histogram_ :: (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) =>
              Int
           -- ^ Number of bins.  This value must be positive.  A zero
           -- or negative value will cause an error.
           -> a
           -- ^ Lower bound on interval range.  Sample data less than
           -- this will cause an error.
           -> a
           -- ^ Upper bound on interval range.  This value must not be
           -- less than the lower bound.  Sample data that falls above
           -- the upper bound will cause an error.
           -> v0 a
           -- ^ Sample data.
           -> v1 b
histogram_ :: Int -> a -> a -> v0 a -> v1 b
histogram_ Int
numBins a
lo a
hi v0 a
xs0 = (forall s. ST s (Mutable v1 s b)) -> v1 b
forall (v :: * -> *) a.
Vector v a =>
(forall s. ST s (Mutable v s a)) -> v a
G.create (Int -> b -> ST s (Mutable v1 (PrimState (ST s)) b)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
numBins b
0 ST s (Mutable v1 s b)
-> (Mutable v1 s b -> ST s (Mutable v1 s b))
-> ST s (Mutable v1 s b)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= v0 a
-> Mutable v1 (PrimState (ST s)) b
-> ST s (Mutable v1 (PrimState (ST s)) b)
forall (v :: * -> *) (m :: * -> *) a (v :: * -> * -> *).
(PrimMonad m, Num a, MVector v a, Vector v a) =>
v a -> v (PrimState m) a -> m (v (PrimState m) a)
bin v0 a
xs0)
  where
    bin :: v a -> v (PrimState m) a -> m (v (PrimState m) a)
bin v a
xs v (PrimState m) a
bins = Int -> m (v (PrimState m) a)
go Int
0
     where
       go :: Int -> m (v (PrimState m) a)
go Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = v (PrimState m) a -> m (v (PrimState m) a)
forall (m :: * -> *) a. Monad m => a -> m a
return v (PrimState m) a
bins
            | Bool
otherwise = do
         let x :: a
x = v a
xs v a -> Int -> a
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` Int
i
             b :: Int
b = a -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate (a -> Int) -> a -> Int
forall a b. (a -> b) -> a -> b
$ (a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
lo) a -> a -> a
forall a. Fractional a => a -> a -> a
/ a
d
         v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
write' v (PrimState m) a
bins Int
b (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1) (a -> m ()) -> m a -> m ()
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.read v (PrimState m) a
bins Int
b
         Int -> m (v (PrimState m) a)
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
       write' :: v (PrimState m) a -> Int -> a -> m ()
write' v (PrimState m) a
bins' Int
b !a
e = v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.write v (PrimState m) a
bins' Int
b a
e
       len :: Int
len = v a -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length v a
xs
       d :: a
d = ((a
hi a -> a -> a
forall a. Num a => a -> a -> a
- a
lo) a -> a -> a
forall a. Fractional a => a -> a -> a
/ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numBins) a -> a -> a
forall a. Num a => a -> a -> a
* (a
1 a -> a -> a
forall a. Num a => a -> a -> a
+ Double -> a
forall a b. (Real a, Fractional b) => a -> b
realToFrac Double
m_epsilon)
{-# INLINE histogram_ #-}

-- | /O(n)/ Compute decent defaults for the lower and upper bounds of
-- a histogram, based on the desired number of bins and the range of
-- the sample data.
--
-- The upper and lower bounds used are @(lo-d, hi+d)@, where
--
-- @d = (maximum sample - minimum sample) / ((bins - 1) * 2)@
--
-- If all elements in the sample are the same and equal to @x@ range
-- is set to @(x - |x|/10, x + |x|/10)@. And if @x@ is equal to 0 range
-- is set to @(-1,1)@. This is needed to avoid creating histogram with
-- zero bin size.
range :: (G.Vector v Double) =>
         Int                    -- ^ Number of bins (must be positive).
      -> v Double               -- ^ Sample data (cannot be empty).
      -> (Double, Double)
range :: Int -> v Double -> (Double, Double)
range Int
numBins v Double
xs
    | Int
numBins Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
1 = [Char] -> (Double, Double)
forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Histogram.range: invalid bin count"
    | v Double -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Double
xs   = [Char] -> (Double, Double)
forall a. HasCallStack => [Char] -> a
error [Char]
"Statistics.Histogram.range: empty sample"
    | Double
lo Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
hi    = case Double -> Double
forall a. Num a => a -> a
abs Double
lo Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
10 of
                      Double
a | Double
a Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
m_tiny -> (-Double
1,Double
1)
                        | Bool
otherwise  -> (Double
lo Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a, Double
lo Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
a)
    | Bool
otherwise   = (Double
loDouble -> Double -> Double
forall a. Num a => a -> a -> a
-Double
d, Double
hiDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
d)
  where
    d :: Double
d | Int
numBins Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 = Double
0
      | Bool
otherwise    = (Double
hi Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ ((Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
numBins Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
2)
    (Double
lo,Double
hi)          = v Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
minMax v Double
xs
{-# INLINE range #-}