Improve scalability of counters a lot

Counters now scale orders of magnitude better as the number of
writers to the same counter increases. Here are the results of our
current benchmark on a 6-core machine:

        cores
            1       6
before  1.87s  81.09s
after   0.11s   0.32s

Note how the very heavy contention gives a slowdown rather than a
speedup as the number of cores increases, but the new implementation
slows down much less than the old one and is also generally faster.
This commit is contained in:
Johan Tibell 2014-04-08 16:50:36 +02:00
parent 7f11142c89
commit 64e6ba1259
4 changed files with 34 additions and 17 deletions

View file

@ -1,4 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BangPatterns, ForeignFunctionInterface #-}
-- | This module defines a type for mutable, integer-valued counters.
-- Counters are non-negative, monotonically increasing values and can
-- be used to track e.g. the number of requests served since program
@ -10,19 +10,11 @@ module System.Remote.Counter
, add
) where
import Data.IORef (atomicModifyIORef)
import Prelude hiding (subtract)
import System.Remote.Counter.Internal
-- | Increase the counter by one.
inc :: Counter -> IO ()
inc (C ref) = do
-- The bang pattern evaluates the newly computed count right away
-- instead of leaving the addition deferred.
!_ <- atomicModifyIORef ref $ \ n -> let n' = n + 1 in (n', n')
return ()
-- Same effect expressed through 'add' with an increment of 1.
inc counter = add counter 1
-- | Increase the counter by the given amount.
add :: Counter -> Int -> IO ()
add (C ref) i = do
-- The bang pattern evaluates the newly computed count right away
-- instead of leaving the addition deferred.
!_ <- atomicModifyIORef ref $ \ n -> let n' = n + i in (n', n')
return ()
-- Binds 'add' directly to the C function @hs_counter_add@ through an
-- @unsafe@ foreign call.
foreign import ccall unsafe "hs_counter_add" add :: Counter -> Int -> IO ()

View file

@ -1,3 +1,4 @@
{-# LANGUAGE ForeignFunctionInterface #-}
{-# OPTIONS_HADDOCK not-home #-}
module System.Remote.Counter.Internal
(
@ -6,15 +7,14 @@ module System.Remote.Counter.Internal
, read
) where
import Data.IORef (IORef, newIORef, readIORef)
import Data.Int
import Foreign.Ptr (Ptr)
import Prelude hiding (read)
-- | A mutable, integer-valued counter.
newtype Counter = C { unC :: IORef Int }
-- NOTE(review): the C helpers in cbits/counter.c are declared in terms
-- of StgInt rather than a fixed 64-bit type -- confirm that 'Int64' is
-- the intended pointee type on 32-bit targets.
newtype Counter = C { unC :: Ptr Int64 }
-- | Create a new, zero initialized, counter.
new :: IO Counter
new = C `fmap` newIORef 0
-- Binds 'new' to the C function @hs_counter_new@, which allocates the
-- counter on the C heap.
-- NOTE(review): no corresponding release function is visible in this
-- change -- confirm counters are meant to live as long as the program.
foreign import ccall unsafe "hs_counter_new" new :: IO Counter
-- | Get the current value of the counter.
read :: Counter -> IO Int
read = readIORef . unC
-- Binds 'read' to the C function @hs_counter_read@, which returns the
-- value stored behind the counter's pointer.
foreign import ccall unsafe "hs_counter_read" read :: Counter -> IO Int

24
cbits/counter.c Normal file
View file

@ -0,0 +1,24 @@
#include <stdlib.h>
#include "HsFFI.h"
/*
 * Allocate a new counter on the C heap with an initial value of zero.
 *
 * Returns a pointer to the counter, or NULL when the allocation fails.
 * No release function is provided next to this one, so the caller owns
 * the storage for as long as it keeps the pointer.
 */
StgInt* hs_counter_new(void) {
    /*
     * calloc returns zero-filled storage, so the counter starts at 0
     * without a store through a pointer that might be NULL.
     */
    return calloc(1, sizeof(StgInt));
}
/*
 * Atomically add n to the value stored at counter.
 *
 * counter: the counter to update; must be a valid, non-NULL pointer.
 * n:       the amount to add.
 *
 * Uses the compiler's atomic fetch-and-add builtin rather than
 * hand-written x86 assembly, so the same source builds on every target
 * the compiler supports and always matches the width of StgInt. The
 * builtin acts as a full memory barrier. Its return value (the previous
 * count) is not needed here.
 */
void hs_counter_add(volatile StgInt* counter, StgInt n) {
    (void) __sync_fetch_and_add(counter, n);
}
/*
 * Return the value currently stored in the counter.
 *
 * counter: the counter to inspect; must be a valid, non-NULL pointer.
 *
 * The pointee is volatile-qualified, so the value is loaded from memory
 * on each call.
 */
StgInt hs_counter_read(volatile const StgInt* counter) {
    const StgInt current = *counter;
    return current;
}

View file

@ -52,6 +52,7 @@ library
unordered-containers < 0.3
ghc-options: -Wall
c-sources: cbits/counter.c
benchmark benchmarks
hs-source-dirs: benchmarks