code cleanup, better random quality for short

2018-09-04 14:21:25 +02:00 · 2018-09-04 14:21:25 +02:00 · 75f8064aaf
commit 75f8064aaf
parent d2a03a9125
5 changed files with 90 additions and 90 deletions
--- a/human-friendly-id-gen.cabal
+++ b/human-friendly-id-gen.cabal
@ -2,7 +2,7 @@
 --
 -- see: https://github.com/sol/hpack
 --
-- hash: 9c3e37ab9a4ab0be5cfdc76a03c0f8a4f3927508f29397d5d7cf5ebb45fa74b3
+-- hash: 0bb664d30c1e0572386dc9e95d45e1f5ec99b28b9c2e2722a81c52887647eb5b

 name:           human-friendly-id-gen
 version:        0.1.0.0
@ -29,9 +29,9 @@ data-files:
 library
  exposed-modules:
      HFIG.Dictionary
+      HFIG.Helpers
      HFIG.Lovecraftian
      HFIG.Short
-      Lib
  other-modules:
      Paths_human_friendly_id_gen
  hs-source-dirs:
--- a/src-exe/Main.hs
+++ b/src-exe/Main.hs
@ -12,8 +12,8 @@ main :: IO ()
 main = do
  opts <- options "Generate Human Friendly identifiers" optParser
  case genMode opts of
-    Short -> Short.idgen 8 >>= putText
-    Lovecraftian -> Lovecraftian.idgen 2 >>= putText
+    Short -> Short.idgen (fromMaybe 4 (optLen opts)) >>= putText
+    Lovecraftian -> Lovecraftian.idgen (fromMaybe 2 (optLen opts)) >>= putText
    Dict  -> do
      let file = case optDict opts of
                   Just "english"     -> "dictionaries/english.txt"
--- a/src/HFIG/Helpers.hs
+++ b/src/HFIG/Helpers.hs
@ -0,0 +1,67 @@
+{-|
+module      : HFIG.Helpers
+Description : Helpers for most generation strategy
+License     : Public Domain
+Maintainer  : yann.esposito@gmail.com
+
+Helpers to generate different kind of identifiers
+
+-}
+module HFIG.Helpers
+  ( idgen
+  , genWord
+  , collisionProbability
+  , NameParts
+  )
+where
+
+import Protolude
+
+import qualified System.Random.MWC as Random
+import qualified Control.Monad.Primitive as Prim
+import qualified Data.Vector as V
+import           Data.Vector ((!))
+
+import qualified Data.Text as Text
+
+-- | Generic id generator from using nameparts
+idgen :: Text -> NameParts -> Int -> IO Text
+idgen i np n = Random.withSystemRandom $ \gen ->
+  Text.intercalate i <$> replicateM n (genWord gen np)
+
+rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b
+rnd gen v = do
+  (k :: Int) <- Random.uniformR (0,V.length v - 1) gen
+  return (v ! k)
+
+genWord :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text
+genWord gen nps = mconcat <$> traverse (rnd gen) nps
+
+type NameParts = [V.Vector Text]
+
+-- | Approximate collision probability other n generated name with complexity
+-- parameter equal to l
+--
+-- For example if you generate 1000 words randomly with complexity parameter 4
+-- We estimate the probability of collision to 3.85%
+--
+-- This is a nice helper function to use when you want to estimate the optimal
+-- length of your ids
+--
+-- @
+-- > collisionProbability 1000 4
+-- 3.8580246913580245e-2
+--
+-- > collisionProbability 10000 5
+-- 6.430041152263374e-2
+--
+-- > collisionProbability 10000 6
+-- 1.0716735253772291e-3
+-- @
+collisionProbability ::
+  NameParts -- ^ name parts used
+  -> Double -- ^ nb of generated names
+  -> Double -- ^ length parameter used
+  -> Double
+collisionProbability nameparts n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1
+  where size = product (fmap (V.length . V.filter (/= "")) nameparts)
--- a/src/HFIG/Lovecraftian.hs
+++ b/src/HFIG/Lovecraftian.hs
@ -19,11 +19,11 @@ where
 import Protolude

 import qualified System.Random.MWC as Random
-import qualified Control.Monad.Primitive as Prim
 import qualified Data.Vector as V
-import           Data.Vector ((!))
 import qualified Data.Text as Text

+import qualified HFIG.Helpers as Helpers
+import           HFIG.Helpers (NameParts)

 -- | Will generate readable short names The integer parameter determine the
 -- length in number of syllabus of the name
@ -31,17 +31,9 @@ idgen :: Int -> IO Text
 idgen n = Random.withSystemRandom $ \gen -> do
  choice <- Random.uniformR (0 :: Int,1) gen
  let np = if choice == 0 then drop 1 nameparts else nameparts
-  Text.intercalate "-" <$> replicateM n (genGod gen np)
+  Text.intercalate "-" <$> replicateM n (Helpers.genWord gen np)

-rnd :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> V.Vector b -> m b
-rnd gen v = do
-  (k :: Int) <- Random.uniformR (0,V.length v - 1) gen
-  return (v ! k)
-
-genGod :: Random.Gen Prim.RealWorld -> [V.Vector Text] -> IO Text
-genGod gen nps = mconcat <$> traverse (rnd gen) nps
-
-nameparts :: [V.Vector Text]
+nameparts :: NameParts
 nameparts = [
  V.fromList ["a","e","i","u","o","a","ai","aiu","aiue","e","i","ia","iau","iu","o","u","y","ya","yi","yo"]
  , V.fromList ["bh","br","c'th","cn","ct","cth","cx","d","d'","g","gh","ghr","gr","h","k","kh","kth","mh","mh'","ml","n","ng","sh","t","th","tr","v","v'","vh","vh'","vr","x","z","z'","zh"]
@ -60,19 +52,7 @@ nameparts = [
 --
 -- This is a nice helper function to use when you want to estimate the optimal
 -- length of your ids
--
-- @
-- > collisionProbability 1000 4
-- 3.8580246913580245e-2
--
-- > collisionProbability 10000 5
-- 6.430041152263374e-2
--
-- > collisionProbability 10000 6
-- 1.0716735253772291e-3
-- @
 collisionProbability :: Double -- ^ nb of generated names
                     -> Double -- ^ length parameter used
                     -> Double
-collisionProbability n l = min ((n**2) / (2 * (fromIntegral size ** l))) 1
-  where size = product (fmap (V.length . V.filter (/= "")) nameparts) -- filtering the "" give a better approximation
+collisionProbability = Helpers.collisionProbability nameparts
--- a/src/HFIG/Short.hs
+++ b/src/HFIG/Short.hs
@ -1,3 +1,4 @@
+{-# OPTIONS_GHC -fno-warn-type-defaults #-}
 {-|
 module      : HFIG.Short
 Description : Short strategy to generate random human friendly ids
@ -12,82 +13,34 @@ Yet not the best for preventing collision.
 -}
 module HFIG.Short
  ( idgen
-  , idgenVariableLength
-  , collisionProbability
  )
 where

 import Protolude

-import qualified System.Random.MWC as Random
-import qualified System.Random.MWC.Distributions as Distr
-import qualified Control.Monad.Primitive as Prim
 import qualified Data.Vector as V

+import qualified HFIG.Helpers as Helpers

 -- | Will generate readable short names The integer parameter determine the
 -- length in number of syllabus of the name
 idgen :: Int -> IO Text
-idgen n = Random.withSystemRandom $ \gen -> do
-    mconcat <$> replicateM n (genSylab gen)
+idgen = Helpers.idgen "" nameparts

-- | Approximate collision probability other n generated name with complexity
-- parameter equal to l
--
-- For example if you generate 1000 words randomly with complexity parameter 4
-- We estimate the probability of collision to 3.85%
--
-- This is a nice helper function to use when you want to estimate the optimal
-- length of your ids
--
-- @
-- > collisionProbability 1000 4
-- 3.8580246913580245e-2
--
-- > collisionProbability 10000 5
-- 6.430041152263374e-2
--
-- > collisionProbability 10000 6
-- 1.0716735253772291e-3
-- @
-collisionProbability :: Double -- ^ nb of generated names
-                     -> Double -- ^ length parameter used
-                     -> Double
-collisionProbability n l = min ((n**2) / (2 * (nbSylabs ** l))) 1

-- | Will generate readable short names
-- The integer parameter determine the maximal length in number of sillabus of the name
-idgenVariableLength :: Int -> IO Text
-idgenVariableLength n = Random.withSystemRandom $ \gen -> do
-    val <- Distr.exponential nbSylabs gen
-    let len = n - (min (truncate $ val * fromIntegral n) (n - 1))
-    mconcat <$> replicateM len (genSylab gen)
-
-nbSylabs :: Double
-nbSylabs = fromIntegral $ V.length consonnants * V.length voyels
-
-genSylab :: Random.Gen Prim.RealWorld -> IO Text
-genSylab gen = mappend <$> genConsonnant gen <*> genVoyel gen
-
-genConsonnant :: Random.Gen (Prim.PrimState IO) -> IO Text
-genConsonnant gen = do
-  (k :: Int) <- Random.uniformR (0, V.length consonnants - 1) gen
-  return (consonnants V.! k)
+nameparts :: Helpers.NameParts
+nameparts = [ consonnants
+            , voyels
+            , V.fromList (replicate (2*(length consonnants)) "") <> consonnants]

 consonnants :: V.Vector Text
-consonnants = V.fromList basics
+consonnants = V.fromList $ basics
 where
-    basics = ["b" , "d", "f", "j", "k", "l", "m", "n", "p", "r", "s", "t", "v", "x", "z"]
-    -- you can also use basic <> composed but it will change the length of names in number of chars
-    -- and provide slightly harder to pronounce, read and understand names
-    -- for now I prefer to put that aside
-    -- composable = [ "b" , "d", "f", "k", "p", "s", "t", "v", "z"]
-    -- composed = mconcat (fmap (\x -> [ x <> c | c <- ["r", "l"]]) composable)
-
-genVoyel :: Prim.PrimMonad m => Random.Gen (Prim.PrimState m) -> m Text
-genVoyel gen = do
-  (k :: Int) <- Random.uniformR (0, V.length voyels - 1) gen
-  return (voyels V.! k)
+   basics = [ "b", "d", "f", "j", "k", "l", "m", "n"
+            , "p", "r", "s", "t", "v", "x", "z","w"
+            , "ch"
+            , "br", "dr", "pr", "tr"
+            , "bl", "pl" ]

 voyels :: V.Vector Text
 voyels = V.fromList ["a","i","o","u"]