diff --git a/ChangeLog.md b/ChangeLog.md index b09f756..338f36d 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,9 @@ # Changelog for fec +## 1.0 0 (2025-03-17) + +* Haskell wrapper returns IO throughout to avoid unsafePerformIO + ## 0.2.0 (2023-10-06) * Application code must now execute the `Codec.FEC.initialize` action at least diff --git a/benchmark-zfec/Main.hs b/benchmark-zfec/Main.hs index aad9992..6eeca03 100644 --- a/benchmark-zfec/Main.hs +++ b/benchmark-zfec/Main.hs @@ -2,7 +2,7 @@ module Main where import Codec.FEC (FECParams (paramK, paramN), decode, encode, fec, initialize) import Control.Monad (replicateM) -import Criterion.Main (Benchmark, bench, bgroup, defaultMain, env, nf) +import Criterion.Main (Benchmark, bench, bgroup, defaultMain, env, nfAppIO) import Data.Bifunctor (bimap) import qualified Data.ByteString as B import Data.List (unfoldr) @@ -13,15 +13,11 @@ main = defaultMain -- Run against some somewhat arbitrarily chosen configurations. Notably, -- though, 94/100 matches the numbers recorded in the readme. - [ env (setupFEC 2 3) makeFECBenchmarks - , env (setupFEC 16 31) makeFECBenchmarks - , env (setupFEC 94 100) makeFECBenchmarks + [ env (fec 2 3) makeFECBenchmarks + , env (fec 16 31) makeFECBenchmarks + , env (fec 94 100) makeFECBenchmarks ] where - setupFEC :: Int -> Int -> IO FECParams - setupFEC k n = do - initialize - pure (fec k n) makeFECBenchmarks = fecGroup [10 ^ 6] @@ -53,20 +49,20 @@ main = -- result is serialize use all of the bytes (eg, to write them to a -- file or send them over the network) so they will certainly all be -- used. - nf (uncurry encode) (params, blocks) + nfAppIO (uncurry encode) (params, blocks) benchmarkPrimaryDecode params blocks = bench ("decode [0..] blockSize=" <> showWithUnit (B.length $ head blocks)) $ -- normal form here for the same reason as in benchmarkEncode. -- assign block numbers to use only primary blocks - nf (uncurry decode) (params, (zip [0 ..] blocks)) + nfAppIO (uncurry decode) (params, (zip [0 ..] blocks)) benchmarkSecondaryDecode params blocks = bench ("decode [" <> show n <> "..] blockSize=" <> showWithUnit (B.length $ head blocks)) $ -- normal form here for the same reason as in benchmarkEncode. -- assign block numbers to use as many non-primary blocks as -- possible - nf (uncurry decode) (params, (zip [n ..] blocks)) + nfAppIO (uncurry decode) (params, (zip [n ..] blocks)) where n = paramN params - paramK params diff --git a/fec.cabal b/fec.cabal index 9a3c486..0eafa8a 100644 --- a/fec.cabal +++ b/fec.cabal @@ -1,6 +1,6 @@ cabal-version: 3.0 name: fec -version: 0.2.0 +version: 1.0.0 license: GPL-2.0-or-later license-file: README.rst author: Adam Langley @@ -31,10 +31,11 @@ extra-doc-files: ChangeLog.md library build-depends: - , base >=4.9 && <5 - , bytestring >=0.10 && <0.13 - , deepseq >=1.4 && <1.6 - , extra >=1.7 && <1.8 + , base <5 + , bytestring <0.13 + , deepseq <1.7 + , extra <2 + , global-lock <1 exposed-modules: Codec.FEC default-language: Haskell2010 @@ -49,11 +50,11 @@ executable benchmark-zfec main-is: Main.hs ghc-options: -threaded build-depends: - , base >=4.9 && <5 - , bytestring >=0.10 && <0.13 - , criterion >=1.1 && <1.7 + , base <5 + , bytestring <0.13 + , criterion <1.7 , fec - , random >=1.1 && <1.3 + , random <2 hs-source-dirs: benchmark-zfec default-language: Haskell2010 @@ -65,13 +66,13 @@ test-suite tests hs-source-dirs: haskell/test ghc-options: -Wall -threaded -rtsopts -with-rtsopts=-N build-depends: - , base >=4.9 && <5 - , bytestring >=0.10 && <0.13 - , data-serializer >=0.3 && <0.4 + , base <5 + , bytestring <0.13 + , data-serializer <1 , fec - , hspec >=2.7 && <2.12 - , QuickCheck >=2.14 && <2.15 - , quickcheck-instances >=0.3 && <0.4 - , random >=1.1 && <1.3 + , hspec <3 + , QuickCheck <2.16 + , quickcheck-instances <1 + , random <2 default-language: Haskell2010 diff --git a/haskell/Codec/FEC.hs b/haskell/Codec/FEC.hs index 2ffd397..e0f5c0d 100644 --- a/haskell/Codec/FEC.hs +++ b/haskell/Codec/FEC.hs @@ -33,7 +33,6 @@ module Codec.FEC ( deFEC, ) where -import Control.Concurrent.Extra (Lock, newLock, withLock) import Control.DeepSeq (NFData (rnf)) import Control.Exception (Exception, throwIO) import Data.Bits (xor) @@ -42,18 +41,14 @@ import qualified Data.ByteString.Unsafe as BU import Data.List (nub, partition, sortBy, (\\)) import Data.Word (Word8) import Foreign.C.Types (CSize (..), CUInt (..)) -import Foreign.ForeignPtr ( - ForeignPtr, - newForeignPtr, - withForeignPtr, - ) +import Foreign.ForeignPtr ( ForeignPtr, newForeignPtr, withForeignPtr,) import Foreign.Marshal.Alloc (allocaBytes) import Foreign.Marshal.Array (advancePtr, withArray) import Foreign.Ptr (FunPtr, Ptr, castPtr, nullPtr) import Foreign.Storable (poke, sizeOf) import GHC.Generics (Generic) +import System.GlobalLock import System.IO (IOMode (..), withFile) -import System.IO.Unsafe (unsafePerformIO) data CFEC data FECParams = FECParams @@ -129,39 +124,26 @@ data Uninitialized = Uninitialized deriving (Ord, Eq, Show) instance Exception Uninitialized --- A lock to ensure at most one thread attempts to initialize the underlying --- library at a time. Multiple initializations are harmless but concurrent --- initializations are disallowed. -_initializationLock :: Lock -{-# NOINLINE _initializationLock #-} -_initializationLock = unsafePerformIO newLock - -- | Initialize the library. This must be done before other APIs can succeed. initialize :: IO () -initialize = withLock _initializationLock _init +initialize = lock _init -- | Return a FEC with the given parameters. -fec :: - -- | the number of primary blocks - Int -> - -- | the total number blocks, must be < 256 - Int -> - FECParams +fec :: Int -- ^ the number of primary blocks + -> Int -- ^ the total number blocks, must be < 256 + -> IO FECParams fec k n = - if not (isValidConfig k n) - then error $ "Invalid FEC parameters: " ++ show k ++ " " ++ show n - else - unsafePerformIO - ( do - cfec' <- _new (fromIntegral k) (fromIntegral n) - -- new will return null if the library hasn't been - -- initialized. - if cfec' == nullPtr - then throwIO Uninitialized - else do - params <- newForeignPtr _free cfec' - return $ FECParams params k n - ) + if not (isValidConfig k n) + then error $ "Invalid FEC parameters: " ++ show k ++ " " ++ show n + else do + cfec <- _new (fromIntegral k) (fromIntegral n) + -- new will return null if the library hasn't been + -- initialized. + if cfec == nullPtr + then throwIO Uninitialized + else do + params <- newForeignPtr _free cfec + return $ FECParams params k n -- | Create a C array of unsigned from an input array uintCArray :: [Int] -> (Ptr CUInt -> IO a) -> IO a @@ -213,42 +195,22 @@ createByteStringArray n size f = do ) ) -{- | Generate the secondary blocks from a list of the primary blocks. The - primary blocks must be in order and all of the same size. There must be - @k@ primary blocks. --} -encode :: - FECParams -> - -- | a list of @k@ input blocks - [B.ByteString] -> - -- | (n - k) output blocks - [B.ByteString] +-- | Generate the secondary blocks from a list of the primary blocks. The +-- primary blocks must be in order and all of the same size. There must be +-- @k@ primary blocks. +encode :: FECParams + -> [B.ByteString] -- ^ a list of @k@ input blocks + -> IO [B.ByteString] -- ^ (n - k) output blocks encode (FECParams params k n) inblocks - | length inblocks /= k = error "Wrong number of blocks to FEC encode" - | not (allByteStringsSameLength inblocks) = error "Not all inputs to FEC encode are the same length" - | otherwise = - unsafePerformIO - ( do - let sz = B.length $ head inblocks - withForeignPtr - params - ( \cfec' -> do - byteStringsToArray - inblocks - ( \src -> do - createByteStringArray - (n - k) - sz - ( \fecs -> do - uintCArray - [k .. (n - 1)] - ( \block_nums -> do - _encode cfec' src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz - ) - ) - ) - ) - ) + | length inblocks /= k = error "Wrong number of blocks to FEC encode" + | not (allByteStringsSameLength inblocks) = error "Not all inputs to FEC encode are the same length" + | otherwise = do + let sz = B.length $ head inblocks + withForeignPtr params (\cfec -> do + byteStringsToArray inblocks (\src -> do + createByteStringArray (n - k) sz (\fecs -> do + uintCArray [k..(n - 1)] (\block_nums -> do + _encode cfec src fecs block_nums (fromIntegral (n - k)) $ fromIntegral sz)))) -- | A sort function for tagged assoc lists sortTagged :: [(Int, a)] -> [(Int, a)] @@ -268,50 +230,29 @@ reorderPrimaryBlocks n blocks = inner (sortTagged pBlocks) sBlocks [] then inner ps sBlocks' (acc ++ [(tag, a)]) else inner pBlocks' ss (acc ++ [s]) -{- | Recover the primary blocks from a list of @k@ blocks. Each block must be - tagged with its number (see the module comments about block numbering) --} -decode :: - FECParams -> - -- | a list of @k@ blocks and their index - [(Int, B.ByteString)] -> - -- | a list the @k@ primary blocks - [B.ByteString] +-- | Recover the primary blocks from a list of @k@ blocks. Each block must be +-- tagged with its number (see the module comments about block numbering) +decode :: FECParams + -> [(Int, B.ByteString)] -- ^ a list of @k@ blocks and their index + -> IO [B.ByteString] -- ^ a list the @k@ primary blocks decode (FECParams params k n) inblocks - | length (nub $ map fst inblocks) /= length inblocks = error "Duplicate input blocks in FEC decode" - | any ((\f -> f < 0 || f >= n) . fst) inblocks = error "Invalid block numbers in FEC decode" - | length inblocks /= k = error "Wrong number of blocks to FEC decode" - | not (allByteStringsSameLength $ map snd inblocks) = error "Not all inputs to FEC decode are same length" - | otherwise = - unsafePerformIO - ( do - let sz = B.length $ snd $ head inblocks - inblocks' = reorderPrimaryBlocks k inblocks - presentBlocks = map fst inblocks' - withForeignPtr - params - ( \cfec' -> do - byteStringsToArray - (map snd inblocks') - ( \src -> do - b <- - createByteStringArray - (n - k) - sz - ( \out -> do - uintCArray - presentBlocks - ( \block_nums -> do - _decode cfec' src out block_nums $ fromIntegral sz - ) - ) - let blocks = [0 .. (n - 1)] \\ presentBlocks - tagged = zip blocks b - allBlocks = sortTagged $ tagged ++ inblocks' - return $ take k $ map snd allBlocks - ) - ) - ) + | length (nub $ map fst inblocks) /= length (inblocks) = error "Duplicate input blocks in FEC decode" + | any (\f -> f < 0 || f >= n) $ map fst inblocks = error "Invalid block numbers in FEC decode" + | length inblocks /= k = error "Wrong number of blocks to FEC decode" + | not (allByteStringsSameLength $ map snd inblocks) = error "Not all inputs to FEC decode are same length" + | otherwise = do + let sz = B.length $ snd $ head inblocks + inblocks' = reorderPrimaryBlocks k inblocks + presentBlocks = map fst inblocks' + withForeignPtr params (\cfec -> do + byteStringsToArray (map snd inblocks') (\src -> do + b <- createByteStringArray (n - k) sz (\out -> do + uintCArray presentBlocks (\block_nums -> do + _decode cfec src out block_nums $ fromIntegral sz)) + let blocks = [0..(n - 1)] \\ presentBlocks + tagged = zip blocks b + allBlocks = sortTagged $ tagged ++ inblocks' + return $ take k $ map snd allBlocks)) {- | Break a ByteString into @n@ parts, equal in length to the original, such that all @n@ are required to reconstruct the original, but having less @@ -355,50 +296,44 @@ secureCombine [a] = a secureCombine [a, b] = B.pack $ B.zipWith xor a b secureCombine (a : rest) = B.pack $ B.zipWith xor a $ secureCombine rest -{- | A utility function which takes an arbitary input and FEC encodes it into a - number of blocks. The order the resulting blocks doesn't matter so long - as you have enough to present to @deFEC@. --} -enFEC :: - -- | the number of blocks required to reconstruct - Int -> - -- | the total number of blocks - Int -> - -- | the data to divide - B.ByteString -> - -- | the resulting blocks - [B.ByteString] -enFEC k n input = taggedPrimaryBlocks ++ taggedSecondaryBlocks +-- | A utility function which takes an arbitary input and FEC encodes it into a +-- number of blocks. The order the resulting blocks doesn't matter so long +-- as you have enough to present to @deFEC@. +enFEC :: Int -- ^ the number of blocks required to reconstruct + -> Int -- ^ the total number of blocks + -> B.ByteString -- ^ the data to divide + -> IO [B.ByteString] -- ^ the resulting blocks +enFEC k n input = do + params <- fec k n + secondaryBlocks <- encode params primaryBlocks + pure $ taggedPrimaryBlocks ++ (taggedSecondaryBlocks secondaryBlocks) where - taggedPrimaryBlocks = zipWith B.cons [0 ..] primaryBlocks - taggedSecondaryBlocks = zipWith B.cons [(fromIntegral k) ..] secondaryBlocks + taggedPrimaryBlocks = map (uncurry B.cons) $ zip [0..] primaryBlocks + taggedSecondaryBlocks sb = map (uncurry B.cons) $ zip [(fromIntegral k)..] sb remainder = B.length input `mod` k - paddingLength = if remainder >= 1 then k - remainder else k - paddingBytes = B.replicate (paddingLength - 1) 0 `B.append` B.singleton (fromIntegral paddingLength) + paddingLength = if remainder >= 1 then (k - remainder) else k + paddingBytes = (B.replicate (paddingLength - 1) 0) `B.append` (B.singleton $ fromIntegral paddingLength) divide a bs | B.null bs = [] - | otherwise = B.take a bs : divide a (B.drop a bs) + | otherwise = (B.take a bs) : (divide a $ B.drop a bs) input' = input `B.append` paddingBytes blockSize = B.length input' `div` k primaryBlocks = divide blockSize input' - secondaryBlocks = encode params primaryBlocks - params = fec k n + -- | Reverses the operation of @enFEC@. -deFEC :: - -- | the number of blocks required (matches call to @enFEC@) - Int -> - -- | the total number of blocks (matches call to @enFEC@) - Int -> - -- | a list of k, or more, blocks from @enFEC@ - [B.ByteString] -> - B.ByteString +deFEC :: Int -- ^ the number of blocks required (matches call to @enFEC@) + -> Int -- ^ the total number of blocks (matches call to @enFEC@) + -> [B.ByteString] -- ^ a list of k, or more, blocks from @enFEC@ + -> IO B.ByteString deFEC k n inputs - | length inputs < k = error "Too few inputs to deFEC" - | otherwise = B.take (B.length fecOutput - paddingLength) fecOutput - where - paddingLength = fromIntegral $ B.last fecOutput - inputs' = take k inputs - taggedInputs = map (\bs -> (fromIntegral $ B.head bs, B.tail bs)) inputs' - fecOutput = B.concat $ decode params taggedInputs - params = fec k n + | length inputs < k = error "Too few inputs to deFEC" + | otherwise = + let + paddingLength output = fromIntegral $ B.last output + inputs' = take k inputs + taggedInputs = map (\bs -> (fromIntegral $ B.head bs, B.tail bs)) inputs' + in do + params <- fec k n + fecOutput <- B.concat <$> decode params taggedInputs + pure $ B.take (B.length fecOutput - paddingLength fecOutput) fecOutput diff --git a/haskell/test/FECTest.hs b/haskell/test/FECTest.hs index 1d5e909..b797f34 100644 --- a/haskell/test/FECTest.hs +++ b/haskell/test/FECTest.hs @@ -1,30 +1,22 @@ {-# LANGUAGE DerivingStrategies #-} + module Main where -import Test.Hspec (describe, hspec, it, parallel) +import Test.Hspec (describe, hspec, it, parallel, Expectation, shouldBe) + +import Control.Monad (replicateM_) import qualified Codec.FEC as FEC import qualified Data.ByteString as B -import qualified Data.ByteString.Lazy as BL import Data.List (sortOn) import Data.Word (Word16, Word8) import System.Random (Random (randoms), mkStdGen) -import Test.QuickCheck ( - Arbitrary (arbitrary), - Property, - Testable (property), - choose, - conjoin, - once, - withMaxSuccess, - (===), - ) +import Test.QuickCheck ( Arbitrary (arbitrary), Property, Testable (property), choose, once, withMaxSuccess) import Test.QuickCheck.Monadic (assert, monadicIO, run) -- Imported for the orphan Arbitrary ByteString instance. -import Control.Monad (replicateM_) import Test.QuickCheck.Instances.ByteString () -- | Valid ZFEC parameters. @@ -36,9 +28,10 @@ data Params = Params -- | A somewhat efficient generator for valid ZFEC parameters. instance Arbitrary Params where - arbitrary = - choose (1, 255) - >>= \req -> Params req <$> choose (req, 255) + arbitrary = do + req <- choose (1, 255) + tot <- choose (req, 255) + return $ Params req tot randomTake :: Int -> Int -> [a] -> [a] randomTake seed n values = map snd $ take n sortedValues @@ -63,24 +56,26 @@ testFEC :: Int -> -- | True if the encoded input was reconstructed by decoding, False -- otherwise. - Bool -testFEC fec len seed = FEC.decode fec someTaggedBlocks == origBlocks - where - -- Construct some blocks. Each will just be the byte corresponding to the - -- block number repeated to satisfy the requested length. - origBlocks = B.replicate (fromIntegral len) . fromIntegral <$> [0 .. (FEC.paramK fec - 1)] - + Expectation +testFEC fec len seed = do -- Encode the data to produce the "secondary" blocks which (might) add -- redundancy to the original blocks. - secondaryBlocks = FEC.encode fec origBlocks + secondaryBlocks <- FEC.encode fec origBlocks - -- Tag each block with its block number because the decode API requires - -- this information. - taggedBlocks = zip [0 ..] (origBlocks ++ secondaryBlocks) + let -- Tag each block with its block number because the decode API requires + -- this information. + taggedBlocks = zip [0 ..] (origBlocks ++ secondaryBlocks) - -- Choose enough of the tagged blocks (some combination of original and - -- secondary) to try to use for decoding. - someTaggedBlocks = randomTake seed (FEC.paramK fec) taggedBlocks + -- Choose enough of the tagged blocks (some combination of original and + -- secondary) to try to use for decoding. + someTaggedBlocks = randomTake seed (FEC.paramK fec) taggedBlocks + + decoded <- FEC.decode fec someTaggedBlocks + decoded `shouldBe` origBlocks + where + -- Construct some blocks. Each will just be the byte corresponding to the + -- block number repeated to satisfy the requested length. + origBlocks = B.replicate (fromIntegral len) . fromIntegral <$> [0 .. (FEC.paramK fec - 1)] -- | @FEC.secureDivide@ is the inverse of @FEC.secureCombine@. prop_divide :: Word16 -> Word8 -> Word8 -> Property @@ -91,25 +86,23 @@ prop_divide size byte divisor = monadicIO $ do -- | @FEC.encode@ is the inverse of @FEC.decode@. prop_decode :: Params -> Word16 -> Int -> Property -prop_decode (Params req tot) len seed = property $ do - testFEC fec len seed === True - where - fec = FEC.fec req tot +prop_decode (Params req tot) len seed = + monadicIO . run $ do + fec <- FEC.fec req tot + testFEC fec len seed -- | @FEC.enFEC@ is the inverse of @FEC.deFEC@. prop_deFEC :: Params -> B.ByteString -> Property -prop_deFEC (Params req tot) testdata = - FEC.deFEC req tot minimalShares === testdata - where - allShares = FEC.enFEC req tot testdata - minimalShares = take req allShares +prop_deFEC (Params req tot) testdata = monadicIO $ do + encoded <- run $ FEC.enFEC req tot testdata + decoded <- run $ FEC.deFEC req tot (take req encoded) + assert $ testdata == decoded -prop_primary_copies :: Params -> BL.ByteString -> Property -prop_primary_copies (Params _ tot) primary = property $ do - conjoin $ (BL.toStrict primary ===) <$> secondary - where - fec = FEC.fec 1 tot - secondary = FEC.encode fec [BL.toStrict primary] +prop_primary_copies :: Params -> B.ByteString -> Property +prop_primary_copies (Params _ tot) primary = monadicIO $ do + fec <- run $ FEC.fec 1 tot + secondary <- run $ FEC.encode fec [primary] + assert $ all (primary ==) secondary main :: IO () main = do @@ -143,11 +136,17 @@ main = do it "is the inverse of secureDivide n" $ once $ prop_divide 1024 65 3 describe "deFEC" $ do - replicateM_ 10 $ - it "is the inverse of enFEC" $ - property prop_deFEC + it "is the inverse of enFEC" $ withMaxSuccess 200 prop_deFEC + + describe "decode" $ do + it "is (nearly) the inverse of encode" $ withMaxSuccess 200 prop_decode + it "works with total=255" $ property $ prop_decode (Params 1 255) + it "works with required=255" $ property $ prop_decode (Params 255 255) - describe "decode" $ - replicateM_ 10 $ do - it "is (nearly) the inverse of encode" $ property $ prop_decode - it "works with required=255" $ property $ prop_decode (Params 255 255) + describe "encode" $ do + -- Since a single property won't result in parallel execution, add a + -- few of these. + replicateM_ 10 $ + it "returns copies of the primary block for all 1 of N encodings" $ + property $ + withMaxSuccess 10000 prop_primary_copies