Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tighter bounds for IntMap and IntSet merge #1110

Merged
merged 2 commits into from
Feb 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 72 additions & 20 deletions containers/src/Data/IntMap/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -740,7 +740,8 @@ unsafeFindMax (Bin _ _ r) = unsafeFindMax r
{--------------------------------------------------------------------
Disjoint
--------------------------------------------------------------------}
-- | \(O(n+m)\). Check whether the key sets of two maps are disjoint
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Check whether the key sets of two maps are disjoint
-- (i.e. their 'intersection' is empty).
--
-- > disjoint (fromList [(2,'a')]) (fromList [(1,()), (3,())]) == True
Expand Down Expand Up @@ -1091,7 +1092,8 @@ unionsWith :: Foldable f => (a->a->a) -> f (IntMap a) -> IntMap a
unionsWith f ts
= Foldable.foldl' (unionWith f) empty ts

-- | \(O(n+m)\). The (left-biased) union of two maps.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The (left-biased) union of two maps.
-- It prefers the first map when duplicate keys are encountered,
-- i.e. (@'union' == 'unionWith' 'const'@).
--
Expand All @@ -1101,7 +1103,8 @@ union :: IntMap a -> IntMap a -> IntMap a
union m1 m2
= mergeWithKey' Bin const id id m1 m2

-- | \(O(n+m)\). The union with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The union with a combining function.
--
-- > unionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "aA"), (7, "C")]
--
Expand All @@ -1111,7 +1114,8 @@ unionWith :: (a -> a -> a) -> IntMap a -> IntMap a -> IntMap a
unionWith f m1 m2
= unionWithKey (\_ x y -> f x y) m1 m2

-- | \(O(n+m)\). The union with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The union with a combining function.
--
-- > let f key left_value right_value = (show key) ++ ":" ++ left_value ++ "|" ++ right_value
-- > unionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "5:a|A"), (7, "C")]
Expand All @@ -1125,15 +1129,17 @@ unionWithKey f m1 m2
{--------------------------------------------------------------------
Difference
--------------------------------------------------------------------}
-- | \(O(n+m)\). Difference between two maps (based on keys).
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Difference between two maps (based on keys).
--
-- > difference (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 3 "b"

-- Key-based difference, expressed through 'mergeWithKey': keys present in
-- both maps are dropped (the combiner always answers 'Nothing'), the part
-- only in @m1@ is passed through 'id', and the part only in @m2@ is
-- replaced by 'Nil'.
difference :: IntMap a -> IntMap b -> IntMap a
difference m1 m2 =
  mergeWithKey dropShared id (const Nil) m1 m2
  where
    -- Combiner for keys found in both maps: never keep the entry.
    dropShared _ _ _ = Nothing

-- | \(O(n+m)\). Difference with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Difference with a combining function.
--
-- > let f al ar = if al == "b" then Just (al ++ ":" ++ ar) else Nothing
-- > differenceWith f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (3, "B"), (7, "C")])
Expand All @@ -1143,7 +1149,8 @@ differenceWith :: (a -> b -> Maybe a) -> IntMap a -> IntMap b -> IntMap a
differenceWith f m1 m2
= differenceWithKey (\_ x y -> f x y) m1 m2

-- | \(O(n+m)\). Difference with a combining function. When two equal keys are
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Difference with a combining function. When two equal keys are
-- encountered, the combining function is applied to the key and both values.
-- If it returns 'Nothing', the element is discarded (proper set difference).
-- If it returns (@'Just' y@), the element is updated with a new value @y@.
Expand All @@ -1157,8 +1164,8 @@ differenceWithKey f m1 m2
= mergeWithKey f id (const Nil) m1 m2


-- TODO(wrengr): re-verify that asymptotic bound
-- | \(O(n+m)\). Remove all the keys in a given set from a map.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Remove all the keys in a given set from a map.
--
-- @
-- m \`withoutKeys\` s = 'filterWithKey' (\\k _ -> k ``IntSet.notMember`` s) m
Expand Down Expand Up @@ -1221,7 +1228,8 @@ withoutBM _ Nil = Nil
{--------------------------------------------------------------------
Intersection
--------------------------------------------------------------------}
-- | \(O(n+m)\). The (left-biased) intersection of two maps (based on keys).
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The (left-biased) intersection of two maps (based on keys).
--
-- > intersection (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "a"

Expand All @@ -1230,8 +1238,8 @@ intersection m1 m2
= mergeWithKey' bin const (const Nil) (const Nil) m1 m2


-- TODO(wrengr): re-verify that asymptotic bound
-- | \(O(n+m)\). The restriction of a map to the keys in a set.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The restriction of a map to the keys in a set.
--
-- @
-- m \`restrictKeys\` s = 'filterWithKey' (\\k _ -> k ``IntSet.member`` s) m
Expand Down Expand Up @@ -1291,15 +1299,17 @@ restrictBM bm t@(Tip k _)
restrictBM _ Nil = Nil


-- | \(O(n+m)\). The intersection with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The intersection with a combining function.
--
-- > intersectionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "aA"

-- Thin wrapper over 'intersectionWithKey': the key is ignored and only the
-- two values are handed to @f@.
intersectionWith :: (a -> b -> c) -> IntMap a -> IntMap b -> IntMap c
intersectionWith f m1 m2 =
  intersectionWithKey ignoringKey m1 m2
  where
    -- Adapts @f@ to the key-aware interface by discarding the key.
    ignoringKey _ leftVal rightVal = f leftVal rightVal

-- | \(O(n+m)\). The intersection with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The intersection with a combining function.
--
-- > let f k al ar = (show k) ++ ":" ++ al ++ "|" ++ ar
-- > intersectionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "5:a|A"
Expand All @@ -1312,7 +1322,8 @@ intersectionWithKey f m1 m2
Symmetric difference
--------------------------------------------------------------------}

-- | \(O(n+m)\). The symmetric difference of two maps.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The symmetric difference of two maps.
--
-- The result contains entries whose keys appear in exactly one of the two maps.
--
Expand Down Expand Up @@ -1355,7 +1366,8 @@ symDiffTip !t1 !k1 = go
MergeWithKey
--------------------------------------------------------------------}

-- | \(O(n+m)\). A high-performance universal combining function. Using
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- A high-performance universal combining function. Using
-- 'mergeWithKey', all combining functions can be defined without any loss of
-- efficiency (with exception of 'union', 'difference' and 'intersection',
-- where sharing of some nodes is lost with 'mergeWithKey').
Expand Down Expand Up @@ -1391,6 +1403,7 @@ symDiffTip !t1 !k1 = go
-- @only2@ are 'id' and @'const' 'empty'@, but for example @'map' f@ or
-- @'filterWithKey' f@ could be used for any @f@.

-- See Note [IntMap merge complexity]
mergeWithKey :: (Key -> a -> b -> Maybe c) -> (IntMap a -> IntMap c) -> (IntMap b -> IntMap c)
-> IntMap a -> IntMap b -> IntMap c
mergeWithKey f g1 g2 = mergeWithKey' bin combine g1 g2
Expand Down Expand Up @@ -2375,13 +2388,15 @@ deleteMax = maybe Nil snd . maxView
{--------------------------------------------------------------------
Submap
--------------------------------------------------------------------}
-- | \(O(n+m)\). Is this a proper submap? (ie. a submap but not equal).
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Is this a proper submap? (i.e. a submap but not equal).
-- Defined as (@'isProperSubmapOf' = 'isProperSubmapOfBy' (==)@).
-- Specialises 'isProperSubmapOfBy' to compare values with '(==)'.
isProperSubmapOf :: Eq a => IntMap a -> IntMap a -> Bool
isProperSubmapOf = isProperSubmapOfBy (==)

{- | \(O(n+m)\). Is this a proper submap? (ie. a submap but not equal).
{- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
Is this a proper submap? (i.e. a submap but not equal).
The expression (@'isProperSubmapOfBy' f m1 m2@) returns 'True' when
@keys m1@ and @keys m2@ are not equal,
all keys in @m1@ are in @m2@, and when @f@ returns 'True' when
Expand Down Expand Up @@ -2432,13 +2447,14 @@ submapCmp predicate (Tip k x) t
submapCmp _ Nil Nil = EQ
submapCmp _ Nil _ = LT

-- | \(O(n+m)\). Is this a submap?
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Is this a submap?
-- Defined as (@'isSubmapOf' = 'isSubmapOfBy' (==)@).
-- Specialises 'isSubmapOfBy' to compare values with '(==)'.
isSubmapOf :: Eq a => IntMap a -> IntMap a -> Bool
isSubmapOf = isSubmapOfBy (==)

{- | \(O(n+m)\).
{- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
The expression (@'isSubmapOfBy' f m1 m2@) returns 'True' if
all keys in @m1@ are in @m2@, and when @f@ returns 'True' when
applied to their respective values. For example, the following
Expand Down Expand Up @@ -3812,3 +3828,39 @@ withEmpty bars = " ":bars
-- right child. We have the same three cases for a Bin. However, the bitwise
-- operations we use to determine the case is naturally different due to the
-- difference in representation.

-- Note [IntMap merge complexity]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- The merge algorithm (used for union, intersection, etc.) is adapted from
-- Okasaki and Gill, who give the complexity as O(m+n), where m and n are the
-- sizes of the two input maps. This is correct, since we visit all
-- constructors in both maps in the worst case, but we can find a tighter bound.
--
-- Consider that m<=n, i.e. m is the size of the smaller map and n is the size
-- of the larger. It does not matter which map is the first argument.
--
-- Now we have O(n) as one upper bound for our complexity, since O(n) is the
-- same as O(m+n) for m<=n.
--
-- Next, consider the smaller map. For this map, we will visit some
-- constructors, plus all the Bins of the larger map that lie in our way.
-- For the former, the worst case is that we visit all constructors, which is
-- O(m).
-- For the latter, the worst case is that we encounter Bins at every point
-- possible. This happens when, for every key in the smaller map, the path to
-- that key's Tip in the larger map has the full length of W, with a Bin at
-- every bit position. To maximize the total number of Bins, the paths should
-- be as disjoint as possible. But even if the paths are spread out, they
-- unavoidably share Bins near the root: the first lg(m) levels below the root
-- hold only O(m) Bins in total. Beyond this depth, the m paths may be
-- disjoint, each contributing up to W - lg(m) further Bins. This gives us a
-- total of O(m + m (W - lg m)) = O(m log (2^W / m)), since
-- W - lg m = log (2^W / m).
-- The number of Bins we encounter is also bounded by the total number of Bins,
-- which is n-1, but we already have O(n) as an upper bound.
--
-- Combining our bounds, we have the final complexity as
-- O(min(n, m log (2^W / m))).
--
-- Note that
-- * This is similar to the Map merge complexity, which is O(m log (n/m)).
-- * When m is a small constant the term simplifies to O(min(n, W)), which is
-- just the complexity we expect for single operations like insert and delete.
21 changes: 14 additions & 7 deletions containers/src/Data/IntMap/Strict/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -636,7 +636,8 @@ unionsWith :: Foldable f => (a->a->a) -> f (IntMap a) -> IntMap a
unionsWith f ts
= Foldable.foldl' (unionWith f) empty ts

-- | \(O(n+m)\). The union with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The union with a combining function.
--
-- > unionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "aA"), (7, "C")]
--
Expand All @@ -646,7 +647,8 @@ unionWith :: (a -> a -> a) -> IntMap a -> IntMap a -> IntMap a
unionWith f m1 m2
= unionWithKey (\_ x y -> f x y) m1 m2

-- | \(O(n+m)\). The union with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The union with a combining function.
--
-- > let f key left_value right_value = (show key) ++ ":" ++ left_value ++ "|" ++ right_value
-- > unionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "5:a|A"), (7, "C")]
Expand All @@ -661,7 +663,8 @@ unionWithKey f m1 m2
Difference
--------------------------------------------------------------------}

-- | \(O(n+m)\). Difference with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Difference with a combining function.
--
-- > let f al ar = if al == "b" then Just (al ++ ":" ++ ar) else Nothing
-- > differenceWith f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (3, "B"), (7, "C")])
Expand All @@ -671,7 +674,8 @@ differenceWith :: (a -> b -> Maybe a) -> IntMap a -> IntMap b -> IntMap a
differenceWith f m1 m2
= differenceWithKey (\_ x y -> f x y) m1 m2

-- | \(O(n+m)\). Difference with a combining function. When two equal keys are
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Difference with a combining function. When two equal keys are
-- encountered, the combining function is applied to the key and both values.
-- If it returns 'Nothing', the element is discarded (proper set difference).
-- If it returns (@'Just' y@), the element is updated with a new value @y@.
Expand All @@ -688,15 +692,17 @@ differenceWithKey f m1 m2
Intersection
--------------------------------------------------------------------}

-- | \(O(n+m)\). The intersection with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The intersection with a combining function.
--
-- > intersectionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "aA"

-- Key-agnostic front end to this module's 'intersectionWithKey': @f@ sees
-- only the two values stored under a shared key.
intersectionWith :: (a -> b -> c) -> IntMap a -> IntMap b -> IntMap c
intersectionWith f m1 m2 =
  intersectionWithKey withoutKey m1 m2
  where
    -- Drops the key argument before delegating to @f@.
    withoutKey _ valL valR = f valL valR

-- | \(O(n+m)\). The intersection with a combining function.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The intersection with a combining function.
--
-- > let f k al ar = (show k) ++ ":" ++ al ++ "|" ++ ar
-- > intersectionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "5:a|A"
Expand All @@ -709,7 +715,8 @@ intersectionWithKey f m1 m2
MergeWithKey
--------------------------------------------------------------------}

-- | \(O(n+m)\). A high-performance universal combining function. Using
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- A high-performance universal combining function. Using
-- 'mergeWithKey', all combining functions can be defined without any loss of
-- efficiency (with exception of 'union', 'difference' and 'intersection',
-- where sharing of some nodes is lost with 'mergeWithKey').
Expand Down
24 changes: 16 additions & 8 deletions containers/src/Data/IntSet/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,8 @@ infixl 9 \\{-This comment teaches CPP correct behaviour -}
{--------------------------------------------------------------------
Operators
--------------------------------------------------------------------}
-- | \(O(n+m)\). See 'difference'.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- See 'difference'.
-- Operator spelling of 'difference': the left operand minus the right one.
(\\) :: IntSet -> IntSet -> IntSet
(\\) minuend subtrahend = minuend `difference` subtrahend

Expand Down Expand Up @@ -578,7 +579,8 @@ unions xs
= Foldable.foldl' union empty xs


-- | \(O(n+m)\). The union of two sets.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The union of two sets.
union :: IntSet -> IntSet -> IntSet
union t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of
ABL -> Bin p1 (union l1 t2) r1
Expand All @@ -596,7 +598,8 @@ union Nil t = t
{--------------------------------------------------------------------
Difference
--------------------------------------------------------------------}
-- | \(O(n+m)\). Difference between two sets.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Difference between two sets.
difference :: IntSet -> IntSet -> IntSet
difference t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of
ABL -> bin p1 (difference l1 t2) r1
Expand Down Expand Up @@ -624,7 +627,8 @@ difference Nil _ = Nil
{--------------------------------------------------------------------
Intersection
--------------------------------------------------------------------}
-- | \(O(n+m)\). The intersection of two sets.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The intersection of two sets.
intersection :: IntSet -> IntSet -> IntSet
intersection t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of
ABL -> intersection l1 t2
Expand Down Expand Up @@ -696,7 +700,8 @@ instance Semigroup Intersection where
Symmetric difference
--------------------------------------------------------------------}

-- | \(O(n+m)\). The symmetric difference of two sets.
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- The symmetric difference of two sets.
--
-- The result contains elements that appear in exactly one of the two sets.
--
Expand Down Expand Up @@ -734,7 +739,8 @@ symDiffTip !t1 !kx1 !bm1 = go
{--------------------------------------------------------------------
Subset
--------------------------------------------------------------------}
-- | \(O(n+m)\). Is this a proper subset? (ie. a subset but not equal).
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Is this a proper subset? (i.e. a subset but not equal).
isProperSubsetOf :: IntSet -> IntSet -> Bool
isProperSubsetOf t1 t2
= case subsetCmp t1 t2 of
Expand Down Expand Up @@ -770,7 +776,8 @@ subsetCmp (Tip _ _) Nil = GT -- disjoint
subsetCmp Nil Nil = EQ
subsetCmp Nil _ = LT

-- | \(O(n+m)\). Is this a subset?
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Is this a subset?
-- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@.

isSubsetOf :: IntSet -> IntSet -> Bool
Expand All @@ -794,7 +801,8 @@ isSubsetOf Nil _ = True
{--------------------------------------------------------------------
Disjoint
--------------------------------------------------------------------}
-- | \(O(n+m)\). Check whether two sets are disjoint (i.e. their intersection
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
-- Check whether two sets are disjoint (i.e. their intersection
-- is empty).
--
-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True
Expand Down
Loading