Skip to content

Commit 369fb4b

Browse files
authored
Tighter bounds for IntMap and IntSet merge (#1110)
1 parent bc0bcca commit 369fb4b

File tree

3 files changed

+102
-35
lines changed

3 files changed

+102
-35
lines changed

containers/src/Data/IntMap/Internal.hs

+72-20
Original file line numberDiff line numberDiff line change
@@ -740,7 +740,8 @@ unsafeFindMax (Bin _ _ r) = unsafeFindMax r
740740
{--------------------------------------------------------------------
741741
Disjoint
742742
--------------------------------------------------------------------}
743-
-- | \(O(n+m)\). Check whether the key sets of two maps are disjoint
743+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
744+
-- Check whether the key sets of two maps are disjoint
744745
-- (i.e. their 'intersection' is empty).
745746
--
746747
-- > disjoint (fromList [(2,'a')]) (fromList [(1,()), (3,())]) == True
@@ -1091,7 +1092,8 @@ unionsWith :: Foldable f => (a->a->a) -> f (IntMap a) -> IntMap a
10911092
unionsWith f ts
10921093
= Foldable.foldl' (unionWith f) empty ts
10931094

1094-
-- | \(O(n+m)\). The (left-biased) union of two maps.
1095+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1096+
-- The (left-biased) union of two maps.
10951097
-- It prefers the first map when duplicate keys are encountered,
10961098
-- i.e. (@'union' == 'unionWith' 'const'@).
10971099
--
@@ -1101,7 +1103,8 @@ union :: IntMap a -> IntMap a -> IntMap a
11011103
union m1 m2
11021104
= mergeWithKey' Bin const id id m1 m2
11031105

1104-
-- | \(O(n+m)\). The union with a combining function.
1106+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1107+
-- The union with a combining function.
11051108
--
11061109
-- > unionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "aA"), (7, "C")]
11071110
--
@@ -1111,7 +1114,8 @@ unionWith :: (a -> a -> a) -> IntMap a -> IntMap a -> IntMap a
11111114
unionWith f m1 m2
11121115
= unionWithKey (\_ x y -> f x y) m1 m2
11131116

1114-
-- | \(O(n+m)\). The union with a combining function.
1117+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1118+
-- The union with a combining function.
11151119
--
11161120
-- > let f key left_value right_value = (show key) ++ ":" ++ left_value ++ "|" ++ right_value
11171121
-- > unionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "5:a|A"), (7, "C")]
@@ -1125,15 +1129,17 @@ unionWithKey f m1 m2
11251129
{--------------------------------------------------------------------
11261130
Difference
11271131
--------------------------------------------------------------------}
1128-
-- | \(O(n+m)\). Difference between two maps (based on keys).
1132+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1133+
-- Difference between two maps (based on keys).
11291134
--
11301135
-- > difference (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 3 "b"
11311136

11321137
difference :: IntMap a -> IntMap b -> IntMap a
11331138
difference m1 m2
11341139
= mergeWithKey (\_ _ _ -> Nothing) id (const Nil) m1 m2
11351140

1136-
-- | \(O(n+m)\). Difference with a combining function.
1141+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1142+
-- Difference with a combining function.
11371143
--
11381144
-- > let f al ar = if al == "b" then Just (al ++ ":" ++ ar) else Nothing
11391145
-- > differenceWith f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (3, "B"), (7, "C")])
@@ -1143,7 +1149,8 @@ differenceWith :: (a -> b -> Maybe a) -> IntMap a -> IntMap b -> IntMap a
11431149
differenceWith f m1 m2
11441150
= differenceWithKey (\_ x y -> f x y) m1 m2
11451151

1146-
-- | \(O(n+m)\). Difference with a combining function. When two equal keys are
1152+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1153+
-- Difference with a combining function. When two equal keys are
11471154
-- encountered, the combining function is applied to the key and both values.
11481155
-- If it returns 'Nothing', the element is discarded (proper set difference).
11491156
-- If it returns (@'Just' y@), the element is updated with a new value @y@.
@@ -1157,8 +1164,8 @@ differenceWithKey f m1 m2
11571164
= mergeWithKey f id (const Nil) m1 m2
11581165

11591166

1160-
-- TODO(wrengr): re-verify that asymptotic bound
1161-
-- | \(O(n+m)\). Remove all the keys in a given set from a map.
1167+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1168+
-- Remove all the keys in a given set from a map.
11621169
--
11631170
-- @
11641171
-- m \`withoutKeys\` s = 'filterWithKey' (\\k _ -> k ``IntSet.notMember`` s) m
@@ -1221,7 +1228,8 @@ withoutBM _ Nil = Nil
12211228
{--------------------------------------------------------------------
12221229
Intersection
12231230
--------------------------------------------------------------------}
1224-
-- | \(O(n+m)\). The (left-biased) intersection of two maps (based on keys).
1231+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1232+
-- The (left-biased) intersection of two maps (based on keys).
12251233
--
12261234
-- > intersection (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "a"
12271235

@@ -1230,8 +1238,8 @@ intersection m1 m2
12301238
= mergeWithKey' bin const (const Nil) (const Nil) m1 m2
12311239

12321240

1233-
-- TODO(wrengr): re-verify that asymptotic bound
1234-
-- | \(O(n+m)\). The restriction of a map to the keys in a set.
1241+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1242+
-- The restriction of a map to the keys in a set.
12351243
--
12361244
-- @
12371245
-- m \`restrictKeys\` s = 'filterWithKey' (\\k _ -> k ``IntSet.member`` s) m
@@ -1291,15 +1299,17 @@ restrictBM bm t@(Tip k _)
12911299
restrictBM _ Nil = Nil
12921300

12931301

1294-
-- | \(O(n+m)\). The intersection with a combining function.
1302+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1303+
-- The intersection with a combining function.
12951304
--
12961305
-- > intersectionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "aA"
12971306

12981307
intersectionWith :: (a -> b -> c) -> IntMap a -> IntMap b -> IntMap c
12991308
intersectionWith f m1 m2
13001309
= intersectionWithKey (\_ x y -> f x y) m1 m2
13011310

1302-
-- | \(O(n+m)\). The intersection with a combining function.
1311+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1312+
-- The intersection with a combining function.
13031313
--
13041314
-- > let f k al ar = (show k) ++ ":" ++ al ++ "|" ++ ar
13051315
-- > intersectionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "5:a|A"
@@ -1312,7 +1322,8 @@ intersectionWithKey f m1 m2
13121322
Symmetric difference
13131323
--------------------------------------------------------------------}
13141324

1315-
-- | \(O(n+m)\). The symmetric difference of two maps.
1325+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1326+
-- The symmetric difference of two maps.
13161327
--
13171328
-- The result contains entries whose keys appear in exactly one of the two maps.
13181329
--
@@ -1355,7 +1366,8 @@ symDiffTip !t1 !k1 = go
13551366
MergeWithKey
13561367
--------------------------------------------------------------------}
13571368

1358-
-- | \(O(n+m)\). A high-performance universal combining function. Using
1369+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
1370+
-- A high-performance universal combining function. Using
13591371
-- 'mergeWithKey', all combining functions can be defined without any loss of
13601372
-- efficiency (with exception of 'union', 'difference' and 'intersection',
13611373
-- where sharing of some nodes is lost with 'mergeWithKey').
@@ -1391,6 +1403,7 @@ symDiffTip !t1 !k1 = go
13911403
-- @only2@ are 'id' and @'const' 'empty'@, but for example @'map' f@ or
13921404
-- @'filterWithKey' f@ could be used for any @f@.
13931405

1406+
-- See Note [IntMap merge complexity]
13941407
mergeWithKey :: (Key -> a -> b -> Maybe c) -> (IntMap a -> IntMap c) -> (IntMap b -> IntMap c)
13951408
-> IntMap a -> IntMap b -> IntMap c
13961409
mergeWithKey f g1 g2 = mergeWithKey' bin combine g1 g2
@@ -2375,13 +2388,15 @@ deleteMax = maybe Nil snd . maxView
23752388
{--------------------------------------------------------------------
23762389
Submap
23772390
--------------------------------------------------------------------}
2378-
-- | \(O(n+m)\). Is this a proper submap? (ie. a submap but not equal).
2391+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
2392+
-- Is this a proper submap? (i.e. a submap but not equal).
23792393
-- Defined as (@'isProperSubmapOf' = 'isProperSubmapOfBy' (==)@).
23802394
isProperSubmapOf :: Eq a => IntMap a -> IntMap a -> Bool
23812395
isProperSubmapOf m1 m2
23822396
= isProperSubmapOfBy (==) m1 m2
23832397

2384-
{- | \(O(n+m)\). Is this a proper submap? (ie. a submap but not equal).
2398+
{- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
2399+
Is this a proper submap? (i.e. a submap but not equal).
23852400
The expression (@'isProperSubmapOfBy' f m1 m2@) returns 'True' when
23862401
@keys m1@ and @keys m2@ are not equal,
23872402
all keys in @m1@ are in @m2@, and when @f@ returns 'True' when
@@ -2432,13 +2447,14 @@ submapCmp predicate (Tip k x) t
24322447
submapCmp _ Nil Nil = EQ
24332448
submapCmp _ Nil _ = LT
24342449

2435-
-- | \(O(n+m)\). Is this a submap?
2450+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
2451+
-- Is this a submap?
24362452
-- Defined as (@'isSubmapOf' = 'isSubmapOfBy' (==)@).
24372453
isSubmapOf :: Eq a => IntMap a -> IntMap a -> Bool
24382454
isSubmapOf m1 m2
24392455
= isSubmapOfBy (==) m1 m2
24402456

2441-
{- | \(O(n+m)\).
2457+
{- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
24422458
The expression (@'isSubmapOfBy' f m1 m2@) returns 'True' if
24432459
all keys in @m1@ are in @m2@, and when @f@ returns 'True' when
24442460
applied to their respective values. For example, the following
@@ -3812,3 +3828,39 @@ withEmpty bars = " ":bars
38123828
-- right child. We have the same three cases for a Bin. However, the bitwise
38133829
-- operations we use to determine the case is naturally different due to the
38143830
-- difference in representation.
3831+
3832+
-- Note [IntMap merge complexity]
3833+
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3834+
-- The merge algorithm (used for union, intersection, etc.) is adopted from
3835+
-- Okasaki-Gill who give the complexity as O(m+n), where m and n are the sizes
3836+
-- of the two input maps. This is correct, since we visit all constructors in
3837+
-- both maps in the worst case, but we can try to find a tighter bound.
3838+
--
3839+
-- Assume that m<=n, i.e. m is the size of the smaller map and n is the size
3840+
-- of the larger. It does not matter which map is the first argument.
3841+
--
3842+
-- Now we have O(n) as one upper bound for our complexity, since O(n) is the
3843+
-- same as O(m+n) for m<=n.
3844+
--
3845+
-- Next, consider the smaller map. For this map, we will visit some
3846+
-- constructors, plus all the Bins of the larger map that lie in our way.
3847+
-- For the former, the worst case is that we visit all constructors, which is
3848+
-- O(m).
3849+
-- For the latter, the worst case is that we encounter Bins at every point
3850+
-- possible. This happens when for every key in the smaller map, the path to
3851+
-- that key's Tip in the larger map has a full length of W, with a Bin at every
3852+
-- bit position. To maximize the total number of Bins, the paths should be as
3853+
-- disjoint as possible. But even if the paths are spread out, at least O(m)
3854+
-- Bins are unavoidably shared, which extend up to a depth of lg(m) from the
3855+
-- root. Beyond this, the paths may be disjoint. This gives us a total of
3856+
-- O(m + m (W - lg m)) = O(m log (2^W / m)).
3857+
-- The number of Bins we encounter is also bounded by the total number of Bins,
3858+
-- which is n-1, but we already have O(n) as an upper bound.
3859+
--
3860+
-- Combining our bounds, we have the final complexity as
3861+
-- O(min(n, m log (2^W / m))).
3862+
--
3863+
-- Note that
3864+
-- * This is similar to the Map merge complexity, which is O(m log (n/m + 1)).
3865+
-- * When m is a small constant the term simplifies to O(min(n, W)), which is
3866+
-- just the complexity we expect for single operations like insert and delete.

containers/src/Data/IntMap/Strict/Internal.hs

+14-7
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,8 @@ unionsWith :: Foldable f => (a->a->a) -> f (IntMap a) -> IntMap a
636636
unionsWith f ts
637637
= Foldable.foldl' (unionWith f) empty ts
638638

639-
-- | \(O(n+m)\). The union with a combining function.
639+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
640+
-- The union with a combining function.
640641
--
641642
-- > unionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "aA"), (7, "C")]
642643
--
@@ -646,7 +647,8 @@ unionWith :: (a -> a -> a) -> IntMap a -> IntMap a -> IntMap a
646647
unionWith f m1 m2
647648
= unionWithKey (\_ x y -> f x y) m1 m2
648649

649-
-- | \(O(n+m)\). The union with a combining function.
650+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
651+
-- The union with a combining function.
650652
--
651653
-- > let f key left_value right_value = (show key) ++ ":" ++ left_value ++ "|" ++ right_value
652654
-- > unionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == fromList [(3, "b"), (5, "5:a|A"), (7, "C")]
@@ -661,7 +663,8 @@ unionWithKey f m1 m2
661663
Difference
662664
--------------------------------------------------------------------}
663665

664-
-- | \(O(n+m)\). Difference with a combining function.
666+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
667+
-- Difference with a combining function.
665668
--
666669
-- > let f al ar = if al == "b" then Just (al ++ ":" ++ ar) else Nothing
667670
-- > differenceWith f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (3, "B"), (7, "C")])
@@ -671,7 +674,8 @@ differenceWith :: (a -> b -> Maybe a) -> IntMap a -> IntMap b -> IntMap a
671674
differenceWith f m1 m2
672675
= differenceWithKey (\_ x y -> f x y) m1 m2
673676

674-
-- | \(O(n+m)\). Difference with a combining function. When two equal keys are
677+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
678+
-- Difference with a combining function. When two equal keys are
675679
-- encountered, the combining function is applied to the key and both values.
676680
-- If it returns 'Nothing', the element is discarded (proper set difference).
677681
-- If it returns (@'Just' y@), the element is updated with a new value @y@.
@@ -688,15 +692,17 @@ differenceWithKey f m1 m2
688692
Intersection
689693
--------------------------------------------------------------------}
690694

691-
-- | \(O(n+m)\). The intersection with a combining function.
695+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
696+
-- The intersection with a combining function.
692697
--
693698
-- > intersectionWith (++) (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "aA"
694699

695700
intersectionWith :: (a -> b -> c) -> IntMap a -> IntMap b -> IntMap c
696701
intersectionWith f m1 m2
697702
= intersectionWithKey (\_ x y -> f x y) m1 m2
698703

699-
-- | \(O(n+m)\). The intersection with a combining function.
704+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
705+
-- The intersection with a combining function.
700706
--
701707
-- > let f k al ar = (show k) ++ ":" ++ al ++ "|" ++ ar
702708
-- > intersectionWithKey f (fromList [(5, "a"), (3, "b")]) (fromList [(5, "A"), (7, "C")]) == singleton 5 "5:a|A"
@@ -709,7 +715,8 @@ intersectionWithKey f m1 m2
709715
MergeWithKey
710716
--------------------------------------------------------------------}
711717

712-
-- | \(O(n+m)\). A high-performance universal combining function. Using
718+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
719+
-- A high-performance universal combining function. Using
713720
-- 'mergeWithKey', all combining functions can be defined without any loss of
714721
-- efficiency (with exception of 'union', 'difference' and 'intersection',
715722
-- where sharing of some nodes is lost with 'mergeWithKey').

containers/src/Data/IntSet/Internal.hs

+16-8
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ infixl 9 \\{-This comment teaches CPP correct behaviour -}
242242
{--------------------------------------------------------------------
243243
Operators
244244
--------------------------------------------------------------------}
245-
-- | \(O(n+m)\). See 'difference'.
245+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
246+
-- See 'difference'.
246247
(\\) :: IntSet -> IntSet -> IntSet
247248
m1 \\ m2 = difference m1 m2
248249

@@ -578,7 +579,8 @@ unions xs
578579
= Foldable.foldl' union empty xs
579580

580581

581-
-- | \(O(n+m)\). The union of two sets.
582+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
583+
-- The union of two sets.
582584
union :: IntSet -> IntSet -> IntSet
583585
union t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of
584586
ABL -> Bin p1 (union l1 t2) r1
@@ -596,7 +598,8 @@ union Nil t = t
596598
{--------------------------------------------------------------------
597599
Difference
598600
--------------------------------------------------------------------}
599-
-- | \(O(n+m)\). Difference between two sets.
601+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
602+
-- Difference between two sets.
600603
difference :: IntSet -> IntSet -> IntSet
601604
difference t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of
602605
ABL -> bin p1 (difference l1 t2) r1
@@ -624,7 +627,8 @@ difference Nil _ = Nil
624627
{--------------------------------------------------------------------
625628
Intersection
626629
--------------------------------------------------------------------}
627-
-- | \(O(n+m)\). The intersection of two sets.
630+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
631+
-- The intersection of two sets.
628632
intersection :: IntSet -> IntSet -> IntSet
629633
intersection t1@(Bin p1 l1 r1) t2@(Bin p2 l2 r2) = case treeTreeBranch p1 p2 of
630634
ABL -> intersection l1 t2
@@ -696,7 +700,8 @@ instance Semigroup Intersection where
696700
Symmetric difference
697701
--------------------------------------------------------------------}
698702

699-
-- | \(O(n+m)\). The symmetric difference of two sets.
703+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
704+
-- The symmetric difference of two sets.
700705
--
701706
-- The result contains elements that appear in exactly one of the two sets.
702707
--
@@ -734,7 +739,8 @@ symDiffTip !t1 !kx1 !bm1 = go
734739
{--------------------------------------------------------------------
735740
Subset
736741
--------------------------------------------------------------------}
737-
-- | \(O(n+m)\). Is this a proper subset? (ie. a subset but not equal).
742+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
743+
-- Is this a proper subset? (i.e. a subset but not equal).
738744
isProperSubsetOf :: IntSet -> IntSet -> Bool
739745
isProperSubsetOf t1 t2
740746
= case subsetCmp t1 t2 of
@@ -770,7 +776,8 @@ subsetCmp (Tip _ _) Nil = GT -- disjoint
770776
subsetCmp Nil Nil = EQ
771777
subsetCmp Nil _ = LT
772778

773-
-- | \(O(n+m)\). Is this a subset?
779+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
780+
-- Is this a subset?
774781
-- @(s1 \`isSubsetOf\` s2)@ tells whether @s1@ is a subset of @s2@.
775782

776783
isSubsetOf :: IntSet -> IntSet -> Bool
@@ -794,7 +801,8 @@ isSubsetOf Nil _ = True
794801
{--------------------------------------------------------------------
795802
Disjoint
796803
--------------------------------------------------------------------}
797-
-- | \(O(n+m)\). Check whether two sets are disjoint (i.e. their intersection
804+
-- | \(O(\min(n, m \log \frac{2^W}{m})), m \leq n\).
805+
-- Check whether two sets are disjoint (i.e. their intersection
798806
-- is empty).
799807
--
800808
-- > disjoint (fromList [2,4,6]) (fromList [1,3]) == True

0 commit comments

Comments
 (0)