-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
chore: add balls and bins simulator #2001
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#!/usr/bin/env python3 | ||
|
||
"""Simulate throwing balls into bins.""" | ||
|
||
import numpy as np | ||
import argparse | ||
import matplotlib.pyplot as plt | ||
|
||
|
||
def simulate_balls_into_bins(balls: int, N: int, threshold: int, exact, trials: int = 10000) -> tuple:
    """Simulate throwing ``balls`` balls uniformly at random into ``N`` bins.

    Each trial draws a bin index for every ball with ``np.random.randint``,
    so results are reproducible after ``np.random.seed(...)``.

    Args:
        balls: Number of balls thrown in each trial.
        N: Number of bins. Must be positive.
        threshold: A trial is a "success" when at least one bin ends up
            holding ``threshold`` or more balls.
        exact: Optional ball count. When not ``None``, a trial is an "exact
            success" when at least one bin holds exactly ``exact`` balls.
        trials: Number of independent trials to run. Must be positive.

    Returns:
        A tuple ``(deltas, probability, exact_probability)`` where ``deltas``
        is a list with one entry per trial (fullest bin minus emptiest bin),
        ``probability`` is the fraction of trials that were a success, and
        ``exact_probability`` is the fraction of exact successes (``0.0``
        when ``exact`` is ``None``).

    Raises:
        ValueError: If ``N`` or ``trials`` is not positive.
    """
    # Fail early with a clear message instead of an opaque NumPy error
    # (N <= 0) or a ZeroDivisionError at the very end (trials == 0).
    if N <= 0:
        raise ValueError(f"N must be a positive number of bins, got {N}")
    if trials <= 0:
        raise ValueError(f"trials must be positive, got {trials}")

    success = 0
    exact_success = 0
    deltas = []

    for _ in range(trials):
        # Pick a bin for every ball, then tally the balls per bin.
        # minlength=N keeps empty trailing bins in the tally so that
        # counts.min() sees them.
        bins = np.random.randint(0, N, balls)
        counts = np.bincount(bins, minlength=N)

        deltas.append(counts.max() - counts.min())

        # Does any bin hold `threshold` or more balls?
        if np.any(counts >= threshold):
            success += 1
        # Does any bin hold exactly `exact` balls?
        if exact is not None and np.any(counts == exact):
            exact_success += 1

    return deltas, success / trials, exact_success / trials
|
||
|
||
def main():
    """Command-line entry point: parse options, simulate, print, and plot.

    Reads ``--balls``, ``--bins``, ``--high-threshold``, ``--exact-num`` and
    ``--trials`` from the command line, runs ``simulate_balls_into_bins``
    with them, prints the estimated probabilities, and shows a histogram of
    the per-trial difference between the fullest and emptiest bin.
    """
    cli = argparse.ArgumentParser(description="Simulate throwing balls into bins.")
    cli.add_argument("--balls", type=int, default=30, help="Number of balls to throw.")
    cli.add_argument("--bins", type=int, default=3, help="Number of bins.")
    cli.add_argument(
        "--high-threshold",
        type=int,
        default=15,
        help="Minimum number of balls for the success condition",
    )
    cli.add_argument(
        "--exact-num",
        type=int,
        help="Exact number of balls for the success condition.",
    )
    cli.add_argument(
        "--trials",
        type=int,
        default=10000,
        help="Number of trials. Default is 10,000.",
    )
    opts = cli.parse_args()

    spread, at_least_prob, exact_prob = simulate_balls_into_bins(
        opts.balls, opts.bins, opts.high_threshold, opts.exact_num, opts.trials
    )

    print(
        f"Probability that at least one bin has {opts.high_threshold} or more balls: {at_least_prob}"
    )
    # The exact-count probability is only meaningful when --exact-num was given.
    if opts.exact_num is not None:
        print(f"Probability that at least one bin has {opts.exact_num} balls: {exact_prob}")

    print(
        f"Histogram of the difference between the most and least populated bins for {opts.trials} trials"
    )
    # bins=30 is the number of histogram buckets spread over the observed
    # range of differences; it is unrelated to the --bins simulation option.
    plt.hist(spread, bins=30, color="steelblue", edgecolor="none")
    plt.show()
|
||
|
||
# Run the simulation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tiny nit: if you plan to expand this in the future, replace `N` with `bins`, since you already use `balls` instead of `M`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed