-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmake_small_files.py
128 lines (100 loc) · 2.95 KB
/
make_small_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python3
import math
import os
import random
from hashlib import sha256
from pathlib import Path

import click
# Description: Creates many directories and small files for
# performance-testing GridSync/Magic Folder.
# Author: Meejah
# Date: 2023-01-20
# Running hash used as a stream of hex digits: every call to
# random_file_segment() feeds it one more byte, so each digest differs
# from the previous one.
_hash = sha256("a random seed".encode("utf8"))

# Every segment handed out so far. Truncating a digest to a random
# length throws away its uniqueness (there are only 16 possible
# one-character segments), so repeats have to be filtered explicitly.
_used_segments = set()


def random_file_segment():
    """
    A random filename segment for a directory or file

    The segment is a prefix of random length (1 to 63 characters) of the
    hex digest of the module-level running SHA-256 hash. A candidate
    that was already returned earlier in this process is discarded and
    a new one is drawn, so two calls never produce the same name and
    paths built from these segments cannot clash with each other.

    Returns a non-empty string of lowercase hexadecimal characters.
    """
    while True:
        # advance the hash so this digest differs from the last one
        _hash.update("a".encode("utf8"))
        digest = _hash.hexdigest()
        segment = digest[:random.randrange(1, len(digest))]
        if segment not in _used_segments:
            _used_segments.add(segment)
            return segment
def generate_directories(base, count):
    """
    Lazily produce `count` randomly named directory paths, each one a
    direct child of `base`.

    Only the path objects are built here; nothing is created on disk.
    """
    # XXX hypothesis strategies would be a nicer source of names, but
    # wiring them in is non-trivial.
    yield from (base / random_file_segment() for _ in range(count))
def generate_filename_segments(files):
    """
    Lazily produce `files` random filename segments, one per file that
    is going to be written.
    """
    yield from (random_file_segment() for _ in range(files))
def generate_local_paths(output, files, directories):
    """
    generator for a sequence of path names

    Yields `files` file paths spread over `directories` sub-directories
    of `output`. Each sub-directory is created on disk (with `mkdir`)
    just before the first path inside it is yielded; the files
    themselves are not created here. `output` is expected to exist
    already.

    The first `directories` paths put one file into each new
    sub-directory; the remaining paths are dealt out round-robin over
    those sub-directories. When no sub-directory was requested, the
    files are placed directly in `output`.

    Raises ValueError if there are fewer files than directories, since
    every directory must receive at least one file.
    """
    if files < directories:
        # Without this check the next() below would leak StopIteration
        # out of the generator, which Python reports as an opaque
        # RuntimeError.
        raise ValueError(
            "need at least as many files as directories "
            "(got {} files for {} directories)".format(files, directories)
        )

    dir_names = generate_directories(output, directories)
    file_names = generate_filename_segments(files)

    # since we need at least one file in each directory (because we
    # don't directly store directories) we place one of our files in
    # each subdir
    reusable_dirs = []
    for d in dir_names:
        reusable_dirs.append(d)
        f = next(file_names)
        d.mkdir()
        yield d / f

    if not reusable_dirs:
        # No sub-directories at all: keep the remaining files in the
        # base directory instead of dividing by zero below.
        reusable_dirs.append(output)

    # deal the left-over files out round-robin over the directories
    idx = 0
    for f in file_names:
        idx = (idx + 1) % len(reusable_dirs)
        yield reusable_dirs[idx] / f
# Command-line entry point. The docstring of the function doubles as the
# text click prints for --help, so it is kept short and user-facing.
@click.command()
@click.option(
    "--files",
    default=665,
    help="Number of files to put data in",
)
@click.option(
    "--directories",
    default=237,
    help="Number of folders to split data into",
)
@click.option(
    "--output",
    type=click.Path(exists=False, file_okay=False, dir_okay=True),
    default="./small-test-case",
    help="Directory to create and fill; it must not exist yet",
)
@click.option(
    "--size",
    default=3*1024*1024,
    help="Approximate total number of random bytes, split across the files",
)
def small_files(files, directories, output, size):
    """
    small-files test-case creator
    """
    print(output)

    # Reject inputs that would otherwise end in a bare traceback
    # (division by zero, negative read size) further down.
    if files < 1:
        raise click.UsageError("Must have at least one file")
    if directories < 1:
        raise click.UsageError("Must have at least one directory")
    if size < 0:
        raise click.UsageError("Size must not be negative")
    if directories > files:
        # equality is fine: one file per directory
        raise click.UsageError(
            "Must have at least as many files as directories"
        )

    # Round up so the files together hold at least `size` bytes.
    data_per_file = math.ceil(size / files)

    outp = Path(output)
    try:
        outp.mkdir()
    except FileExistsError:
        raise click.UsageError(
            "Output directory {!r} already exists".format(output)
        ) from None

    for path in generate_local_paths(outp, files, directories):
        print(path)
        # os.urandom() is portable, unlike reading /dev/urandom, so
        # this also works on Windows.
        path.write_bytes(os.urandom(data_per_file))
# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    small_files()