Implement maximum file sizes (#62)
det authored Feb 11, 2025
1 parent 6c18693 commit fc34cf3
Showing 7 changed files with 43 additions and 26 deletions.
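
In short, the generic check rule gains a maximum_file_size input (default 1 MB), registered as a per-check override and used to skip oversized files before bucketing and batching; alongside it, check callbacks move from FilesResult (a list of path strings) to CheckTargets, whose file objects carry .path and .size. A rough sketch of a rule opting into a smaller limit (the check name and command are illustrative, not from this commit):

check(
    name = "my_lint",  # hypothetical check name
    command = "my-lint {targets}",  # hypothetical command template
    maximum_file_size = 256 * 1024,  # skip files larger than 256 KB
)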
4 changes: 2 additions & 2 deletions .tk/config.toml
@@ -39,7 +39,7 @@ circleci.check = true
 circleci.version = "0.1.31425"
 cue_format.fmt = true
 cue_format.version = "0.12.0"
-# do_not_land.check = true
+do_not_land.check = true
 dart.check = true
 dart.fix = true
 dart.fmt = true
@@ -65,7 +65,7 @@ hadolint.check = true
 hadolint.version = "2.12.1-beta"
 isort.fmt = true
 isort.version = "6.0.0"
-# no_curly_quotes.check = true
+no_curly_quotes.check = true
 osv-scanner.check = true
 osv-scanner.version = "1.9.2"
 oxipng.fmt = true
2 changes: 1 addition & 1 deletion .tk/version
@@ -1 +1 @@
-v0.0.24
+v0.0.25
10 changes: 7 additions & 3 deletions rules/banned_strings_check.star
@@ -11,17 +11,21 @@ def banned_strings_check(
     label = native.label_string(":" + name)
     native.string_list(name = "strings", default = strings)

-    def impl(ctx: CheckContext, result: FilesResult):
+    def impl(ctx: CheckContext, targets: CheckTargets):
         re = regex.Regex("|".join([regex.escape(word) for word in ctx.inputs().strings]))
-        for batch in make_batches(result.files):
+        paths = [file.path for file in targets.files]
+        for batch in make_batches(paths):
             description = "{label} ({num_files} files)".format(label = label, num_files = len(batch))
             ctx.spawn(description = description, weight = len(batch)).then(run, ctx, re, batch)

 def run(ctx: CheckContext, re: regex.Regex, batch: list[str]):
     results = []
     for file in batch:
         abspath = fs.join(ctx.paths().workspace_dir, file)
-        data = fs.read_file(abspath)
+        data = fs.try_read_file(abspath)
+        if data == None:
+            # Skip files that are not UTF-8 encoded.
+            continue
         line_index = lines.LineIndex(data)
         for match in re.finditer(data):
             line_col = line_index.line_col(match.start(0))
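
The same migration pattern repeats below: impl callbacks now take CheckTargets instead of FilesResult, and each entry in targets.files is a file object rather than a bare path string. A minimal sketch of the new callback shape, assuming only the two fields this commit actually reads (.path and .size):

def impl(ctx: CheckContext, targets: CheckTargets):
    # Each target is an object; pull out paths, skipping anything oversized.
    paths = [file.path for file in targets.files if file.size <= 1024 * 1024]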
41 changes: 26 additions & 15 deletions rules/check.star
@@ -8,17 +8,17 @@ load("util:tarif.star", "tarif")
 # Bucket

 BucketContext = record(
-    files = list[str],
+    paths = list[str],
 )

 # Bucket all files into a single bucket to run from the workspace root.
 def bucket_by_workspace(ctx: BucketContext) -> dict[str, list[str]]:
-    return {".": ctx.files}
+    return {".": ctx.paths}

 # Bucket files to run from the directory containing the specified file.
 def _bucket_by_files(targets: list[str], ctx: BucketContext) -> dict[str, list[str]]:
     directories = {}
-    for file in ctx.files:
+    for file in ctx.paths:
         directory = walk_up_to_find_dir_of_files(file, targets) or "."
         if directory not in directories:
             directories[directory] = []
@@ -35,12 +35,12 @@ def bucket_by_file(target: str):
 # If the file doesn't exist, then ignore.
 def _bucket_by_files_or_ignore(targets: list[str], ctx: BucketContext) -> dict[str, list[str]]:
     directories = {}
-    for file in ctx.files:
-        directory = walk_up_to_find_dir_of_files(file, targets)
+    for path in ctx.paths:
+        directory = walk_up_to_find_dir_of_files(path, targets)
         if directory:
             if directory not in directories:
                 directories[directory] = []
-            directories[directory].append(fs.relative_to(file, directory))
+            directories[directory].append(fs.relative_to(path, directory))
     return directories

 def bucket_by_files_or_ignore(targets: list[str]):
@@ -52,8 +52,8 @@ def bucket_by_file_or_ignore(target: str):
 # Bucket files to run from the directory containing the specified file on each directory containing that file.
 def _bucket_directories_by_files(targets: list[str], ctx: BucketContext) -> dict[str, list[str]]:
     directories = set()
-    for file in ctx.files:
-        directory = walk_up_to_find_dir_of_files(file, targets) or "."
+    for path in ctx.paths:
+        directory = walk_up_to_find_dir_of_files(path, targets) or "."
         directories.add(directory)
     return {".": list(directories)}

@@ -66,18 +66,18 @@ def bucket_directories_by_file(target: str):
 # Bucket files to run from the parent directory of each file.
 def bucket_by_dir(ctx: BucketContext) -> dict[str, list[str]]:
     directories = {}
-    for file in ctx.files:
-        directory = fs.dirname(file)
+    for path in ctx.paths:
+        directory = fs.dirname(path)
         if directory not in directories:
             directories[directory] = []
-        directories[directory].append(fs.filename(file))
+        directories[directory].append(fs.filename(path))
     return directories

 # Run on the directories containing the files.
 def bucket_dirs_of_files(ctx: BucketContext) -> dict[str, list[str]]:
     directories = set()
-    for file in ctx.files:
-        directory = fs.dirname(file)
+    for path in ctx.paths:
+        directory = fs.dirname(path)
         directories.add(directory)
     return {".": list(directories)}

@@ -230,15 +230,24 @@ def check(
        bucket: typing.Callable = bucket_by_workspace,
        read_output_file: None | typing.Callable = None,
        update_command_line_replacements: None | typing.Callable = None,
+       maximum_file_size = 1024 * 1024, # 1 MB
        affects_cache = [],
        timeout_ms = 300000, # 5 minutes
        cache_results = False,
        cache_ttl = 60 * 60 * 24, # 24 hours
        target_description: str = "targets"):
     label = native.label_string(":" + name)

-    def impl(ctx: CheckContext, result: FilesResult):
-        buckets = bucket(BucketContext(files = result.files))
+    def impl(ctx: CheckContext, targets: CheckTargets):
+        # Filter files too large
+        paths = []
+        for file in targets.files:
+            if file.size > ctx.inputs().maximum_file_size:
+                continue
+            paths.append(file.path)
+
+        # Bucket by run from directory
+        buckets = bucket(BucketContext(paths = paths))
         for (run_from, targets) in buckets.items():
             batch(ctx, run_from, targets, ctx.inputs().batch_size)

@@ -319,6 +328,7 @@ def check(
     # Allow the user to override some settings.
     native.string(name = name + "_command", default = command)
     native.int(name = name + "_batch_size", default = batch_size)
+    native.int(name = name + "_maximum_file_size", default = maximum_file_size)
     native.bool(name = name + "_bisect", default = bisect)
     native.int_list(name = name + "_success_codes", default = success_codes)
     native.int_list(name = name + "_error_codes", default = error_codes)
@@ -331,6 +341,7 @@ def check(
         "tool": tool,
         "command": ":" + name + "_command",
         "batch_size": ":" + name + "_batch_size",
+        "maximum_file_size": ":" + name + "_maximum_file_size",
         "bisect": ":" + name + "_bisect",
         "success_codes": ":" + name + "_success_codes",
         "error_codes": ":" + name + "_error_codes",
2 changes: 1 addition & 1 deletion tool/biome/package.star
@@ -28,7 +28,7 @@ _SEVERITY_TO_LEVEL = {

 # It's non-trivial to turn this into text edits:
 # https://github.com/biomejs/biome/blob/0bb86c7bbabebace7ce0f17638f6f58585dae7d6/crates/biome_lsp/src/utils.rs#L26
-def _create_edits_from_diff(line_index, diff_data, file_path): # DONOTLAND
+def _create_edits_from_diff(line_index, diff_data, file_path):
     dictionary = diff_data["dictionary"]
     ops = diff_data["ops"]

6 changes: 3 additions & 3 deletions tool/sleep/package.star
@@ -1,6 +1,6 @@
-def _check_impl(ctx: CheckContext, result: FilesResult):
-    for file in result.files:
-        description = "sleep-check {file}".format(file = file)
+def _check_impl(ctx: CheckContext, targets: CheckTargets):
+    for file in targets.files:
+        description = "sleep-check {file}".format(file = file.path)
         ctx.spawn(description = description).then(_check_file)

 def _check_file():
4 changes: 3 additions & 1 deletion util/batch.star
@@ -1,5 +1,7 @@
-def make_batches(files: list[str], max_batch_size: int = 64) -> list[list[str]]:
+def make_batches(files: list[typing.Any], max_batch_size: int = 64) -> list[list[typing.Any]]:
     num_files = len(files)
+    if num_files == 0:
+        return []
     num_batches = (num_files + max_batch_size - 1) // max_batch_size # Calculate the minimum number of batches needed
     avg_batch_size = num_files // num_batches # Calculate the average size of each batch
     remainder = num_files % num_batches # Calculate how many extra files need to be distributed
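
For intuition, the arithmetic above balances batch sizes rather than filling batches greedily. A worked example with illustrative numbers (the remainder-distribution step itself is truncated in this diff):

# 130 files, max_batch_size = 64:
num_batches = (130 + 64 - 1) // 64  # 3 (a greedy split would also need 3, but sized 64/64/2)
avg_batch_size = 130 // 3  # 43
remainder = 130 % 3  # 1 extra file to distribute
# Presumably yielding one batch of 44 and two of 43, instead of 64/64/2.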
