Implement maximum file sizes (#62)
det authored Feb 11, 2025
1 parent 6c18693 commit fc34cf3
Showing 7 changed files with 43 additions and 26 deletions.
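
In short, the generic check rule gains a maximum_file_size input (default 1 MB), registered as a per-check override and used to skip oversized files before bucketing and batching; alongside it, check callbacks move from FilesResult (a list of path strings) to CheckTargets, whose file objects carry .path and .size. A rough sketch of a rule opting into a smaller limit (the check name and command are illustrative, not from this commit):

check(
    name = "my_lint",  # hypothetical check name
    command = "my-lint {targets}",  # hypothetical command template
    maximum_file_size = 256 * 1024,  # skip files larger than 256 KB
)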
4 changes: 2 additions & 2 deletions .tk/config.toml
@@ -39,7 +39,7 @@ circleci.check = true
 circleci.version = "0.1.31425"
 cue_format.fmt = true
 cue_format.version = "0.12.0"
-# do_not_land.check = true
+do_not_land.check = true
 dart.check = true
 dart.fix = true
 dart.fmt = true
@@ -65,7 +65,7 @@ hadolint.check = true
 hadolint.version = "2.12.1-beta"
 isort.fmt = true
 isort.version = "6.0.0"
-# no_curly_quotes.check = true
+no_curly_quotes.check = true
 osv-scanner.check = true
 osv-scanner.version = "1.9.2"
 oxipng.fmt = true
2 changes: 1 addition & 1 deletion .tk/version
@@ -1 +1 @@
-v0.0.24
+v0.0.25
10 changes: 7 additions & 3 deletions rules/banned_strings_check.star
@@ -11,17 +11,21 @@ def banned_strings_check(
     label = native.label_string(":" + name)
     native.string_list(name = "strings", default = strings)

-    def impl(ctx: CheckContext, result: FilesResult):
+    def impl(ctx: CheckContext, targets: CheckTargets):
         re = regex.Regex("|".join([regex.escape(word) for word in ctx.inputs().strings]))
-        for batch in make_batches(result.files):
+        paths = [file.path for file in targets.files]
+        for batch in make_batches(paths):
             description = "{label} ({num_files} files)".format(label = label, num_files = len(batch))
             ctx.spawn(description = description, weight = len(batch)).then(run, ctx, re, batch)

 def run(ctx: CheckContext, re: regex.Regex, batch: list[str]):
     results = []
     for file in batch:
         abspath = fs.join(ctx.paths().workspace_dir, file)
-        data = fs.read_file(abspath)
+        data = fs.try_read_file(abspath)
+        if data == None:
+            # Skip files that are not UTF-8 encoded.
+            continue
         line_index = lines.LineIndex(data)
         for match in re.finditer(data):
             line_col = line_index.line_col(match.start(0))
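
The same migration pattern repeats below: impl callbacks now take CheckTargets instead of FilesResult, and each entry in targets.files is a file object rather than a bare path string. A minimal sketch of the new callback shape, assuming only the two fields this commit actually reads (.path and .size):

def impl(ctx: CheckContext, targets: CheckTargets):
    # Each target is an object; pull out paths, skipping anything oversized.
    paths = [file.path for file in targets.files if file.size <= 1024 * 1024]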
41 changes: 26 additions & 15 deletions rules/check.star
@@ -8,17 +8,17 @@ load("util:tarif.star", "tarif")
 # Bucket

 BucketContext = record(
-    files = list[str],
+    paths = list[str],
 )

 # Bucket all files into a single bucket to run from the workspace root.
 def bucket_by_workspace(ctx: BucketContext) -> dict[str, list[str]]:
-    return {".": ctx.files}
+    return {".": ctx.paths}

 # Bucket files to run from the directory containing the specified file.
 def _bucket_by_files(targets: list[str], ctx: BucketContext) -> dict[str, list[str]]:
     directories = {}
-    for file in ctx.files:
+    for file in ctx.paths:
         directory = walk_up_to_find_dir_of_files(file, targets) or "."
         if directory not in directories:
             directories[directory] = []
@@ -35,12 +35,12 @@ def bucket_by_file(target: str):
 # If the file doesn't exist, then ignore.
 def _bucket_by_files_or_ignore(targets: list[str], ctx: BucketContext) -> dict[str, list[str]]:
     directories = {}
-    for file in ctx.files:
-        directory = walk_up_to_find_dir_of_files(file, targets)
+    for path in ctx.paths:
+        directory = walk_up_to_find_dir_of_files(path, targets)
         if directory:
             if directory not in directories:
                 directories[directory] = []
-            directories[directory].append(fs.relative_to(file, directory))
+            directories[directory].append(fs.relative_to(path, directory))
     return directories

 def bucket_by_files_or_ignore(targets: list[str]):
@@ -52,8 +52,8 @@ def bucket_by_file_or_ignore(target: str):
 # Bucket files to run from the directory containing the specified file on each directory containing that file.
 def _bucket_directories_by_files(targets: list[str], ctx: BucketContext) -> dict[str, list[str]]:
     directories = set()
-    for file in ctx.files:
-        directory = walk_up_to_find_dir_of_files(file, targets) or "."
+    for path in ctx.paths:
+        directory = walk_up_to_find_dir_of_files(path, targets) or "."
         directories.add(directory)
     return {".": list(directories)}

@@ -66,18 +66,18 @@ def bucket_directories_by_file(target: str):
 # Bucket files to run from the parent directory of each file.
 def bucket_by_dir(ctx: BucketContext) -> dict[str, list[str]]:
     directories = {}
-    for file in ctx.files:
-        directory = fs.dirname(file)
+    for path in ctx.paths:
+        directory = fs.dirname(path)
         if directory not in directories:
             directories[directory] = []
-        directories[directory].append(fs.filename(file))
+        directories[directory].append(fs.filename(path))
     return directories

 # Run on the directories containing the files.
 def bucket_dirs_of_files(ctx: BucketContext) -> dict[str, list[str]]:
     directories = set()
-    for file in ctx.files:
-        directory = fs.dirname(file)
+    for path in ctx.paths:
+        directory = fs.dirname(path)
         directories.add(directory)
     return {".": list(directories)}

@@ -230,15 +230,24 @@ def check(
        bucket: typing.Callable = bucket_by_workspace,
        read_output_file: None | typing.Callable = None,
        update_command_line_replacements: None | typing.Callable = None,
+       maximum_file_size = 1024 * 1024, # 1 MB
        affects_cache = [],
        timeout_ms = 300000, # 5 minutes
        cache_results = False,
        cache_ttl = 60 * 60 * 24, # 24 hours
        target_description: str = "targets"):
     label = native.label_string(":" + name)

-    def impl(ctx: CheckContext, result: FilesResult):
-        buckets = bucket(BucketContext(files = result.files))
+    def impl(ctx: CheckContext, targets: CheckTargets):
+        # Filter files too large
+        paths = []
+        for file in targets.files:
+            if file.size > ctx.inputs().maximum_file_size:
+                continue
+            paths.append(file.path)
+
+        # Bucket by run from directory
+        buckets = bucket(BucketContext(paths = paths))
         for (run_from, targets) in buckets.items():
             batch(ctx, run_from, targets, ctx.inputs().batch_size)

@@ -319,6 +328,7 @@ def check(
     # Allow the user to override some settings.
     native.string(name = name + "_command", default = command)
     native.int(name = name + "_batch_size", default = batch_size)
+    native.int(name = name + "_maximum_file_size", default = maximum_file_size)
     native.bool(name = name + "_bisect", default = bisect)
     native.int_list(name = name + "_success_codes", default = success_codes)
     native.int_list(name = name + "_error_codes", default = error_codes)
@@ -331,6 +341,7 @@ def check(
         "tool": tool,
         "command": ":" + name + "_command",
         "batch_size": ":" + name + "_batch_size",
+        "maximum_file_size": ":" + name + "_maximum_file_size",
         "bisect": ":" + name + "_bisect",
         "success_codes": ":" + name + "_success_codes",
         "error_codes": ":" + name + "_error_codes",
2 changes: 1 addition & 1 deletion tool/biome/package.star
@@ -28,7 +28,7 @@ _SEVERITY_TO_LEVEL = {

 # It's non-trivial to turn this into text edits:
 # https://github.com/biomejs/biome/blob/0bb86c7bbabebace7ce0f17638f6f58585dae7d6/crates/biome_lsp/src/utils.rs#L26
-def _create_edits_from_diff(line_index, diff_data, file_path): # DONOTLAND
+def _create_edits_from_diff(line_index, diff_data, file_path):
     dictionary = diff_data["dictionary"]
     ops = diff_data["ops"]

6 changes: 3 additions & 3 deletions tool/sleep/package.star
@@ -1,6 +1,6 @@
-def _check_impl(ctx: CheckContext, result: FilesResult):
-    for file in result.files:
-        description = "sleep-check {file}".format(file = file)
+def _check_impl(ctx: CheckContext, targets: CheckTargets):
+    for file in targets.files:
+        description = "sleep-check {file}".format(file = file.path)
         ctx.spawn(description = description).then(_check_file)

 def _check_file():
4 changes: 3 additions & 1 deletion util/batch.star
@@ -1,5 +1,7 @@
-def make_batches(files: list[str], max_batch_size: int = 64) -> list[list[str]]:
+def make_batches(files: list[typing.Any], max_batch_size: int = 64) -> list[list[typing.Any]]:
     num_files = len(files)
+    if num_files == 0:
+        return []
     num_batches = (num_files + max_batch_size - 1) // max_batch_size # Calculate the minimum number of batches needed
     avg_batch_size = num_files // num_batches # Calculate the average size of each batch
     remainder = num_files % num_batches # Calculate how many extra files need to be distributed
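
For intuition, the arithmetic above balances batch sizes rather than filling batches greedily. A worked example with illustrative numbers (the remainder-distribution step itself is truncated in this diff):

# 130 files, max_batch_size = 64:
num_batches = (130 + 64 - 1) // 64  # 3 (a greedy split would also need 3, but sized 64/64/2)
avg_batch_size = 130 // 3  # 43
remainder = 130 % 3  # 1 extra file to distribute
# Presumably yielding one batch of 44 and two of 43, instead of 64/64/2.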
