Skip to content

Commit

Permalink
[batch] Support projects with different disk sizes (#4601)
Browse files Browse the repository at this point in the history
We may need to do this and have per-job templates because Chrome needs
it. Maybe we can eventually get rid of this and replace it with per-job
templates, making every oversized OSS-Fuzz project use the same config
with the same disk size (right now it varies).
  • Loading branch information
jonathanmetzman authored Jan 14, 2025
1 parent 3432362 commit a76b922
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 23 deletions.
8 changes: 6 additions & 2 deletions src/clusterfuzz/_internal/cron/project_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
JOB_TEMPLATE = ('{build_type} = {build_bucket_path}\n'
'PROJECT_NAME = {project_name}\n'
'SUMMARY_PREFIX = {project_name}\n'
'MANAGED = True\n')
'MANAGED = True\n'
'DISK_SIZE_GB = {disk_size_gb}\n')

OBJECT_VIEWER_IAM_ROLE = 'roles/storage.objectViewer'
OBJECT_ADMIN_IAM_ROLE = 'roles/storage.objectAdmin'
Expand Down Expand Up @@ -808,11 +809,14 @@ def _sync_job(self, project, info, corpus_bucket_name, quarantine_bucket_name,
project, info, template.engine, template.memory_tool,
template.architecture)
base_project_name = self._get_base_project_name(project)
oss_fuzz_project = ndb.Key(data_types.OssFuzzProject, project).get()
oss_fuzz_gb = oss_fuzz_project.disk_size_gb if oss_fuzz_project else None
job.environment_string = JOB_TEMPLATE.format(
build_type=self._build_type,
build_bucket_path=build_bucket_path,
engine=template.engine,
project_name=base_project_name)
project_name=base_project_name,
disk_size_gb=oss_fuzz_gb)

# Centipede requires a separate build of the sanitized binary.
if template.engine == 'centipede':
Expand Down
22 changes: 10 additions & 12 deletions src/clusterfuzz/_internal/google_cloud_utils/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@
from clusterfuzz._internal.datastore import data_types
from clusterfuzz._internal.datastore import ndb_utils
from clusterfuzz._internal.metrics import logs
from clusterfuzz._internal.system import environment

# TODO(metzman): Change to from . import credentials when we are done
# developing.
from . import credentials

_local = threading.local()
Expand Down Expand Up @@ -236,12 +235,6 @@ def _get_batch_config():
return local_config.BatchConfig()


def _get_job(job_name):
  """Look up the Job entity whose |name| equals |job_name|.

  Kept as a small standalone helper so that it is easy to mock in tests.
  """
  job_query = data_types.Job.query(data_types.Job.name == job_name)
  return job_query.get()


def is_no_privilege_workload(command, job_name):
  """Returns whether the workload for |command| on |job_name| should run
  without extra privileges. Currently this is exactly the set of remote
  tasks."""
  runs_unprivileged = is_remote_task(command, job_name)
  return runs_unprivileged

Expand Down Expand Up @@ -273,7 +266,12 @@ def _get_config_names(
suffix = '-NONPREEMPTIBLE-UNPRIVILEGED'
job = job_map[task.job_type]
platform = job.platform if not utils.is_oss_fuzz() else 'LINUX'
config_map[(task.command, task.job_type)] = f'{platform}{suffix}'
disk_size_gb = environment.get_value(
'DISK_SIZE_GB', env=job.get_environment())
config_map[(task.command, task.job_type)] = (f'{platform}{suffix}',
disk_size_gb)
# TODO(metzman): Come up with a more systematic way for configs to
# be overridden by jobs.
return config_map


Expand Down Expand Up @@ -310,12 +308,11 @@ def _get_specs_from_config(batch_tasks) -> Dict:
if (task.command, task.job_type) in specs:
# Don't repeat work for no reason.
continue
config_name = config_map[(task.command, task.job_type)]
config_name, disk_size_gb = config_map[(task.command, task.job_type)]

instance_spec = batch_config.get('mapping').get(config_name)
if instance_spec is None:
raise ValueError(f'No mapping for {config_name}')
config_name = config_map[(task.command, task.job_type)]
project_name = batch_config.get('project')
clusterfuzz_release = instance_spec.get('clusterfuzz_release', 'prod')
# Lower numbers are a lower priority, meaning less likely to run From:
Expand All @@ -332,10 +329,11 @@ def _get_specs_from_config(batch_tasks) -> Dict:
if should_retry and task.command == 'corpus_pruning':
should_retry = False # It is naturally retried the next day.

disk_size_gb = (disk_size_gb or instance_spec['disk_size_gb'])
subconfig = subconfig_map[config_name]
spec = BatchWorkloadSpec(
docker_image=instance_spec['docker_image'],
disk_size_gb=instance_spec['disk_size_gb'],
disk_size_gb=disk_size_gb,
disk_type=instance_spec['disk_type'],
user_data=instance_spec['user_data'],
service_account_email=instance_spec['service_account_email'],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,9 @@ def setUp(self):
email='primary@example.com').put()

# Existing project settings. Should not get modified.
data_types.OssFuzzProject(id='lib1', name='lib1', cpu_weight=1.5).put()
# Also test disk size.
data_types.OssFuzzProject(
id='lib1', name='lib1', cpu_weight=1.5, disk_size_gb=500).put()

# Should get deleted.
data_types.OssFuzzProject(id='old_lib', name='old_lib').put()
Expand Down Expand Up @@ -341,6 +343,7 @@ def test_execute(self):
'PROJECT_NAME = lib1\n'
'SUMMARY_PREFIX = lib1\n'
'MANAGED = True\n'
'DISK_SIZE_GB = 500\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib1/lib1-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib1-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -366,6 +369,7 @@ def test_execute(self):
'PROJECT_NAME = lib3\n'
'SUMMARY_PREFIX = lib3\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib3/lib3-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib3-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -388,6 +392,7 @@ def test_execute(self):
'PROJECT_NAME = lib3\n'
'SUMMARY_PREFIX = lib3\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds-i386/lib3/lib3-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib3-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -410,6 +415,7 @@ def test_execute(self):
'PROJECT_NAME = lib3\n'
'SUMMARY_PREFIX = lib3\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib3/lib3-memory-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib3-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -433,6 +439,7 @@ def test_execute(self):
'PROJECT_NAME = lib3\n'
'SUMMARY_PREFIX = lib3\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib3/lib3-undefined-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib3-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -454,6 +461,7 @@ def test_execute(self):
'PROJECT_NAME = lib1\n'
'SUMMARY_PREFIX = lib1\n'
'MANAGED = True\n'
'DISK_SIZE_GB = 500\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds-afl/lib1/lib1-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib1-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -478,6 +486,7 @@ def test_execute(self):
'PROJECT_NAME = lib5\n'
'SUMMARY_PREFIX = lib5\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib5/lib5-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib5-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -501,6 +510,7 @@ def test_execute(self):
'PROJECT_NAME = lib6\n'
'SUMMARY_PREFIX = lib6\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib6/lib6-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib6-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -522,6 +532,7 @@ def test_execute(self):
'PROJECT_NAME = lib7\n'
'SUMMARY_PREFIX = lib7\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
'clusterfuzz-builds/lib7/lib7-address-%s.srcmap.json\n'
'FUZZ_LOGS_BUCKET = lib7-logs.clusterfuzz-external.appspot.com\n'
Expand All @@ -543,6 +554,7 @@ def test_execute(self):
'PROJECT_NAME = lib9\n'
'SUMMARY_PREFIX = lib9\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'EXTRA_BUILD_BUCKET_PATH = '
'gs://clusterfuzz-builds-centipede/lib9/lib9-address-([0-9]+).zip\n'
'REVISION_VARS_URL = https://commondatastorage.googleapis.com/'
Expand Down Expand Up @@ -608,7 +620,7 @@ def test_execute(self):
'name':
'lib1',
'disk_size_gb':
None,
500,
'service_account':
'lib1@serviceaccount.com',
'high_end':
Expand Down Expand Up @@ -1902,6 +1914,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket/a-b/libfuzzer/address/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //a/b\nSUMMARY_PREFIX = //a/b\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'ASAN_VAR = VAL\n'
Expand All @@ -1919,6 +1932,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket/a-b/libfuzzer/memory/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //a/b\nSUMMARY_PREFIX = //a/b\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'EXPERIMENTAL = True\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
Expand All @@ -1937,6 +1951,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket/c-d/libfuzzer/address/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //c/d\nSUMMARY_PREFIX = //c/d\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'ASAN_VAR = VAL\n'
Expand All @@ -1954,6 +1969,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket/e-f/libfuzzer/none/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //e/f\nSUMMARY_PREFIX = //e/f\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'BOOL_VAR = True\n'
Expand All @@ -1974,6 +1990,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket-dbg/a-b/libfuzzer/address/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //a/b\nSUMMARY_PREFIX = //a/b\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'ASAN_VAR = VAL-dbg\n'
Expand All @@ -1993,6 +2010,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket/a-b/honggfuzz/address/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //a/b\nSUMMARY_PREFIX = //a/b\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'MINIMIZE_JOB_OVERRIDE = libfuzzer_asan_a-b\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
Expand All @@ -2010,6 +2028,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket-dbg/a-b/honggfuzz/address/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //a/b\nSUMMARY_PREFIX = //a/b\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'MINIMIZE_JOB_OVERRIDE = libfuzzer_asan_a-b_dbg\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
Expand All @@ -2029,6 +2048,7 @@ def test_execute(self):
'FUZZ_TARGET_BUILD_BUCKET_PATH = '
'gs://bucket/c-d/googlefuzztest/address/%TARGET%/([0-9]+).zip\n'
'PROJECT_NAME = //c/d\nSUMMARY_PREFIX = //c/d\nMANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'BOOL_VAR = True\n'
Expand All @@ -2047,6 +2067,7 @@ def test_execute(self):
'PROJECT_NAME = android\n'
'SUMMARY_PREFIX = android\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'BOOL_VAR = True\n'
Expand All @@ -2067,6 +2088,7 @@ def test_execute(self):
'PROJECT_NAME = android\n'
'SUMMARY_PREFIX = android\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'MINIMIZE_JOB_OVERRIDE = libfuzzer_asan_android_pixel8\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
Expand All @@ -2088,6 +2110,7 @@ def test_execute(self):
'PROJECT_NAME = android\n'
'SUMMARY_PREFIX = android\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'ASAN_VAR = VAL-android\n'
Expand All @@ -2109,6 +2132,7 @@ def test_execute(self):
'PROJECT_NAME = android\n'
'SUMMARY_PREFIX = android\n'
'MANAGED = True\n'
'DISK_SIZE_GB = None\n'
'DISABLE_DISCLOSURE = True\n'
'FILE_GITHUB_ISSUE = False\n'
'BOOL_VAR = True\n'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

@test_utils.with_cloud_emulators('datastore')
class GetSpecsFromConfigTest(unittest.TestCase):
"""Tests for get_spec_from_config."""
"""Tests for _get_specs_from_config."""

def setUp(self):
self.maxDiff = None
Expand All @@ -40,8 +40,8 @@ def setUp(self):
)

def test_nonpreemptible(self):
"""Tests that get_spec_from_config works for non-preemptibles as
expected."""
"""Tests that _get_specs_from_config works for non-preemptibles as
expected."""
spec = _get_spec_from_config('analyze', self.job.name)
expected_spec = batch.BatchWorkloadSpec(
clusterfuzz_release='prod',
Expand All @@ -64,8 +64,8 @@ def test_nonpreemptible(self):

self.assertCountEqual(spec, expected_spec)

def test_fuzz_get_spec_from_config(self):
"""Tests that get_spec_from_config works for fuzz tasks as expected."""
def test_fuzz_get_specs_from_config(self):
"""Tests that _get_specs_from_config works for fuzz tasks as expected."""
job = data_types.Job(name='libfuzzer_chrome_asan', platform='LINUX')
job.put()
spec = _get_spec_from_config('fuzz', job.name)
Expand All @@ -92,7 +92,7 @@ def test_fuzz_get_spec_from_config(self):

def test_corpus_pruning(self):
"""Tests that corpus pruning uses a spec of 24 hours and a different one
than normal."""
than normal."""
pruning_spec = _get_spec_from_config('corpus_pruning', self.job.name)
self.assertEqual(pruning_spec.max_run_duration, f'{24 * 60 * 60}s')
normal_spec = _get_spec_from_config('analyze', self.job.name)
Expand All @@ -104,6 +104,50 @@ def test_corpus_pruning(self):
pruning_spec2 = _get_spec_from_config('corpus_pruning', job.name)
self.assertEqual(pruning_spec, pruning_spec2)

def test_get_specs_from_config_disk_size(self):
  """Tests that a DISK_SIZE_GB set in the job environment is used for the
  batch spec instead of the config default."""
  expected_size = 500
  job = data_types.Job(
      name='libfuzzer_asan_test',
      platform='LINUX',
      environment_string=f'DISK_SIZE_GB = {expected_size}\n')
  job.put()

  specs = batch._get_specs_from_config(
      [batch.BatchTask('fuzz', 'libfuzzer_asan_test', None)])
  self.assertEqual(specs['fuzz', 'libfuzzer_asan_test'].disk_size_gb,
                   expected_size)

def test_get_specs_from_config_no_disk_size(self):
  """Tests that a job without DISK_SIZE_GB falls back to the disk size from
  the batch config mapping."""
  job_name = 'libfuzzer_asan_test'
  data_types.Job(platform='LINUX', name=job_name).put()

  specs = batch._get_specs_from_config(
      [batch.BatchTask('fuzz', job_name, None)])
  mapping = batch._get_batch_config().get('mapping')
  fallback_size = mapping['LINUX-PREEMPTIBLE-UNPRIVILEGED']['disk_size_gb']
  self.assertEqual(specs['fuzz', job_name].disk_size_gb, fallback_size)

def test_get_specs_from_config_with_disk_size_override(self):
  """Tests that an updated DISK_SIZE_GB in the job environment is picked up.

  Note: the original version of this test put() a second Job entity with the
  same |name| (but no explicit key), creating two distinct datastore entities.
  The name-based lookup then returned an arbitrary one, making the test
  nondeterministic. Here we mutate and re-put the single stored entity, so
  exactly one Job named |job_name| exists.
  """
  job_name = 'libfuzzer_asan_test'
  original_size = 75
  overridden_size = 200
  # Create the job with the original disk size.
  job = data_types.Job(
      environment_string=f'DISK_SIZE_GB = {original_size}\n',
      platform='LINUX',
      name=job_name)
  job.put()

  # Override the disk size by updating the same entity and re-putting it.
  job.environment_string = f'DISK_SIZE_GB = {overridden_size}\n'
  job.put()

  spec = batch._get_specs_from_config(
      [batch.BatchTask('fuzz', job_name, None)])
  self.assertEqual(spec['fuzz', job_name].disk_size_gb, overridden_size)


def _get_spec_from_config(command, job_name):
return list(
Expand Down
2 changes: 1 addition & 1 deletion src/local/butler/py_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def __init__(self, *args, **kwargs):
self.slow_tests = []

def startTest(self, test):
self._start_time = time.time()
self._start_time = time.time() # pylint: disable=attribute-defined-outside-init
super().startTest(test)

def addSuccess(self, test):
Expand Down

0 comments on commit a76b922

Please sign in to comment.