diff --git a/clients/log_meta_stream.py b/clients/log_meta_stream.py
index 960eb93..bba8333 100644
--- a/clients/log_meta_stream.py
+++ b/clients/log_meta_stream.py
@@ -28,12 +28,12 @@ def main():
     # Test out some log files
     file_list = [os.path.join('../data/log', child) for child in os.listdir('../data/log')]
     for filename in file_list:
-        with open(filename,'rb') as file:
+        with open(filename,'rb') as f:

            # Skip OS generated files
            if '.DS_Store' in filename: continue

-            md5 = workbench.store_sample(filename, file.read(), 'log')
+            md5 = workbench.store_sample(filename, f.read(), 'log')
             results = workbench.work_request('view_log_meta', md5)
             print 'Filename: %s\n' % (filename)
             pprint.pprint(results)
diff --git a/clients/pcap_bro_raw.py b/clients/pcap_bro_raw.py
index f93bd0d..ee88971 100644
--- a/clients/pcap_bro_raw.py
+++ b/clients/pcap_bro_raw.py
@@ -34,8 +34,8 @@ def main():
         # Skip OS generated files
         if '.DS_Store' in filename: continue
-        with open(filename,'rb') as file:
-            md5 = workbench.store_sample(filename, file.read(), 'pcap')
+        with open(filename,'rb') as f:
+            md5 = workbench.store_sample(filename, f.read(), 'pcap')
             results = workbench.work_request('pcap_bro', md5)

            # Results is just a dictionary of Bro log file names and their MD5s in workbench
diff --git a/clients/pcap_bro_urls.py b/clients/pcap_bro_urls.py
index 1a98cdc..2ee7eaa 100644
--- a/clients/pcap_bro_urls.py
+++ b/clients/pcap_bro_urls.py
@@ -33,8 +33,8 @@ def main():
         # Skip OS generated files
         if '.DS_Store' in filename: continue
-        with open(filename,'rb') as file:
-            pcap_md5 = workbench.store_sample(filename, file.read(), 'pcap')
+        with open(filename,'rb') as f:
+            pcap_md5 = workbench.store_sample(filename, f.read(), 'pcap')
             results = workbench.work_request('pcap_bro', pcap_md5)

            # Just grab the http log
diff --git a/clients/pcap_bro_view.py b/clients/pcap_bro_view.py
index 7d4720d..4d1825f 100644
--- a/clients/pcap_bro_view.py
+++ b/clients/pcap_bro_view.py
@@ -35,8 +35,8 @@ def main():
         if '.DS_Store' in filename: continue

        # Process the pcap file
-        with open(filename,'rb') as file:
-            md5 = workbench.store_sample(filename, file.read(), 'pcap')
+        with open(filename,'rb') as f:
+            md5 = workbench.store_sample(filename, f.read(), 'pcap')
             results = workbench.work_request('view_pcap', md5)
             print '\n<<< %s >>>' % filename
             pprint.pprint(results)
diff --git a/clients/pcap_meta.py b/clients/pcap_meta.py
index f44091d..09e531a 100644
--- a/clients/pcap_meta.py
+++ b/clients/pcap_meta.py
@@ -32,8 +32,8 @@ def main():
         # Skip OS generated files
         if '.DS_Store' in filename: continue
-        with open(filename,'rb') as file:
-            md5 = workbench.store_sample(filename, file.read(), 'pcap')
+        with open(filename,'rb') as f:
+            md5 = workbench.store_sample(filename, f.read(), 'pcap')
             results = workbench.work_request('view_pcap', md5)
             print 'Filename: %s results:' % (filename)
             pprint.pprint(results)
diff --git a/clients/pe_indexer.py b/clients/pe_indexer.py
index 29d1fbf..6ce1be4 100644
--- a/clients/pe_indexer.py
+++ b/clients/pe_indexer.py
@@ -33,8 +33,8 @@ def main():
         if '.DS_Store' in filename: continue

-        with open(filename, 'rb') as file:
-            md5 = workbench.store_sample(filename, file.read(), 'pe')
+        with open(filename, 'rb') as f:
+            md5 = workbench.store_sample(filename, f.read(), 'pe')

            # Index the strings and features output (notice we can ask for any worker output)
            # Also (super important) it all happens on the server side.
@@ -68,7 +68,7 @@ def main():
         print 'Number of hits: %d' % results['hits']['total']
         print 'Max Score: %f' % results['hits']['max_score']
         pprint.pprint([(hit['fields']['md5'], hit['fields']['sparse_features.imported_symbols'])
-            for hit in results['hits']['hits']])
+                       for hit in results['hits']['hits']])
     except TypeError:
         print 'Probably using a Stub Indexer, if you want an ELS Indexer see the readme'
diff --git a/clients/pe_peid.py b/clients/pe_peid.py
index 4fddc53..eb5c7e0 100644
--- a/clients/pe_peid.py
+++ b/clients/pe_peid.py
@@ -33,8 +33,8 @@ def main():
         # Skip OS generated files
         if '.DS_Store' in filename: continue
-        with open(filename,'rb') as file:
-            md5 = workbench.store_sample(filename, file.read(), 'pe')
+        with open(filename,'rb') as f:
+            md5 = workbench.store_sample(filename, f.read(), 'pe')
             results = workbench.work_request('pe_peid', md5)
             pprint.pprint(results)
diff --git a/clients/upload_dir.py b/clients/upload_dir.py
index e1d5f17..81c43af 100644
--- a/clients/upload_dir.py
+++ b/clients/upload_dir.py
@@ -5,15 +5,6 @@
 import ConfigParser
 import hashlib

-# We're not using this but it might be handy to someone
-'''
-def md5_for_file(path, block_size=256*128):
-    md5 = hashlib.md5()
-    with open(path,'rb') as f:
-        for chunk in iter(lambda: f.read(block_size), b''):
-            md5.update(chunk)
-    return md5.hexdigest()
-'''

 def main():
     ''' This client pushes a big directory of different files into Workbench '''
@@ -25,7 +16,8 @@ def main():
     port = workbench_conf.getint('workbench', 'server_port')

    parser = argparse.ArgumentParser()
-    parser.add_argument('-d', '--data-dir', type=str, default='../data/pdf/bad', help='Directory of files to import into the workbench server')
+    parser.add_argument('-d', '--data-dir', type=str, default='../data/pdf/bad',
+                        help='Directory of files to import into the workbench server')
     parser.add_argument('-t', '--tag', type=str, default='log', help='Type_tag of the files being imported')
     parser.add_argument('-p', '--port', type=int, default=port, help='port used by workbench server')
     parser.add_argument('-s', '--server', type=str, default=server, help='location of workbench server')
diff --git a/clients/upload_file.py b/clients/upload_file.py
index dfb8a70..b04173c 100644
--- a/clients/upload_file.py
+++ b/clients/upload_file.py
@@ -5,6 +5,16 @@
 import argparse
 import ConfigParser

+# We're not using this but it might be handy to someone
+'''
+def md5_for_file(path, block_size=256*128):
+    md5 = hashlib.md5()
+    with open(path,'rb') as f:
+        for chunk in iter(lambda: f.read(block_size), b''):
+            md5.update(chunk)
+    return md5.hexdigest()
+'''
+
 def main():
     ''' This client pushes a file into Workbench '''
@@ -15,7 +25,8 @@ def main():
     port = workbench_conf.getint('workbench', 'server_port')

    parser = argparse.ArgumentParser()
-    parser.add_argument('-f', '--loadfile', type=str, default='../data/log/system.log', help='File to import into the workbench server')
+    parser.add_argument('-f', '--loadfile', type=str, default='../data/log/system.log',
+                        help='File to import into the workbench server')
     parser.add_argument('-p', '--port', type=int, default=port, help='port used by workbench server')
     parser.add_argument('-s', '--server', type=str, default=server, help='location of workbench server')
     args = parser.parse_args()
diff --git a/clients/zip_file_extraction.py b/clients/zip_file_extraction.py
index ca2e94e..6a54583 100644
--- a/clients/zip_file_extraction.py
+++ b/clients/zip_file_extraction.py
@@ -28,8 +28,8 @@ def main():
     # Test out zip data
     file_list = [os.path.join('../data/zip', child) for child in os.listdir('../data/zip')]
     for filename in file_list:
-        with open(filename,'rb') as file:
-            md5 = workbench.store_sample(filename, file.read(), 'zip')
+        with open(filename,'rb') as f:
+            md5 = workbench.store_sample(filename, f.read(), 'zip')
             results = workbench.work_request('view', md5)
             print 'Filename: %s ' % (filename)
             pprint.pprint(results)
diff --git a/server/bro/bro_log_reader.py b/server/bro/bro_log_reader.py
index cab8944..613b94d 100644
--- a/server/bro/bro_log_reader.py
+++ b/server/bro/bro_log_reader.py
@@ -8,7 +8,7 @@
 import time


-class BroLogReader():
+class BroLogReader(object):
     ''' This class implements a python based Bro Log Reader. '''

    def __init__(self, convert_datetimes=True):
@@ -16,7 +16,7 @@ def __init__(self, convert_datetimes=True):
         self.delimiter = '\t'
         self.convert_datetimes = convert_datetimes

-    def read_log(self, logfile, max_rows=None):
+    def read_log(self, logfile):
         ''' The read_log method is a generator for rows in a Bro log.
             Usage: rows = my_bro_reader.read_log(logfile)
                    for row in rows:
@@ -29,7 +29,7 @@ def read_log(self, logfile, max_rows=None):
         logfile.seek(0)

        # First parse the header of the bro log
-        field_names, field_types = self._parse_bro_header(logfile)
+        field_names, _linez = self._parse_bro_header(logfile)

        # Note: SO stupid to write a csv reader, but csv.DictReader on Bro
        #       files was doing something weird with generator output that
@@ -130,6 +130,6 @@ def _cast_value(self, value):
     # Create a BRO log file reader and pull from the logfile
     BRO_LOG = BroLogReader()
-    RECORDS = BRO_LOG.read_log(open(OPTIONS.logfile, 'rb'), max_rows=10)
+    RECORDS = BRO_LOG.read_log(open(OPTIONS.logfile, 'rb'))
     for row in RECORDS:
         print row
\ No newline at end of file
diff --git a/server/plugin_manager.py b/server/plugin_manager.py
index 2916dfe..3917b93 100644
--- a/server/plugin_manager.py
+++ b/server/plugin_manager.py
@@ -79,21 +79,6 @@ def add_plugin(self, f):
         mod_time = datetime.utcfromtimestamp(os.path.getmtime(f))
         self.plugin_callback(plugin_info, mod_time)

-    # Currently disabled: Need to thing about this funcitonality
-    '''
-    def run_test(self, handler):
-        previousDir = os.getcwd()
-        os.chdir(self.plugin_path)
-        try:
-            handler.test()
-            return True
-        except AttributeError:
-            print 'Failure for plugin: %s' % (handler.__name__)
-            print 'The file must have a top level test() method that runs'
-            return False
-        finally:
-            os.chdir(previousDir)
-    '''
     def validate(self, handler):
         ''' Validate the plugin, each plugin must have the following:
         '''

        # Check for the test method first
-        methods = [name for name,_value in inspect.getmembers(handler, callable)]
+        methods = [name for name,_ in inspect.getmembers(handler, callable)]
         if 'test' not in methods:
             print 'Failure for plugin: %s' % (handler.__name__)
             print 'Validation Error: The file must have a top level test() method'
diff --git a/server/workbench.py b/server/workbench.py
index 7dd141a..0cb0a71 100644
--- a/server/workbench.py
+++ b/server/workbench.py
@@ -5,7 +5,6 @@
 from gevent import signal as gevent_signal
 import signal
 import os
-import argparse
 import zerorpc
 import zmq
 import logging
@@ -32,7 +31,7 @@
 from bro import bro_log_reader


-class WorkBench():
+class WorkBench(object):
     ''' Workbench: Open Source Security Framework '''

    def __init__(self, store_args=None, els_hosts=None, neo_uri=None):
@@ -121,7 +120,7 @@ def stream_sample(self, md5, max_rows):
         if type_tag == 'bro':
             bro_log = bro_log_reader.BroLogReader(convert_datetimes=False)
             mem_file = StringIO.StringIO(raw_bytes)
-            generator = bro_log.read_log(mem_file, max_rows=max_rows)
+            generator = bro_log.read_log(mem_file)
             return generator
         elif type_tag == 'els_query':
             els_log = json.loads(raw_bytes)
@@ -271,7 +270,7 @@ def work_request(self, worker_class, md5, subkeys=None):
                     tmp = work_results[worker_class]
                     for key in subkey.split('.'):
                         tmp = tmp[key]
-                        sub_results[key] = tmp
+                    sub_results[key] = tmp
                 work_results = sub_results
             except (KeyError, TypeError):
                 raise RuntimeError('Could not get one or more subkeys for: %s' % (work_results))
@@ -321,7 +320,8 @@ def store_sample_set(self, md5_list):
         '''
         for md5 in md5_list:
             if not self.has_sample(md5):
-                raise RuntimeError('Sample not found all items in sample_set must be in the datastore: %s (not found)' % (md5))
+                raise RuntimeError('Sample not found; all items in sample_set '
+                                   'must be in the datastore: %s (not found)' % (md5))
         set_md5 = hashlib.md5(str(md5_list)).hexdigest()
         self._store_work_results({'md5_list':md5_list}, 'sample_set', set_md5)
         return set_md5
@@ -485,18 +485,6 @@ def run():
     worker_cap = workbench_conf.getint('workbench', 'worker_cap')
     samples_cap = workbench_conf.getint('workbench', 'samples_cap')

-    # Parse the arguments (args overwrite configuration file settings)
-    '''
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-ds_uri', '--datastore_uri', type=str, default=None, help='machine used by workbench datastore')
-    parser.add_argument('-db', '--database', type=str, default=None, help='database used by workbench server')
-    args = parser.parse_args()
-
-    # Overwrite if specified
-    datastore_uri = args.datastore_uri if (args.datastore_uri) else datastore_uri
-    database = args.database if (args.database) else database
-    '''
-
     # Spin up Workbench ZeroRPC
     try:
         store_args = {'uri': datastore_uri, 'database': database, 'worker_cap':worker_cap, 'samples_cap':samples_cap}
diff --git a/utils/pcap_streamer.py b/utils/pcap_streamer.py
index a180b8b..c9938b3 100644
--- a/utils/pcap_streamer.py
+++ b/utils/pcap_streamer.py
@@ -90,8 +90,8 @@ def store_file(self, filename):
         # Open the file and send it to workbench
         storage_name = "streaming_pcap" + str(self.pcap_index)
         print filename, storage_name
-        with open(filename,'rb') as file:
-            self.workbench.store_sample(storage_name, file.read(), 'pcap')
+        with open(filename,'rb') as f:
+            self.workbench.store_sample(storage_name, f.read(), 'pcap')
         self.pcap_index += 1

        # Close workbench client
@@ -101,7 +101,8 @@ def subprocess_manager(self, exec_args):
         try:
             self.tcpdump_process = subprocess.Popen(exec_args.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         except OSError:
-            raise RuntimeError('Could not run tcpdump executable (either not installed or not in path): %s' % (exec_args))
+            raise RuntimeError('Could not run tcpdump executable (either not '
+                               'installed or not in path): %s' % (exec_args))
         out, err = self.tcpdump_process.communicate()
         if out:
             print 'standard output of subprocess: %s' % out
@@ -110,7 +111,7 @@
         if self.tcpdump_process.returncode:
             raise RuntimeError('%s\ntcpdump had returncode: %d' % (exec_args, self.tcpdump_process.returncode))

-    def __exit__(self, type, value, traceback):
+    def __exit__(self, func_type, value, traceback):
         ''' Class Cleanup '''
         print '\nTCP Dumper.. Cleaning up :)'
diff --git a/workers/bro/bro_log_reader.py b/workers/bro/bro_log_reader.py
index cab8944..410a979 100644
--- a/workers/bro/bro_log_reader.py
+++ b/workers/bro/bro_log_reader.py
@@ -8,7 +8,7 @@
 import time


-class BroLogReader():
+class BroLogReader(object):
     ''' This class implements a python based Bro Log Reader. '''

    def __init__(self, convert_datetimes=True):
@@ -16,7 +16,7 @@ def __init__(self, convert_datetimes=True):
         self.delimiter = '\t'
         self.convert_datetimes = convert_datetimes

-    def read_log(self, logfile, max_rows=None):
+    def read_log(self, logfile):
         ''' The read_log method is a generator for rows in a Bro log.
             Usage: rows = my_bro_reader.read_log(logfile)
                    for row in rows:
@@ -29,7 +29,7 @@ def read_log(self, logfile, max_rows=None):
         logfile.seek(0)

        # First parse the header of the bro log
-        field_names, field_types = self._parse_bro_header(logfile)
+        field_names, _ = self._parse_bro_header(logfile)

        # Note: SO stupid to write a csv reader, but csv.DictReader on Bro
        #       files was doing something weird with generator output that
diff --git a/workers/pe_features.py b/workers/pe_features.py
index 171006f..1025dcc 100644
--- a/workers/pe_features.py
+++ b/workers/pe_features.py
@@ -36,15 +36,17 @@ def __init__(self, verbose=False):
             'iat_rva', 'major_version', 'minor_version', 'number_of_bound_import_symbols',
             'number_of_bound_imports', 'number_of_export_symbols', 'number_of_import_symbols',
             'number_of_imports', 'number_of_rva_and_sizes', 'number_of_sections', 'pe_warnings',
-            'std_section_names', 'total_size_pe', 'virtual_address', 'virtual_size', 'virtual_size_2',
-            'datadir_IMAGE_DIRECTORY_ENTRY_BASERELOC_size', 'datadir_IMAGE_DIRECTORY_ENTRY_RESOURCE_size',
+            'std_section_names', 'total_size_pe', 'virtual_address', 'virtual_size',
+            'virtual_size_2', 'datadir_IMAGE_DIRECTORY_ENTRY_BASERELOC_size',
+            'datadir_IMAGE_DIRECTORY_ENTRY_RESOURCE_size',
             'datadir_IMAGE_DIRECTORY_ENTRY_IAT_size', 'datadir_IMAGE_DIRECTORY_ENTRY_IMPORT_size',
-            'pe_char', 'pe_dll', 'pe_driver', 'pe_exe', 'pe_i386', 'pe_majorlink', 'pe_minorlink',
-            'sec_entropy_data', 'sec_entropy_rdata', 'sec_entropy_reloc', 'sec_entropy_text',
-            'sec_entropy_rsrc', 'sec_rawptr_rsrc', 'sec_rawsize_rsrc', 'sec_vasize_rsrc',
-            'sec_raw_execsize', 'sec_rawptr_data', 'sec_rawptr_text', 'sec_rawsize_data',
-            'sec_rawsize_text', 'sec_va_execsize', 'sec_vasize_data', 'sec_vasize_text',
-            'size_code', 'size_image', 'size_initdata', 'size_uninit'])
+            'pe_char', 'pe_dll', 'pe_driver', 'pe_exe', 'pe_i386', 'pe_majorlink',
+            'pe_minorlink', 'sec_entropy_data', 'sec_entropy_rdata',
+            'sec_entropy_reloc', 'sec_entropy_text', 'sec_entropy_rsrc', 'sec_rawptr_rsrc',
+            'sec_rawsize_rsrc', 'sec_vasize_rsrc', 'sec_raw_execsize', 'sec_rawptr_data',
+            'sec_rawptr_text', 'sec_rawsize_data', 'sec_rawsize_text', 'sec_va_execsize',
+            'sec_vasize_data', 'sec_vasize_text', 'size_code', 'size_image', 'size_initdata',
+            'size_uninit'])

        self.set_sparse_features(['imported_symbols', 'section_names', 'pe_warning_strings'])
diff --git a/workers/pe_indicators.py b/workers/pe_indicators.py
index ac82313..340a249 100644
--- a/workers/pe_indicators.py
+++ b/workers/pe_indicators.py
@@ -46,7 +46,7 @@ def execute(self, input_data):
         ''' Execute the PEIndicators worker '''
         raw_bytes = input_data['sample']['raw_bytes']

-        ''' Analyze the output of pefile for any anomalous conditions. '''
+        # Analyze the output of pefile for any anomalous conditions.
         # Have the PE File module process the file
         try:
             self.pefile_handle = pefile.PE(data=raw_bytes, fast_load=False)
@@ -196,7 +196,7 @@ def check_communication_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description':'Imported symbols related to network communication', 'severity': 1,
-                'category':'COMMUNICATION', 'attributes':matching_imports}
+                    'category':'COMMUNICATION', 'attributes':matching_imports}
         else:
             return None
@@ -208,7 +208,7 @@ def check_elevating_privs_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if (matching_imports):
             return {'description': 'Imported symbols related to elevating or attaining new privileges',
-                'severity': 2, 'category': 'CREDENTIALS', 'attributes': matching_imports}
+                    'severity': 2, 'category': 'CREDENTIALS', 'attributes': matching_imports}
         else:
             return None
@@ -220,7 +220,7 @@ def check_keylogging_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to keylogging activities', 'severity': 2,
-                'category': 'KEYLOGGING', 'attributes': matching_imports}
+                    'category': 'KEYLOGGING', 'attributes': matching_imports}
         else:
             return None
@@ -233,7 +233,7 @@ def check_system_state_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to changing system state', 'severity': 1,
-                'category': 'SYSTEM_STATE', 'attributes': matching_imports}
+                    'category': 'SYSTEM_STATE', 'attributes': matching_imports}
         else:
             return None
@@ -243,7 +243,7 @@ def check_system_probe_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to probing the system', 'severity': 2,
-                'category': 'SYSTEM_PROBE', 'attributes': matching_imports}
+                    'category': 'SYSTEM_PROBE', 'attributes': matching_imports}
         else:
             return None
@@ -253,7 +253,7 @@ def check_system_integrity_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to system security and integrity',
-                'severity': 3, 'category': 'SYSTEM_INTEGRITY', 'attributes': matching_imports}
+                    'severity': 3, 'category': 'SYSTEM_INTEGRITY', 'attributes': matching_imports}
         else:
             return None
@@ -263,7 +263,7 @@ def check_crypto_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to encryption', 'severity': 3,
-                'category': 'ENCRYPTION', 'attributes': matching_imports}
+                    'category': 'ENCRYPTION', 'attributes': matching_imports}
         else:
             return None
@@ -274,7 +274,7 @@ def check_anti_debug_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to anti-debugging', 'severity': 3,
-                'category': 'ANTI_DEBUG', 'attributes': matching_imports}
+                    'category': 'ANTI_DEBUG', 'attributes': matching_imports}
         else:
             return None
@@ -286,7 +286,7 @@ def check_com_service_imports(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to COM or Services', 'severity': 3,
-                'category': 'COM_SERVICES', 'attributes': matching_imports}
+                    'category': 'COM_SERVICES', 'attributes': matching_imports}
         else:
             return None
@@ -301,7 +301,7 @@ def check_process_manipulation(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to process manipulation/injection',
-                'severity': 3, 'category': 'PROCESS_MANIPULATION', 'attributes': matching_imports}
+                    'severity': 3, 'category': 'PROCESS_MANIPULATION', 'attributes': matching_imports}
         else:
             return None
@@ -312,7 +312,7 @@ def check_process_spawn(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to spawning a new process', 'severity': 2,
-                'category': 'PROCESS_SPAWN', 'attributes': matching_imports}
+                    'category': 'PROCESS_SPAWN', 'attributes': matching_imports}
         else:
             return None
@@ -322,7 +322,7 @@ def check_stealth_load(self):
         matching_imports = self._search_for_import_symbols(imports)
         if matching_imports:
             return {'description': 'Imported symbols related to loading libraries, resources, in a sneaky way',
-                'severity': 2, 'category': 'STEALTH_LOAD', 'attributes': matching_imports}
+                    'severity': 2, 'category': 'STEALTH_LOAD', 'attributes': matching_imports}
         else:
             return None