Limit readline()

This commit is contained in:
pixeebot[bot] 2024-04-16 04:51:38 +00:00 committed by GitHub
parent 3ff9520114
commit 7879dd3aac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,178 +1,178 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from json import loads, dumps from json import loads, dumps
from collections import OrderedDict from collections import OrderedDict
from datetime import datetime from datetime import datetime
from traceback import print_exc from traceback import print_exc
# Taken from https://github.com/dgunter/ParseZeekLogs <3 # Taken from https://github.com/dgunter/ParseZeekLogs <3
class ParseZeekLogs(object): class ParseZeekLogs(object):
""" """
Class that parses Zeek logs and allows log data to be output in CSV or json format. Class that parses Zeek logs and allows log data to be output in CSV or json format.
Attributes: filepath: Path of Zeek log file to read Attributes: filepath: Path of Zeek log file to read
""" """
def __init__(self, filepath, batchsize=500, fields=None, output_format=None, ignore_keys=[], meta={}, safe_headers=False): def __init__(self, filepath, batchsize=500, fields=None, output_format=None, ignore_keys=[], meta={}, safe_headers=False):
self.fd = open(filepath, "r") self.fd = open(filepath, "r")
self.options = OrderedDict() self.options = OrderedDict()
self.firstRun = True self.firstRun = True
self.filtered_fields = fields self.filtered_fields = fields
self.batchsize = batchsize self.batchsize = batchsize
self.output_format = output_format self.output_format = output_format
self.ignore_keys = ignore_keys self.ignore_keys = ignore_keys
self.meta = meta self.meta = meta
self.safe_headers = safe_headers self.safe_headers = safe_headers
# Convert ' to " in meta string # Convert ' to " in meta string
meta = loads(dumps(meta).replace("'", '"')) meta = loads(dumps(meta).replace("'", '"'))
# Read the header option lines # Read the header option lines
l = self.fd.readline().strip() l = self.fd.readline(5_000_000).strip()
while l.strip().startswith("#"): while l.strip().startswith("#"):
# Parse the options out # Parse the options out
if l.startswith("#separator"): if l.startswith("#separator"):
key = str(l[1:].split(" ")[0]) key = str(l[1:].split(" ")[0])
value = str.encode(l[1:].split( value = str.encode(l[1:].split(
" ")[1].strip()).decode('unicode_escape') " ")[1].strip()).decode('unicode_escape')
self.options[key] = value self.options[key] = value
elif l.startswith("#"): elif l.startswith("#"):
key = str(l[1:].split(self.options.get('separator'))[0]) key = str(l[1:].split(self.options.get('separator'))[0])
value = l[1:].split(self.options.get('separator'))[1:] value = l[1:].split(self.options.get('separator'))[1:]
self.options[key] = value self.options[key] = value
# Read the next line # Read the next line
l = self.fd.readline().strip() l = self.fd.readline(5_000_000).strip()
self.firstLine = l self.firstLine = l
# Save mapping of fields to values: # Save mapping of fields to values:
self.fields = self.options.get('fields') self.fields = self.options.get('fields')
self.types = self.options.get('types') self.types = self.options.get('types')
self.data_types = {} self.data_types = {}
for i, val in enumerate(self.fields): for i, val in enumerate(self.fields):
# Convert field names if safe_headers is enabled # Convert field names if safe_headers is enabled
if self.safe_headers is True: if self.safe_headers is True:
self.fields[i] = self.fields[i].replace(".", "_") self.fields[i] = self.fields[i].replace(".", "_")
# Match types with each other # Match types with each other
self.data_types[self.fields[i]] = self.types[i] self.data_types[self.fields[i]] = self.types[i]
def __del__(self): def __del__(self):
self.fd.close() self.fd.close()
def __iter__(self): def __iter__(self):
return self return self
def __next__(self): def __next__(self):
retVal = "" retVal = ""
if self.firstRun is True: if self.firstRun is True:
retVal = self.firstLine retVal = self.firstLine
self.firstRun = False self.firstRun = False
else: else:
retVal = self.fd.readline().strip() retVal = self.fd.readline().strip()
# If an empty string is returned, readline is done reading # If an empty string is returned, readline is done reading
if retVal == "" or retVal is None: if retVal == "" or retVal is None:
raise StopIteration raise StopIteration
# Split out the data we are going to return # Split out the data we are going to return
retVal = retVal.split(self.options.get('separator')) retVal = retVal.split(self.options.get('separator'))
record = None record = None
# Make sure we aren't dealing with a comment line # Make sure we aren't dealing with a comment line
if len(retVal) > 0 and not str(retVal[0]).strip().startswith("#") \ if len(retVal) > 0 and not str(retVal[0]).strip().startswith("#") \
and len(retVal) is len(self.options.get("fields")): and len(retVal) is len(self.options.get("fields")):
record = OrderedDict() record = OrderedDict()
# Prepare fields for conversion # Prepare fields for conversion
for x in range(0, len(retVal)): for x in range(0, len(retVal)):
if self.safe_headers is True: if self.safe_headers is True:
converted_field_name = self.options.get( converted_field_name = self.options.get(
"fields")[x].replace(".", "_") "fields")[x].replace(".", "_")
else: else:
converted_field_name = self.options.get("fields")[x] converted_field_name = self.options.get("fields")[x]
if self.filtered_fields is None or converted_field_name in self.filtered_fields: if self.filtered_fields is None or converted_field_name in self.filtered_fields:
# Translate - to "" to fix a conversation error # Translate - to "" to fix a conversation error
if retVal[x] == "-": if retVal[x] == "-":
retVal[x] = "" retVal[x] = ""
# Save the record field if the field isn't filtered out # Save the record field if the field isn't filtered out
record[converted_field_name] = retVal[x] record[converted_field_name] = retVal[x]
# Convert values to the appropriate record type # Convert values to the appropriate record type
record = self.convert_values( record = self.convert_values(
record, self.ignore_keys, self.data_types) record, self.ignore_keys, self.data_types)
if record is not None and self.output_format == "json": if record is not None and self.output_format == "json":
# Output will be json # Output will be json
# Add metadata to json # Add metadata to json
for k, v in self.meta.items(): for k, v in self.meta.items():
record[k] = v record[k] = v
retVal = record retVal = record
elif record is not None and self.output_format == "csv": elif record is not None and self.output_format == "csv":
retVal = "" retVal = ""
# Add escaping to csv format # Add escaping to csv format
for k, v in record.items(): for k, v in record.items():
# Add escaping to string values # Add escaping to string values
if isinstance(v, str): if isinstance(v, str):
retVal += str("\"" + str(v).strip() + "\"" + ",") retVal += str("\"" + str(v).strip() + "\"" + ",")
else: else:
retVal += str(str(v).strip() + ",") retVal += str(str(v).strip() + ",")
# Remove the trailing comma # Remove the trailing comma
retVal = retVal[:-1] retVal = retVal[:-1]
else: else:
retVal = None retVal = None
return retVal return retVal
def convert_values(self, data, ignore_keys=[], data_types={}): def convert_values(self, data, ignore_keys=[], data_types={}):
keys_to_delete = [] keys_to_delete = []
for k, v in data.items(): for k, v in data.items():
# print("evaluating k: " + str(k) + " v: " + str(v)) # print("evaluating k: " + str(k) + " v: " + str(v))
if isinstance(v, dict): if isinstance(v, dict):
data[k] = self.convert_values(v) data[k] = self.convert_values(v)
else: else:
if data_types.get(k) is not None: if data_types.get(k) is not None:
if (data_types.get(k) == "port" or data_types.get(k) == "count"): if (data_types.get(k) == "port" or data_types.get(k) == "count"):
if v != "": if v != "":
data[k] = int(v) data[k] = int(v)
else: else:
keys_to_delete.append(k) keys_to_delete.append(k)
elif (data_types.get(k) == "double" or data_types.get(k) == "interval"): elif (data_types.get(k) == "double" or data_types.get(k) == "interval"):
if v != "": if v != "":
data[k] = float(v) data[k] = float(v)
else: else:
keys_to_delete.append(k) keys_to_delete.append(k)
elif data_types.get(k) == "bool": elif data_types.get(k) == "bool":
data[k] = bool(v) data[k] = bool(v)
else: else:
data[k] = v data[k] = v
for k in keys_to_delete: for k in keys_to_delete:
del data[k] del data[k]
return data return data
def get_fields(self): def get_fields(self):
"""Returns all fields present in the log file """Returns all fields present in the log file
Returns: Returns:
A python list containing all field names in the log file A python list containing all field names in the log file
""" """
field_names = "" field_names = ""
if self.output_format == "csv": if self.output_format == "csv":
for i, v in enumerate(self.fields): for i, v in enumerate(self.fields):
if self.filtered_fields is None or v in self.filtered_fields: if self.filtered_fields is None or v in self.filtered_fields:
field_names += str(v) + "," field_names += str(v) + ","
# Remove the trailing comma # Remove the trailing comma
field_names = field_names[:-1].strip() field_names = field_names[:-1].strip()
else: else:
field_names = [] field_names = []
for i, v in enumerate(self.fields): for i, v in enumerate(self.fields):
if self.filtered_fields is None or v in self.filtered_fields: if self.filtered_fields is None or v in self.filtered_fields:
field_names.append(v) field_names.append(v)
return field_names return field_names