Limit readline()
This commit is contained in:
parent
3ff9520114
commit
7879dd3aac
@ -1,178 +1,178 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from json import loads, dumps
|
from json import loads, dumps
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from traceback import print_exc
|
from traceback import print_exc
|
||||||
|
|
||||||
# Taken from https://github.com/dgunter/ParseZeekLogs <3
|
# Taken from https://github.com/dgunter/ParseZeekLogs <3
|
||||||
|
|
||||||
|
|
||||||
class ParseZeekLogs(object):
    """
    Class that parses Zeek logs and allows log data to be output in CSV or json format.

    The constructor opens the file and consumes the leading ``#`` header lines
    (``#separator``, ``#fields``, ``#types``, ...) into ``self.options``. The
    instance is then an iterator over the remaining lines. Each ``next()`` call
    returns:

    * an ``OrderedDict`` (field name -> converted value, plus the ``meta``
      entries) when ``output_format == "json"``;
    * a comma-joined string when ``output_format == "csv"``;
    * ``None`` for lines that are skipped (comment lines, blank lines, lines
      whose column count does not match ``#fields``) and for every line when
      ``output_format`` is neither ``"json"`` nor ``"csv"``.

    Attributes:
        fd: Open text file object for the log being read
        options: OrderedDict of header options; ``"separator"`` is a string,
            every other header maps to the list of its separator-split values
        fields: List of field names from the ``#fields`` header (dots replaced
            by underscores when ``safe_headers`` is True)
        types: List of Zeek type names from the ``#types`` header
        data_types: Dict mapping each field name to its Zeek type name
        filtered_fields: Optional collection of field names to keep
        output_format: ``"json"``, ``"csv"`` or anything else (yields None)
        meta: Dict of extra key/value pairs added to every json record
        ignore_keys: Stored and forwarded to convert_values()
        batchsize: Stored as given; not used by this class itself
    """

    # Upper bound, in characters, for one readline() call, so that a corrupt
    # or hostile file without newlines cannot exhaust memory.
    MAX_LINE_LENGTH = 5_000_000

    def __init__(self, filepath, batchsize=500, fields=None, output_format=None,
                 ignore_keys=None, meta=None, safe_headers=False):
        """Open ``filepath`` and parse its header block.

        Args:
            filepath: Path of Zeek log file to read
            batchsize: Stored on the instance; not otherwise used here
            fields: Optional collection of field names to keep; None keeps all
            output_format: "json" or "csv"; any other value makes iteration
                yield None for every line
            ignore_keys: Optional list forwarded to convert_values()
            meta: Optional dict of entries added to every json record
            safe_headers: When exactly True, "." in field names becomes "_"

        Raises:
            ValueError: if the header has no ``#fields``/``#types`` line, or
                fewer types than fields.
        """
        self.fd = open(filepath, "r")
        self.options = OrderedDict()
        self.firstRun = True
        self.filtered_fields = fields
        self.batchsize = batchsize
        self.output_format = output_format
        # None sentinels instead of mutable defaults, so instances never share
        # one list/dict object through the function's default values.
        self.ignore_keys = [] if ignore_keys is None else ignore_keys
        self.meta = {} if meta is None else meta
        self.safe_headers = safe_headers

        # Read the header option lines
        raw = self._read_line()
        line = raw.strip()
        while line.startswith("#"):
            if line.startswith("#separator"):
                # "#separator \x09": the value is an escaped literal that has
                # to be decoded into the real separator character.
                parts = line[1:].split(" ")
                self.options[str(parts[0])] = (
                    parts[1].strip().encode().decode("unicode_escape"))
            else:
                # Every other header is "<key><sep><value>[<sep><value>...]"
                parts = line[1:].split(self.options.get("separator"))
                self.options[str(parts[0])] = parts[1:]

            # Read the next line
            raw = self._read_line()
            line = raw.strip()

        # First non-header line; __next__ hands it out on its first call.
        self.firstLine = line
        # An empty raw read (no newline at all) means the file ended here.
        self._header_reached_eof = raw == ""

        # Save mapping of fields to values:
        self.fields = self.options.get("fields")
        self.types = self.options.get("types")
        if (self.fields is None or self.types is None
                or len(self.types) < len(self.fields)):
            self.fd.close()
            raise ValueError(
                "Zeek log header is missing a usable #fields/#types "
                "definition: " + str(filepath))

        # Convert field names if safe_headers is enabled. Slice assignment
        # keeps self.fields and self.options["fields"] the same list object.
        if self.safe_headers is True:
            self.fields[:] = [name.replace(".", "_") for name in self.fields]

        # Match types with each other
        self.data_types = {
            name: self.types[i] for i, name in enumerate(self.fields)}

    def _read_line(self):
        """Return the next raw line, reading at most MAX_LINE_LENGTH characters."""
        return self.fd.readline(self.MAX_LINE_LENGTH)

    def close(self):
        """Close the underlying file; safe to call repeatedly or when open() failed."""
        fd = getattr(self, "fd", None)
        if fd is not None:
            fd.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __del__(self):
        # self.fd does not exist when open() raised inside __init__.
        self.close()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next log line in the configured output format.

        Returns:
            OrderedDict for "json", str for "csv", or None when the line is
            skipped or no supported output format is configured.

        Raises:
            StopIteration: when the end of the file is reached.
        """
        if self.firstRun is True:
            self.firstRun = False
            if self._header_reached_eof:
                raise StopIteration
            line = self.firstLine
        else:
            raw = self._read_line()
            # Only a truly empty read means EOF; a blank line is "\n".
            if raw == "":
                raise StopIteration
            line = raw.strip()

        # Blank lines carry no record; skip them like comment lines.
        if not line:
            return None

        # Split out the data we are going to return
        values = line.split(self.options.get("separator"))

        # Make sure we aren't dealing with a comment line, and that the row
        # has exactly one value per declared field (value comparison: `is`
        # on ints breaks for counts outside CPython's small-int cache).
        if (str(values[0]).strip().startswith("#")
                or len(values) != len(self.fields)):
            return None

        record = OrderedDict()
        for name, value in zip(self.fields, values):
            # Save the record field if the field isn't filtered out
            if self.filtered_fields is None or name in self.filtered_fields:
                # Translate - to "" to fix a conversion error
                record[name] = "" if value == "-" else value

        # Convert values to the appropriate record type
        record = self.convert_values(
            record, self.ignore_keys, self.data_types)

        if self.output_format == "json":
            # Add metadata to json
            for key, value in self.meta.items():
                record[key] = value
            return record

        if self.output_format == "csv":
            return ",".join(self._csv_cell(v) for v in record.values())

        return None

    @staticmethod
    def _csv_cell(value):
        """Render one value as a CSV cell; strings are quoted with "" escaping."""
        text = str(value).strip()
        if isinstance(value, str):
            return '"' + text.replace('"', '""') + '"'
        return text

    def convert_values(self, data, ignore_keys=None, data_types=None):
        """Convert string values in ``data`` in place according to ``data_types``.

        "port"/"count" become int and "double"/"interval" become float; an
        empty string for any of those types removes the key. "bool" becomes
        True only for the string "T" (non-string values go through bool()).
        Keys with any other type, or no type, are left unchanged.

        Args:
            data: Dict of field name -> value; modified in place
            ignore_keys: Accepted for interface compatibility; not consulted
            data_types: Dict of field name -> Zeek type name

        Returns:
            The same ``data`` object.

        Raises:
            ValueError: from int()/float() when a value is not numeric.
        """
        data_types = {} if data_types is None else data_types
        keys_to_delete = []

        for key, value in data.items():
            if isinstance(value, dict):
                # NOTE(review): nested dicts are recursed without data_types,
                # so their values come back unconverted - confirm intent.
                data[key] = self.convert_values(value)
                continue

            kind = data_types.get(key)
            if kind in ("port", "count"):
                if value != "":
                    data[key] = int(value)
                else:
                    keys_to_delete.append(key)
            elif kind in ("double", "interval"):
                if value != "":
                    data[key] = float(value)
                else:
                    keys_to_delete.append(key)
            elif kind == "bool":
                # Zeek writes booleans as "T"/"F"; bool("F") would be True.
                if isinstance(value, str):
                    data[key] = value == "T"
                else:
                    data[key] = bool(value)

        # Deletions are deferred so the dict size is stable while iterating.
        for key in keys_to_delete:
            del data[key]

        return data

    def get_fields(self):
        """Returns all fields present in the log file

        Fields excluded by ``filtered_fields`` are left out.

        Returns:
            A comma-joined string of field names when output_format is "csv",
            otherwise a python list containing the field names
        """
        selected = [
            name for name in self.fields
            if self.filtered_fields is None or name in self.filtered_fields]
        if self.output_format == "csv":
            return ",".join(str(name) for name in selected).strip()
        return selected
|
||||||
|
Loading…
Reference in New Issue
Block a user