# nginx_to_jsonl/nginx_to_jsonl.py
#
# 102 lines
# 4.6 KiB
# Python

import re, json, datetime, gzip, glob, os
# Access-log layout expected by this script: ip, ident, user, [time],
# "request", status, size, "referer", "user-agent", optionally followed by one
# more quoted field.  Compiled once so it is not looked up again per line.
_NGINX_LINE_RE = re.compile(
    r'(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\d+) (\S+) "([^"]*)" "([^"]*)"(?:\s+"([^"]*)")?'
)
_NGINX_TIME_FORMAT = '%d/%b/%Y:%H:%M:%S %z'


def _dash_to_none(value):
    """Return ``None`` for the ``-`` placeholder, otherwise *value* unchanged."""
    return None if value == '-' else value


def _open_log(path):
    """Open *path* for text reading, using ``gzip`` for ``.gz`` files.

    Undecodable bytes are kept as ``\\xNN`` escapes instead of raising, so a
    single odd byte in a request or user agent does not abort the whole file.
    """
    opener = gzip.open if path.endswith('.gz') else open
    return opener(path, 'rt', encoding='utf-8', errors='backslashreplace')


def _build_event(raw, log_type, log_file, desc):
    """Convert one stripped log line into an event dict.

    Returns ``None`` when the line does not match ``_NGINX_LINE_RE`` or when
    its timestamp cannot be parsed with ``_NGINX_TIME_FORMAT``.
    """
    match = _NGINX_LINE_RE.match(raw)
    if match is None:
        return None

    (ip, ident, user, timestamp, request,
     status, size, referer, user_agent, additional) = match.groups()

    try:
        dt = datetime.datetime.strptime(timestamp, _NGINX_TIME_FORMAT)
    except ValueError:
        return None

    # The request is "<method> <uri> <protocol>"; pad so that short or
    # malformed requests simply yield None for the missing parts.
    method, uri, protocol = (request.split(' ') + [None, None, None])[:3]

    event = {
        "message": raw,
        "datetime": dt.isoformat(),
        "timestamp_desc": desc,
        "source_file": log_file,
        "log_type": log_type,
        "source_ip": ip,
        "remote_ident": _dash_to_none(ident),
        "remote_user": _dash_to_none(user),
        "http_method": method,
        "http_uri": uri,
        "http_protocol": protocol,
        "http_request_full": request,
        "status_code": int(status),
        # A non-numeric size (e.g. "-") is recorded as 0.
        "response_size": int(size) if size.isdigit() else 0,
        "referer": _dash_to_none(referer),
        "user_agent": user_agent,
        "additional_field": additional or None,
        "data_type": f"web:{log_type}:log",
    }
    # Fields without a value are omitted from the output entirely.
    return {key: value for key, value in event.items() if value is not None}


def convert_logs_to_jsonl():
    """Convert nginx log files in the current directory to JSONL event files.

    For each log type (access, error, redirect) every file matching the
    type's glob pattern is read (``.gz`` files are decompressed on the fly),
    each parseable line is turned into one JSON object, and all objects for
    that type are written to a single ``timesketch_<type>.jsonl`` file.
    Log types with no matching files produce no output file.

    Lines that do not match the expected layout, or whose timestamp cannot be
    parsed, are skipped individually and counted; the count is printed per
    file.  An I/O-level failure on one file is reported and processing
    continues with the next file.

    NOTE(review): the same access-log pattern is applied to ``error.log*``.
    nginx's native error-log layout presumably differs, in which case those
    lines will all be reported as skipped -- confirm whether a dedicated
    error-log pattern is needed.
    """
    log_types = {
        'access': {
            'pattern': 'access.log*',
            'output': 'timesketch_access.jsonl',
            'desc': 'HTTP Request Time',
        },
        'error': {
            'pattern': 'error.log*',
            'output': 'timesketch_error.jsonl',
            'desc': 'Error Event Time',
        },
        'redirect': {
            'pattern': 'redirect-access.log*',
            'output': 'timesketch_redirect.jsonl',
            'desc': 'Redirect Request Time',
        },
    }

    for log_type, config in log_types.items():
        files = sorted(glob.glob(config['pattern']))
        if not files:
            continue

        print(f"Processing {len(files)} {log_type} files...")
        with open(config['output'], 'w', encoding='utf-8') as out:
            for log_file in files:
                print(f" Processing: {log_file}")
                skipped = 0
                try:
                    with _open_log(log_file) as f:
                        for line in f:
                            raw = line.strip()
                            if not raw:
                                continue
                            event = _build_event(
                                raw, log_type, log_file, config['desc']
                            )
                            if event is None:
                                skipped += 1
                                continue
                            out.write(json.dumps(event) + '\n')
                except Exception as e:
                    # Best-effort batch boundary: a broken or truncated file
                    # must not stop the remaining files from being converted.
                    print(f" Error processing {log_file}: {e}")
                if skipped:
                    print(f" Skipped {skipped} unparseable line(s) in {log_file}")
        print(f" Output: {config['output']}")
if __name__ == "__main__":
    # Run the conversion only when executed as a script, so importing this
    # module does not read and write files as a side effect.
    convert_logs_to_jsonl()