From 6f008a18af3dcc67b3c2cbf53e8b1470d8a90049 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mario=20St=C3=B6ckl?=
Date: Wed, 30 Jul 2025 10:47:51 +0000
Subject: [PATCH] Add nginx_to_jsonl.py
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 nginx_to_jsonl.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)
 create mode 100644 nginx_to_jsonl.py

diff --git a/nginx_to_jsonl.py b/nginx_to_jsonl.py
new file mode 100644
index 0000000..6fad6c1
--- /dev/null
+++ b/nginx_to_jsonl.py
@@ -0,0 +1,101 @@
+import re, json, datetime, gzip, glob
+
+def convert_logs_to_jsonl():
+    # Define log types, the glob pattern for their input files, and the output file
+    log_types = {
+        'access': {
+            'pattern': 'access.log*',
+            'output': 'timesketch_access.jsonl',
+            'desc': 'HTTP Request Time'
+        },
+        'error': {
+            'pattern': 'error.log*',
+            'output': 'timesketch_error.jsonl',
+            'desc': 'Error Event Time'
+        },
+        'redirect': {
+            'pattern': 'redirect-access.log*',
+            'output': 'timesketch_redirect.jsonl',
+            'desc': 'Redirect Request Time'
+        }
+    }
+
+    # Regex for the combined access-log format; lines in other formats (e.g. error.log) will not match and are skipped
+    pattern = r'(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\d+) (\S+) "([^"]*)" "([^"]*)"(?:\s+"([^"]*)")?'
+
+    for log_type, config in log_types.items():
+        files = sorted(glob.glob(config['pattern']))
+        if not files:
+            continue
+
+        print(f"Processing {len(files)} {log_type} files...")
+
+        with open(config['output'], 'w') as out:
+            for log_file in files:
+                print(f"  Processing: {log_file}")
+
+                # Rotated logs may be gzip-compressed; open them transparently
+                if log_file.endswith('.gz'):
+                    opener = gzip.open
+                    mode = 'rt'
+                else:
+                    opener = open
+                    mode = 'r'
+
+                try:
+                    with opener(log_file, mode) as f:
+                        for line in f:
+                            match = re.match(pattern, line.strip())
+                            if match:
+                                groups = match.groups()
+                                ip = groups[0]
+                                remote_ident = groups[1] if groups[1] != '-' else None
+                                remote_user = groups[2] if groups[2] != '-' else None
+                                timestamp = groups[3]
+                                request = groups[4]
+                                status = groups[5]
+                                size = groups[6]
+                                referer = groups[7] if groups[7] != '-' else None
+                                user_agent = groups[8] if groups[8] != '-' else None
+                                additional = groups[9] if groups[9] else None
+
+                                # Split "METHOD URI PROTOCOL" into its parts
+                                request_parts = request.split(' ')
+                                method = request_parts[0] if len(request_parts) > 0 else None
+                                uri = request_parts[1] if len(request_parts) > 1 else None
+                                protocol = request_parts[2] if len(request_parts) > 2 else None
+
+                                # Convert e.g. "30/Jul/2025:10:47:51 +0000" to ISO 8601
+                                dt = datetime.datetime.strptime(timestamp, '%d/%b/%Y:%H:%M:%S %z')
+
+                                event = {
+                                    "message": line.strip(),
+                                    "datetime": dt.isoformat(),
+                                    "timestamp_desc": config['desc'],
+                                    "source_file": log_file,
+                                    "log_type": log_type,
+                                    "source_ip": ip,
+                                    "remote_ident": remote_ident,
+                                    "remote_user": remote_user,
+                                    "http_method": method,
+                                    "http_uri": uri,
+                                    "http_protocol": protocol,
+                                    "http_request_full": request,
+                                    "status_code": int(status),
+                                    "response_size": int(size) if size.isdigit() else 0,
+                                    "referer": referer,
+                                    "user_agent": user_agent,
+                                    "additional_field": additional,
+                                    "data_type": f"web:{log_type}:log"
+                                }
+
+                                # Remove None values so only populated fields are written
+                                event = {k: v for k, v in event.items() if v is not None}
+                                out.write(json.dumps(event) + '\n')
+
+                except Exception as e:
+                    print(f"  Error processing {log_file}: {e}")
+
+        print(f"  Output: {config['output']}")
+
+convert_logs_to_jsonl()