import datetime
import glob
import gzip
import json
import os
import re

# Per log type: the glob used to find input files (relative to the current
# working directory), the JSONL file to write, and the value stored in each
# event's "timestamp_desc" field.
_LOG_TYPES = {
    'access': {
        'pattern': 'access.log*',
        'output': 'timesketch_access.jsonl',
        'desc': 'HTTP Request Time',
    },
    'error': {
        'pattern': 'error.log*',
        'output': 'timesketch_error.jsonl',
        'desc': 'Error Event Time',
    },
    'redirect': {
        'pattern': 'redirect-access.log*',
        'output': 'timesketch_redirect.jsonl',
        'desc': 'Redirect Request Time',
    },
}

# Regex pattern for nginx logs: nine mandatory fields plus an optional tenth
# quoted field. Compiled once because it is applied to every input line.
# NOTE(review): lines that do not match (presumably most nginx error-log
# lines, which use a different layout) are skipped silently -- confirm that
# this is intended for the 'error' log type.
_NGINX_LINE_RE = re.compile(
    r'(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\d+) (\S+) '
    r'"([^"]*)" "([^"]*)"(?:\s+"([^"]*)")?'
)

_TIMESTAMP_FORMAT = '%d/%b/%Y:%H:%M:%S %z'


def _none_if_dash(value):
    """Return None for the '-' placeholder, otherwise the value unchanged."""
    return None if value == '-' else value


def _build_event(raw_line, log_file, log_type, desc):
    """Convert one stripped log line into an event dict.

    Returns None when the line does not match the log regex. Raises
    ValueError when the line matches but its timestamp cannot be parsed.
    Keys whose value would be None are omitted from the returned dict.
    """
    match = _NGINX_LINE_RE.match(raw_line)
    if match is None:
        return None

    (ip, remote_ident, remote_user, timestamp, request,
     status, size, referer, user_agent, additional) = match.groups()

    # Split the request line into method / URI / protocol; missing trailing
    # parts become None (and are therefore dropped from the event).
    request_parts = request.split(' ')
    method, uri, protocol = (request_parts + [None, None, None])[:3]

    dt = datetime.datetime.strptime(timestamp, _TIMESTAMP_FORMAT)

    event = {
        "message": raw_line,
        "datetime": dt.isoformat(),
        "timestamp_desc": desc,
        "source_file": log_file,
        "log_type": log_type,
        "source_ip": ip,
        "remote_ident": _none_if_dash(remote_ident),
        "remote_user": _none_if_dash(remote_user),
        "http_method": method,
        "http_uri": uri,
        "http_protocol": protocol,
        "http_request_full": request,
        "status_code": int(status),
        # A non-numeric size (e.g. '-') is recorded as 0.
        "response_size": int(size) if size.isdigit() else 0,
        "referer": _none_if_dash(referer),
        "user_agent": user_agent,
        # The optional tenth field is absent (None) or may be empty.
        "additional_field": additional or None,
        "data_type": f"web:{log_type}:log",
    }
    # Remove None values
    return {k: v for k, v in event.items() if v is not None}


def _open_log(log_file):
    """Open a plain or gzip-compressed log file for text reading.

    Undecodable bytes are replaced rather than raising, so one stray byte
    in a request or user agent does not abort the whole file.
    """
    opener = gzip.open if log_file.endswith('.gz') else open
    mode = 'rt' if log_file.endswith('.gz') else 'r'
    return opener(log_file, mode, encoding='utf-8', errors='replace')


def _convert_file(log_file, log_type, desc, out):
    """Write one JSON line to `out` for every parseable line of `log_file`."""
    with _open_log(log_file) as f:
        for line_num, line in enumerate(f, start=1):
            stripped = line.strip()
            try:
                event = _build_event(stripped, log_file, log_type, desc)
            except ValueError as e:
                # A bad timestamp affects only this line; keep going so the
                # remainder of the file is still converted.
                print(f"    Skipping {log_file}:{line_num}: {e}")
                continue
            if event is not None:
                out.write(json.dumps(event) + '\n')


def convert_logs_to_jsonl():
    """Convert nginx log files in the current directory to JSONL files.

    For each configured log type, every file matching its glob pattern
    (including '.gz' files, which are decompressed on the fly) is parsed
    line by line and the resulting events are written to that type's output
    file, one JSON object per line. Log types with no matching files are
    skipped and no output file is created for them. Progress and errors are
    reported on stdout; nothing is returned.
    """
    for log_type, config in _LOG_TYPES.items():
        files = sorted(glob.glob(config['pattern']))
        if not files:
            continue

        print(f"Processing {len(files)} {log_type} files...")

        with open(config['output'], 'w', encoding='utf-8') as out:
            for log_file in files:
                print(f"  Processing: {log_file}")
                try:
                    _convert_file(log_file, log_type, config['desc'], out)
                except Exception as e:
                    # Deliberate best-effort boundary: an unreadable or
                    # corrupt file is reported and the next file is tried.
                    print(f"    Error processing {log_file}: {e}")

        print(f"  Output: {config['output']}")


# Runs on import as well as when executed as a script (unchanged behaviour).
convert_logs_to_jsonl()