nginx_to_jsonl.py hinzugefügt
This commit is contained in:
parent
f8083beb9d
commit
6f008a18af
101
nginx_to_jsonl.py
Normal file
101
nginx_to_jsonl.py
Normal file
@ -0,0 +1,101 @@
|
||||
import re, json, datetime, gzip, glob, os
|
||||
|
||||
# Log families handled by the converter: glob pattern for the input files,
# name of the JSONL file to write, and the timestamp description stored on
# every event of that family.
_LOG_TYPES = {
    'access': {
        'pattern': 'access.log*',
        'output': 'timesketch_access.jsonl',
        'desc': 'HTTP Request Time',
    },
    'error': {
        'pattern': 'error.log*',
        'output': 'timesketch_error.jsonl',
        'desc': 'Error Event Time',
    },
    'redirect': {
        'pattern': 'redirect-access.log*',
        'output': 'timesketch_redirect.jsonl',
        'desc': 'Redirect Request Time',
    },
}

# One log line: ip, ident, user, [time], "request", status, size, "referer",
# "user agent" and an optional trailing quoted field. Compiled once because
# it is applied to every line of every file.
_NGINX_LINE_RE = re.compile(
    r'(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\d+) (\S+) "([^"]*)" "([^"]*)"(?:\s+"([^"]*)")?'
)

# Layout of the bracketed timestamp, e.g. "10/Oct/2023:13:55:36 +0000".
# %b parses the month abbreviation according to the current locale.
_NGINX_TIME_FORMAT = '%d/%b/%Y:%H:%M:%S %z'


def _in_dir(directory, name):
    """Return *name* placed inside *directory*, or *name* unchanged if no directory is given."""
    return os.path.join(directory, name) if directory else name


def _dash_to_none(value):
    """Map the log placeholder "-" to None and return any other value unchanged."""
    return None if value == '-' else value


def _parse_log_line(line, log_type, desc, source_file):
    """Turn one stripped log line into an event dict.

    Returns None when the line does not match the expected layout or its
    timestamp cannot be parsed, so the caller can skip just that line.
    Keys whose value would be None are left out of the returned dict.
    """
    match = _NGINX_LINE_RE.match(line)
    if match is None:
        return None

    (ip, ident, user, timestamp, request,
     status, size, referer, user_agent, additional) = match.groups()

    try:
        moment = datetime.datetime.strptime(timestamp, _NGINX_TIME_FORMAT)
    except ValueError:
        # A malformed timestamp invalidates this line only, not the whole file.
        return None

    # "METHOD URI PROTOCOL"; pad so that missing parts become None.
    method, uri, protocol = (request.split(' ') + [None, None, None])[:3]

    event = {
        "message": line,
        "datetime": moment.isoformat(),
        "timestamp_desc": desc,
        "source_file": source_file,
        "log_type": log_type,
        "source_ip": ip,
        "remote_ident": _dash_to_none(ident),
        "remote_user": _dash_to_none(user),
        "http_method": method,
        "http_uri": uri,
        "http_protocol": protocol,
        "http_request_full": request,
        "status_code": int(status),
        # The size column may be "-" (or otherwise non-numeric); store 0 then.
        "response_size": int(size) if size.isdigit() else 0,
        "referer": _dash_to_none(referer),
        "user_agent": user_agent,
        # An absent or empty trailing field is dropped.
        "additional_field": additional or None,
        "data_type": f"web:{log_type}:log",
    }
    return {key: value for key, value in event.items() if value is not None}


def convert_logs_to_jsonl(log_dir=None, output_dir=None):
    """Convert nginx log files into JSONL files, one JSON event per line.

    For each log family (access, error, redirect) every file matching the
    family's glob pattern is read in sorted name order, ".gz" files being
    decompressed transparently, and all parsed events are written to that
    family's "timesketch_*.jsonl" file. Families without input files
    produce no output file.

    Lines that do not match the expected layout, or whose timestamp cannot
    be parsed, are skipped individually; the number skipped is printed per
    file. Undecodable bytes are replaced rather than aborting the file.

    NOTE(review): the same access-style pattern is applied to "error.log*"
    files. If those files use a different layout, none of their lines will
    match and they will only show up in the skipped count - confirm.

    Args:
        log_dir: Directory searched for log files. None (default) uses the
            bare patterns, i.e. the current working directory.
        output_dir: Directory the JSONL files are written to. None
            (default) writes to the current working directory.
    """
    import zlib  # only needed to recognise corrupt gzip data below

    for log_type, config in _LOG_TYPES.items():
        files = sorted(glob.glob(_in_dir(log_dir, config['pattern'])))
        if not files:
            continue

        print(f"Processing {len(files)} {log_type} files...")
        output_path = _in_dir(output_dir, config['output'])

        with open(output_path, 'w', encoding='utf-8') as out:
            for log_file in files:
                print(f" Processing: {log_file}")

                # Rotated logs may be gzip-compressed; both openers accept
                # text mode with the same encoding arguments.
                opener = gzip.open if log_file.endswith('.gz') else open
                skipped = 0

                try:
                    with opener(log_file, 'rt', encoding='utf-8',
                                errors='replace') as f:
                        for raw_line in f:
                            line = raw_line.strip()
                            if not line:
                                continue
                            event = _parse_log_line(
                                line, log_type, config['desc'], log_file)
                            if event is None:
                                skipped += 1
                                continue
                            out.write(json.dumps(event) + '\n')
                except (OSError, EOFError, zlib.error) as e:
                    # Unreadable or truncated/corrupt file: report it and
                    # carry on with the next one.
                    print(f" Error processing {log_file}: {e}")

                if skipped:
                    print(f" Skipped {skipped} unparseable lines in {log_file}")

        print(f" Output: {output_path}")
|
||||
|
||||
# Run the conversion only when executed as a script, so that importing this
# module does not read logs or write output files as a side effect.
if __name__ == "__main__":
    convert_logs_to_jsonl()
|
Loading…
x
Reference in New Issue
Block a user