nginx_to_jsonl.py hinzugefügt
This commit is contained in:
parent
f8083beb9d
commit
6f008a18af
101
nginx_to_jsonl.py
Normal file
101
nginx_to_jsonl.py
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
import re, json, datetime, gzip, glob, os
|
||||||
|
|
||||||
|
# Matcher for one nginx access-log line: client address, ident, user,
# bracketed timestamp, quoted request, status, size, quoted referer, quoted
# user agent and an optional trailing quoted field. Compiled once so the
# per-line loop does not have to look the pattern up on every iteration.
_NGINX_LINE_RE = re.compile(
    r'(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\d+) (\S+) "([^"]*)" "([^"]*)"(?:\s+"([^"]*)")?'
)

# strptime layout of the bracketed timestamp, e.g. "10/Oct/2023:13:55:36 +0000".
_NGINX_TIME_FORMAT = '%d/%b/%Y:%H:%M:%S %z'


def _parse_nginx_line(line, log_file, log_type, desc):
    """Turn one stripped log line into a Timesketch event dict.

    Args:
        line: The log line with surrounding whitespace already removed.
        log_file: Name of the file the line came from (stored as-is).
        log_type: Key of the log type being processed (e.g. ``'access'``).
        desc: Value to store in the event's ``timestamp_desc`` field.

    Returns:
        The event as a dict with all ``None`` values removed, or ``None``
        when the line does not match the expected layout or its timestamp
        cannot be parsed.
    """
    match = _NGINX_LINE_RE.match(line)
    if match is None:
        return None

    (ip, remote_ident, remote_user, timestamp, request,
     status, size, referer, user_agent, additional) = match.groups()

    try:
        dt = datetime.datetime.strptime(timestamp, _NGINX_TIME_FORMAT)
    except ValueError:
        # A malformed timestamp only invalidates this line, not the file.
        return None

    # The request is normally "<method> <uri> <protocol>"; shorter requests
    # simply leave the missing parts out of the event.
    request_parts = request.split(' ')
    method = request_parts[0]
    uri = request_parts[1] if len(request_parts) > 1 else None
    protocol = request_parts[2] if len(request_parts) > 2 else None

    event = {
        "message": line,
        "datetime": dt.isoformat(),
        "timestamp_desc": desc,
        "source_file": log_file,
        "log_type": log_type,
        "source_ip": ip,
        # "-" is the placeholder for "no value" in these columns.
        "remote_ident": remote_ident if remote_ident != '-' else None,
        "remote_user": remote_user if remote_user != '-' else None,
        "http_method": method,
        "http_uri": uri,
        "http_protocol": protocol,
        "http_request_full": request,
        "status_code": int(status),
        # The size column may be "-" instead of a number; report 0 then.
        "response_size": int(size) if size.isdigit() else 0,
        "referer": referer if referer != '-' else None,
        "user_agent": user_agent,
        "additional_field": additional or None,
        "data_type": f"web:{log_type}:log",
    }

    # Remove None values
    return {k: v for k, v in event.items() if v is not None}


def convert_logs_to_jsonl():
    """Convert nginx log files in the current directory to Timesketch JSONL.

    For each configured log type, every file matching its glob pattern
    (plain or ``.gz``) is read in sorted name order and each parseable line
    is written as one JSON object per line to that type's output file. A
    log type with no matching files is skipped and no output file is
    created for it.

    Lines that do not match the access-log layout, or whose timestamp is
    invalid, are skipped individually and the number of skipped lines is
    printed per file. NOTE(review): native nginx ``error_log`` lines
    presumably use a different layout and would all be skipped here --
    confirm whether the ``error`` type needs its own parser.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # Define log types and their patterns
    log_types = {
        'access': {
            'pattern': 'access.log*',
            'output': 'timesketch_access.jsonl',
            'desc': 'HTTP Request Time',
        },
        'error': {
            'pattern': 'error.log*',
            'output': 'timesketch_error.jsonl',
            'desc': 'Error Event Time',
        },
        'redirect': {
            'pattern': 'redirect-access.log*',
            'output': 'timesketch_redirect.jsonl',
            'desc': 'Redirect Request Time',
        },
    }

    for log_type, config in log_types.items():
        files = sorted(glob.glob(config['pattern']))
        if not files:
            continue

        print(f"Processing {len(files)} {log_type} files...")

        with open(config['output'], 'w', encoding='utf-8') as out:
            for log_file in files:
                print(f"  Processing: {log_file}")

                # Handle compressed files
                opener = gzip.open if log_file.endswith('.gz') else open
                skipped = 0

                try:
                    # errors='replace' keeps a stray undecodable byte from
                    # aborting the whole file.
                    with opener(log_file, 'rt', encoding='utf-8',
                                errors='replace') as f:
                        for raw_line in f:
                            line = raw_line.strip()
                            if not line:
                                continue
                            event = _parse_nginx_line(
                                line, log_file, log_type, config['desc'])
                            if event is None:
                                skipped += 1
                                continue
                            out.write(json.dumps(event) + '\n')
                except Exception as e:
                    # Deliberate best-effort boundary: an unreadable or
                    # corrupt file is reported and the remaining files are
                    # still processed.
                    print(f"    Error processing {log_file}: {e}")

                if skipped:
                    print(f"    Skipped {skipped} unparseable line(s)")

        print(f"  Output: {config['output']}")
|
||||||
|
|
||||||
|
# Run the conversion unconditionally at module level (there is no
# __main__ guard, so importing this file also triggers it).
convert_logs_to_jsonl()
|
Loading…
x
Reference in New Issue
Block a user