I have Python Lambda code that converts syslog records to JSON. When it is deployed, it fails with unexpected errors.
from __future__ import print_function
import base64
import json
import gzip
import re
# Runs once, when the module is first imported.
print('Loading function')
# Compiled once at import time so the pattern is not rebuilt for every record.
# Capture groups: 1 = timestamp, 2 = hostname, 3 = program,
# 4 = process id (optional, None when absent), 5 = message.
_SYSLOG_PATTERN = re.compile(
    r"^((?:\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?"
    r"|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b\s+(?:(?:0[1-9])|(?:[12][0-9])|(?:3[01])|[1-9])\s+"
    r"(?:(?:2[0123]|[01]?[0-9]):(?:[0-5][0-9]):(?:(?:[0-5]?[0-9]|60)(?:[:\.,][0-9]+)?)))) (?:<(?:[0-9]+).(?:[0-9]+)> )"
    r"?((?:[a-zA-Z0-9._-]+)) ([\w\._/%-]+)(?:\[((?:[1-9][0-9]*))\])?: (.*)"
)


def lambda_handler(event, context):
    """Convert base64-encoded syslog lines in ``event['records']`` to JSON.

    Each input record must carry a ``recordId`` and a base64-encoded ``data``
    field. NOTE(review): this record shape looks like a Kinesis Data Firehose
    transformation event -- confirm against the deployment.

    Args:
        event: Mapping with a ``'records'`` list of input records.
        context: Lambda context object; not used.

    Returns:
        ``{'records': [...]}`` with one output record per input record, in
        the same order. A parsed record has ``result == 'Ok'`` and ``data``
        set to the base64 text of a JSON object with the keys ``timestamp``,
        ``hostname``, ``program``, ``processid`` and ``message``. A record
        that cannot be decoded or does not match the syslog pattern has
        ``result == 'ProcessingFailed'`` and its original ``data`` untouched.
    """
    output = []
    succeeded_record_cnt = 0
    failed_record_cnt = 0

    for record in event['records']:
        print(record['recordId'])

        # b64decode returns bytes, but the pattern is a text pattern; decode
        # first, otherwise re raises "cannot use a string pattern on a
        # bytes-like object". Invalid base64 (binascii.Error) and invalid
        # UTF-8 (UnicodeDecodeError) are both ValueError subclasses and are
        # reported as a failed record instead of aborting the whole batch.
        try:
            payload = base64.b64decode(record['data']).decode('utf-8')
        except ValueError:
            m = None
        else:
            m = _SYSLOG_PATTERN.match(payload)

        if m:
            succeeded_record_cnt += 1
            data_field = {
                'timestamp': m.group(1),
                'hostname': m.group(2),
                'program': m.group(3),
                'processid': m.group(4),
                'message': m.group(5)
            }
            # b64encode needs bytes in and gives bytes out; the response has
            # to be JSON-serializable, so convert the result back to text.
            encoded = base64.b64encode(json.dumps(data_field).encode('utf-8'))
            output_record = {
                'recordId': record['recordId'],
                'result': 'Ok',
                'data': encoded.decode('ascii')
            }
        else:
            print('Parsing failed')
            failed_record_cnt += 1
            output_record = {
                'recordId': record['recordId'],
                'result': 'ProcessingFailed',
                'data': record['data']
            }
        output.append(output_record)

    print('Processing completed. Successful records {}, Failed records {}.'.format(succeeded_record_cnt, failed_record_cnt))
    return {'records': output}
When I deploy this, I get an error saying that it expects the data to be an object. I decoded the records and deployed it again, but I am getting similar errors:
[ERROR] TypeError: cannot use a string pattern on a bytes-like object
Traceback (most recent call last):
File "/var/task/ec2_logs_parquet.py", line 24, in lambda_handler
m = p.match(payload)
I tried to fix that with the patch below, which decodes the payload and stores the result in a separate variable to pass to the regex, but it is not working.
# Attempted patch: turn the base64 payload into text before matching.
payload = base64.b64decode(record['data'])  # b64decode yields bytes
payload_str = payload.decode('utf-8')  # bytes -> str, so the str pattern can be applied
p = re.compile(regex_string)
m = p.match(payload_str)
I am still getting an error. Did I miss anything here?
This is a much simpler way to parse syslog records.
def convert_log(row):
    """Split one syslog line into its fields.

    The line is expected to look like
    ``Mmm dd hh:mm:ss host program[pid]: message``; the ``[pid]`` part is
    optional. The first 15 characters are taken verbatim as the timestamp.

    Args:
        row: A single syslog line, with or without its trailing newline.

    Returns:
        A dict with the keys ``timestamp``, ``hostname``, ``program``,
        ``processid`` (empty string when the line carries no pid) and
        ``message``.

    Raises:
        ValueError: If fewer than three whitespace-separated fields follow
            the timestamp.
    """
    # Lines read from a file keep their terminator; drop it so it does not
    # end up inside the message.
    row = row.rstrip('\r\n')

    timestamp = row[:15]
    # Position 15 is the separator after the timestamp, so fields start at 16.
    host, prog, message = row[16:].split(maxsplit=2)

    if '[' in prog:
        # "name[1234]:" -> program "name", pid "1234".
        prog, _, process = prog.partition('[')
        process = process.rstrip(']:')
    else:
        # "name:" -> "name". The result must be stored back into prog,
        # otherwise the trailing colon stays in the program field.
        prog = prog.rstrip(':')
        process = ''

    data_field = {
        'timestamp': timestamp,
        'hostname': host,
        'program': prog,
        'processid': process,
        'message': message
    }
    return data_field
# Parse every line of the local syslog file and print the resulting dict.
# The with-block closes the file handle even if a line fails to parse.
with open('/var/log/syslog') as syslog_file:
    for line in syslog_file:
        print( convert_log( line ))