Use generator to parse JSON lines

Instead of parsing the entire log file at once, use a generator that can yield each log entry as its own JSON line (`-l`/`--lines`).
This commit is contained in:
Marc Koch 2024-07-09 13:38:50 +02:00
parent 1620ed61a3
commit cfc544e3d7
Signed by: marc
GPG Key ID: 12406554CFB028B9
2 changed files with 27 additions and 10 deletions

View File

@ -24,9 +24,14 @@ Set JSON indentation to two spaces:
cat proxy.log | cpl2j -s 2
```
Output JSON lines:
```bash
cpl2j -l -f /var/www/proxy_logs/proxy.log
```
### Tip
Use [VisiData](https://github.com/saulpw/visidata) to explore the data in a very comfortable way:
/var/www/proxy_logs/proxy.log
```bash
cpl2j -f /var/www/proxy_logs/proxy.log | vd -f json
cpl2j -l -f /var/www/proxy_logs/proxy.log | vd -f jsonl
```

View File

@ -9,7 +9,7 @@ from . import REQUEST_LINE_RE, VALUES_LINE_RE, CLOSING_LINE_RE
# Setup argparse
argparser = argparse.ArgumentParser(
description="Translate a CiviProxy logfile into JSON format. ")
description="Translate a CiviProxy logfile into JSON Lines format.")
argparser.add_argument("-f",
"--logfile",
help="CiviProxy logfile",
@ -19,7 +19,11 @@ argparser.add_argument("-i",
"--indent",
help="number of spaces to indent JSON output",
type=int,
default=4)
default=None)
argparser.add_argument("-l",
"--lines",
help="output JSON Lines instead of JSON",
action="store_true")
def main():
@ -31,13 +35,16 @@ def main():
sys.exit()
# Parse logfile and print it to console
print(json.dumps(translate_logfile(args.logfile), indent=args.indent))
for line in translate_logfile(args.logfile,
indent=args.indent,
json_lines=args.lines):
print(line)
def translate_logfile(logfile: TextIO) -> list:
json_ = []
def translate_logfile(logfile: TextIO, indent: int, json_lines: bool) -> str:
_json = []
array = {}
with logfile as file:
array = {}
for line in file:
request_line = re.search(REQUEST_LINE_RE, line)
values_line = re.search(VALUES_LINE_RE, line)
@ -53,9 +60,14 @@ def translate_logfile(logfile: TextIO) -> list:
array[values_line.group("key")] = values_line.group("value")
elif close_line:
if array:
json_.append(array)
array = {}
return json_
if json_lines:
yield json.dumps(array, indent=indent)
array = {}
else:
_json.append(array)
array = {}
if not json_lines:
yield json.dumps(_json, indent=indent)
if __name__ == "__main__":