use generator to parse JSON lines

Instead of parsing the entire log file at once, use a generator and yield JSON lines.
Marc Koch 2024-07-09 13:38:50 +02:00
parent 1620ed61a3
commit cfc544e3d7
Signed by: marc
GPG Key ID: 12406554CFB028B9
2 changed files with 27 additions and 10 deletions
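
The commit message describes swapping a build-the-whole-list approach for a generator that emits each record as soon as it is complete. Below is a minimal sketch of that pattern with a made-up record format and regex; the project's actual regexes and field names are not shown here.

```python
import json
import re
from typing import Iterator, TextIO

# Illustrative pattern only: the real parser uses REQUEST_LINE_RE,
# VALUES_LINE_RE and CLOSING_LINE_RE imported from its own package.
KEY_VALUE_RE = re.compile(r"^(?P<key>\w+)\s*=\s*(?P<value>.*)$")


def records_as_json_lines(logfile: TextIO) -> Iterator[str]:
    """Yield one JSON string per finished record instead of building a list."""
    record = {}
    for line in logfile:
        match = KEY_VALUE_RE.search(line)
        if match:
            record[match.group("key")] = match.group("value")
        elif not line.strip() and record:
            # A blank line closes the record in this toy format; emit it
            # immediately so the whole file never has to sit in memory.
            yield json.dumps(record)
            record = {}
    if record:
        yield json.dumps(record)
```

The caller can then print or pipe each yielded string onward without waiting for the whole file to be parsed.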

View File

@@ -24,9 +24,14 @@ Set JSON indentation to two spaces:
 cat proxy.log | cpl2j -s 2
 ```
+Output JSON lines:
+```bash
+cpl2j -l -f /var/www/proxy_logs/proxy.log
+```
 ### Tip
 Use [VisiData](https://github.com/saulpw/visidata) to explore the data in a very comfortable way:
 ```bash
-cpl2j -f /var/www/proxy_logs/proxy.log | vd -f json
+cpl2j -l -f /var/www/proxy_logs/proxy.log | vd -f jsonl
 ```
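
With the new `-l` flag, every record arrives as a complete JSON document on its own line, so downstream consumers can process the stream record by record. A small sketch of reading that output from Python via a pipe; the filter script name is invented for illustration:

```python
import json
import sys

# Intended to be fed by something like:
#   cpl2j -l -f /var/www/proxy_logs/proxy.log | python filter_entries.py
# (filter_entries.py is a hypothetical script name)
for line in sys.stdin:
    entry = json.loads(line)  # each line is one complete JSON object
    print(entry)              # e.g. inspect, filter or aggregate entries here
```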

View File

@@ -9,7 +9,7 @@ from . import REQUEST_LINE_RE, VALUES_LINE_RE, CLOSING_LINE_RE
 # Setup argparse
 argparser = argparse.ArgumentParser(
-    description="Translate a CiviProxy logfile into JSON format. ")
+    description="Translate a CiviProxy logfile into JSON Lines format.")
 argparser.add_argument("-f",
                        "--logfile",
                        help="CiviProxy logfile",
@@ -19,7 +19,11 @@ argparser.add_argument("-i",
                        "--indent",
                        help="number of spaces to indent JSON output",
                        type=int,
-                       default=4)
+                       default=None)
+argparser.add_argument("-l",
+                       "--lines",
+                       help="output JSON Lines instead of JSON",
+                       action="store_true")


 def main():
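
The `--indent` default also changes from 4 to None above, which matters for the new mode: `json.dumps` only returns a single-line string when `indent` is None, and JSON Lines needs exactly one object per line. A quick illustration (the record is placeholder data):

```python
import json

record = {"key": "value"}  # placeholder data

print(json.dumps(record, indent=None))  # {"key": "value"} -- stays on one line
print(json.dumps(record, indent=4))     # spreads the same object over several lines
```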
@@ -31,13 +35,16 @@ def main():
         sys.exit()

     # Parse logfile and print it to console
-    print(json.dumps(translate_logfile(args.logfile), indent=args.indent))
+    for line in translate_logfile(args.logfile,
+                                  indent=args.indent,
+                                  json_lines=args.lines):
+        print(line)


-def translate_logfile(logfile: TextIO) -> list:
-    json_ = []
+def translate_logfile(logfile: TextIO, indent: int, json_lines: bool) -> str:
+    _json = []
+    array = {}
     with logfile as file:
-        array = {}
         for line in file:
             request_line = re.search(REQUEST_LINE_RE, line)
             values_line = re.search(VALUES_LINE_RE, line)
@@ -53,9 +60,14 @@ def translate_logfile(logfile: TextIO) -> list:
                 array[values_line.group("key")] = values_line.group("value")
             elif close_line:
                 if array:
-                    json_.append(array)
-                    array = {}
-    return json_
+                    if json_lines:
+                        yield json.dumps(array, indent=indent)
+                        array = {}
+                    else:
+                        _json.append(array)
+                        array = {}
+    if not json_lines:
+        yield json.dumps(_json, indent=indent)


 if __name__ == "__main__":
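
After this change, `main()` simply iterates over whatever `translate_logfile` yields. A sketch of driving the generator directly from other code, assuming the signature shown in the diff; the import path is an assumption, since the diff does not show which module defines the function:

```python
from cpl2j import translate_logfile  # module path assumed, not shown in the diff

log_path = "/var/www/proxy_logs/proxy.log"

# JSON Lines mode: one JSON object is yielded per parsed record.
with open(log_path) as logfile:
    for json_line in translate_logfile(logfile, indent=None, json_lines=True):
        print(json_line)

# Plain JSON mode: nothing is yielded until the end, then a single
# JSON array containing all records (optionally indented).
with open(log_path) as logfile:
    for document in translate_logfile(logfile, indent=2, json_lines=False):
        print(document)
```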