forked from mk12/scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathwordcount.py
More file actions
executable file
·61 lines (50 loc) · 1.57 KB
/
wordcount.py
File metadata and controls
executable file
·61 lines (50 loc) · 1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python3
import argparse
import csv
from dateutil.parser import parse
import re
import sys
def entries(input):
    """Yield ``(date, text)`` pairs for each dated entry of a journal.

    A line that starts with ``#`` is a header: the text after the ``#`` is
    parsed as a date with ``dateutil``'s ``parse`` and begins a new entry.
    Each following non-blank line is appended (newline included) to that
    entry until the next header or the end of the input. Blank and
    whitespace-only lines are skipped and never counted as entry text.

    Args:
        input: Iterable of text lines, such as an open text file.

    Yields:
        ``(date, entry)`` tuples in header order, where ``date`` is the
        result of ``parse(...).date()`` and ``entry`` is the concatenated
        text of the entry (possibly empty).

    Raises:
        SystemExit: With status 1, after printing a message to stderr, when
            a header line cannot be parsed as a date.
    """
    date = None
    chunks = []
    for line in input:
        if not line.strip():
            continue
        if not line.startswith("#"):
            chunks.append(line)
            continue
        # A new header closes the entry that was being collected, if any.
        if date is not None:
            yield date, "".join(chunks)
        # Reset at every header, including the first one: text that appears
        # before any date header belongs to no date and must not be counted
        # towards the first dated entry.
        chunks = []
        try:
            date = parse(line[1:]).date()
        except ValueError:
            print(f"Failed to parse date: {line}", file=sys.stderr)
            sys.exit(1)
    # Flush the final entry; nothing is yielded if no header was ever seen.
    if date is not None:
        yield date, "".join(chunks)
def write_counts(output, input, regex):
    """Write a ``date,count`` CSV with one row per journal entry.

    The first row is the header ``date,count``. Each following row holds an
    entry's date and the number of matches of ``regex`` found in that
    entry's text.

    Args:
        output: Writable text stream that receives the CSV.
        input: Iterable of journal lines, passed straight to ``entries``.
        regex: Pattern (string or compiled) handed to ``re.findall``.
    """
    # lineterminator="\n" keeps the rows identical to what print() produced;
    # a text-mode stream still applies its own platform newline translation.
    writer = csv.writer(output, lineterminator="\n")
    writer.writerow(["date", "count"])
    for date, entry in entries(input):
        writer.writerow([date, len(re.findall(regex, entry))])
def parse_args(argv=None):
    """Parse the command-line options of the word-count tool.

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default) argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with three attributes:
        ``input`` (journal file path, required positional),
        ``output`` (CSV path, default ``"wordcount.csv"``) and
        ``regex`` (pattern to count per day, default ``r"\\b\\w+\\b"``).
    """
    parser = argparse.ArgumentParser(
        # Show each option's default value in the --help output.
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("input", metavar="F", help="Journal input file")
    parser.add_argument(
        "-o", "--output", default="wordcount.csv", help="Output CSV file"
    )
    parser.add_argument(
        "-r",
        "--regex",
        default=r"\b\w+\b",
        help="Count occurrences of regex per day",
    )
    return parser.parse_args(argv)
def main():
    """Run the tool: parse arguments, compile the regex, write the CSV.

    The regex from the command line is compiled case-insensitively and
    echoed to stdout. The input file is then read and the per-entry match
    counts are written to the output file.

    Raises:
        SystemExit: With status 1, after printing a message to stderr, when
            the supplied regex is not a valid pattern.
    """
    args = parse_args()
    try:
        regex = re.compile(args.regex, re.IGNORECASE)
    except re.error as err:
        # Report a bad user-supplied pattern the same way a bad date header
        # is reported, instead of dumping a traceback.
        print(f"Invalid regex {args.regex!r}: {err}", file=sys.stderr)
        sys.exit(1)
    print(f"Using regex: {regex}")
    # The input is opened first, so a missing input file does not leave a
    # truncated output file behind.
    with open(args.input) as input_file, open(args.output, "w") as output_file:
        write_counts(output_file, input_file, regex)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()