Skip to content

Commit da978a8

Browse files
authored
Support for parsing dividend payouts (#12)
* Support for parsing dividend payout transactions
* code formatting (black)
* fixed cli calculation
* add support for identifying dividend reinvestments
* v0.3.7 version bump
1 parent 2b5e72f commit da978a8

File tree

10 files changed

+59
-18
lines changed

10 files changed

+59
-18
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## 0.3.7 - 2020-12-24
4+
5+
- Support for parsing dividend transactions
6+
37
## 0.3.6 - 2020-12-21
48

59
- Support for parsing folios without advisor

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,10 @@ data = casparser.read_cas_pdf('/path/to/cas/pdf/file.pdf', 'password')
7272
"amount": "number",
7373
"units": "number",
7474
"nav": "number",
75-
"balance": "number"
75+
"balance": "number",
76+
"is_dividend_payout": "boolean",
77+
"is_dividend_reinvestment": "boolean",
78+
"dividend_rate": null
7679
}
7780
]
7881
}

casparser/VERSION.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.3.6
1+
0.3.7

casparser/cli.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,9 @@ def print_summary(data, tablefmt="fancy_grid", output_filename=None, include_zer
8787
if scheme["close"] < 1e-3 and not include_zero_folios:
8888
continue
8989

90-
calc_close = scheme["open"] + sum([x["units"] for x in scheme["transactions"]])
90+
calc_close = scheme["open"] + sum(
91+
[x["units"] for x in scheme["transactions"] if x["units"] is not None]
92+
)
9193
valuation = scheme["valuation"]
9294

9395
# Check if calculated close (i.e. open + units from all transactions) is same as
@@ -186,7 +188,7 @@ def print_summary(data, tablefmt="fancy_grid", output_filename=None, include_zer
186188
"-a",
187189
"--include-all",
188190
is_flag=True,
189-
help="Include schemes with zero valuation in the summary output"
191+
help="Include schemes with zero valuation in the summary output",
190192
)
191193
@click.option(
192194
"--force-pdfminer", is_flag=True, help="Force PDFMiner parser even if MuPDF is detected"
@@ -209,7 +211,10 @@ def cli(output, summary, password, include_all, force_pdfminer, filename):
209211
sys.exit(1)
210212
if summary:
211213
print_summary(
212-
data, tablefmt=summary, include_zero_folios=include_all, output_filename=None if output_ext == ".json" else output
214+
data,
215+
tablefmt=summary,
216+
include_zero_folios=include_all,
217+
output_filename=None if output_ext == ".json" else output,
213218
)
214219
if output_ext == ".json":
215220
with open(output, "w") as fp:

casparser/parsers/pdfminer.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ def parse_investor_info(layout, width, height) -> InvestorInfo:
4646
mobile = m.group(1).strip()
4747
address_lines.append(txt)
4848
if mobile is not None:
49-
return InvestorInfo(email=email, name=name, mobile=mobile, address="\n".join(address_lines))
49+
return InvestorInfo(
50+
email=email, name=name, mobile=mobile, address="\n".join(address_lines)
51+
)
5052
if email is None or mobile is None:
5153
raise CASParseError("Unable to parse investor data")
5254

casparser/process.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
from .exceptions import HeaderParseError, CASParseError
77
from .regex import FOLIO_RE, HEADER_RE, SCHEME_RE
8-
from .regex import CLOSE_UNITS_RE, NAV_RE, OPEN_UNITS_RE, VALUATION_RE, TRANSACTION_RE
8+
from .regex import CLOSE_UNITS_RE, NAV_RE, OPEN_UNITS_RE, VALUATION_RE
9+
from .regex import DESCRIPTION_TAIL_RE, DIVIDEND_RE, TRANSACTION_RE
910

1011

1112
def parse_header(text):
@@ -31,8 +32,14 @@ def process_cas_text(text):
3132
current_folio = None
3233
current_amc = None
3334
curr_scheme_data = {}
35+
balance = Decimal(0.0)
3436
lines = text.split("\u2029")
3537
for line in lines:
38+
if m := re.search(DESCRIPTION_TAIL_RE, line, re.I | re.DOTALL):
39+
description_tail = m.group(1).rstrip()
40+
line = line.replace(description_tail, "")
41+
else:
42+
description_tail = ""
3643
if amc_match := re.search(r"^(.+?)\s+(MF|Mutual\s+Fund)$", line, re.I | re.DOTALL):
3744
current_amc = amc_match.group(0)
3845
elif m := re.search(FOLIO_RE, line, re.I):
@@ -70,6 +77,7 @@ def process_cas_text(text):
7077
"valuation": {"date": None, "value": 0, "nav": 0},
7178
"transactions": [],
7279
}
80+
balance = Decimal(0.0)
7381
if not curr_scheme_data:
7482
continue
7583
if m := re.search(OPEN_UNITS_RE, line):
@@ -90,11 +98,23 @@ def process_cas_text(text):
9098
continue
9199
if m := re.search(TRANSACTION_RE, line, re.DOTALL):
92100
date = date_parser.parse(m.group(1)).date()
101+
desc = m.group(2).strip() + description_tail
93102
amt = Decimal(m.group(3).replace(",", "_").replace("(", "-"))
94-
units = Decimal(m.group(4).replace(",", "_").replace("(", "-"))
95-
nav = Decimal(m.group(5).replace(",", "_"))
96-
balance = Decimal(m.group(6).replace(",", "_"))
97-
desc = m.group(2).strip()
103+
if m.group(4) is None:
104+
units = None
105+
nav = None
106+
else:
107+
units = Decimal(m.group(4).replace(",", "_").replace("(", "-"))
108+
nav = Decimal(m.group(5).replace(",", "_"))
109+
balance = Decimal(m.group(6).replace(",", "_"))
110+
if div_match := re.search(DIVIDEND_RE, desc, re.I | re.DOTALL):
111+
reinvest_flag, rate = div_match.groups()
112+
is_dividend_payout = reinvest_flag is None
113+
is_dividend_reinvestment = not is_dividend_payout
114+
dividend_rate = Decimal(rate)
115+
else:
116+
is_dividend_payout = is_dividend_reinvestment = False
117+
dividend_rate = None
98118
curr_scheme_data["transactions"].append(
99119
{
100120
"date": date,
@@ -103,6 +123,9 @@ def process_cas_text(text):
103123
"units": units,
104124
"nav": nav,
105125
"balance": balance,
126+
"is_dividend_payout": is_dividend_payout,
127+
"is_dividend_reinvestment": is_dividend_reinvestment,
128+
"dividend_rate": dividend_rate,
106129
}
107130
)
108131
if curr_scheme_data:

casparser/regex.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
NAV_RE = r"NAV\s+on\s+(\d{2}-[A-Za-z]{3}-\d{4})\s*:\s*INR\s*([\d,.]+)"
1313

1414
TRANSACTION_RE = (
15-
r"(\d{2}-[A-Za-z]{3}-\d{4})\t\t([^\t]+?)\t\t([(\d,.]+)\)*\t\t"
16-
r"([(\d,.]+)\)*\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*"
15+
# r"(\d{2}-[A-Za-z]{3}-\d{4})\t\t([^\t]+?)\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*"
16+
r"(\d{2}-[A-Za-z]{3}-\d{4})\t\t([^\t]+?)\t\t([(\d,.]+)\)*(?:\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*\t\t([(\d,.]+)\)*)*"
1717
)
18+
DIVIDEND_RE = r"dividend.+?(reinvest)*.+?@\s+Rs\.\s*([\d\.]+)\s+per\s+unit"
19+
20+
DESCRIPTION_TAIL_RE = r"\d{2}-[A-Za-z]{3}-\d{4}\t\t.*(\n[^\t]+)[\t|$]"

casparser/types.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,12 @@ class TransactionType(TypedDict):
2222
date: Union[date, str]
2323
description: str
2424
amount: Union[Decimal, float]
25-
units: Union[Decimal, float]
26-
nav: Union[Decimal, float]
25+
units: Union[Decimal, float, None]
26+
nav: Union[Decimal, float, None]
2727
balance: Union[Decimal, float]
28+
is_dividend_payout: bool
29+
is_dividend_reinvestment: bool
30+
dividend_rate: Union[Decimal, float, None]
2831

2932

3033
class SchemeValuationType(TypedDict):

tests/files.enc

-57.6 KB
Binary file not shown.

tests/test_mupdf.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,7 @@ def test_cli(self, tmpdir):
3333
assert result.exit_code != 1
3434
assert "File saved" in result.output
3535

36-
result = runner.invoke(
37-
cli, [self.cams_file_name, "-p", self.cams_password, "-s", "html"]
38-
)
36+
result = runner.invoke(cli, [self.kfintech_file_name, "-p", self.kfintech_password, "-s", "html"])
3937
assert result.exit_code != 1
4038
assert "<table>\n<thead>" in result.output
4139

0 commit comments

Comments
 (0)