#!/usr/bin/env python3
"""
BAC Spending Analysis Tool

Analyzes transaction JSON output from bac_extract.py.
Provides spending categorization and visualization.
"""

import argparse
import json
import sys
from collections import defaultdict
from pathlib import Path

try:
    import matplotlib.pyplot as plt
    HAS_MATPLOTLIB = True
except ImportError:
    # Graphs are optional; main() refuses --graph when matplotlib is missing.
    HAS_MATPLOTLIB = False


def load_transactions(json_files: list[Path]) -> list[dict]:
    """Load and merge the ``purchases`` lists from several JSON files.

    Args:
        json_files: Paths of JSON documents to read (UTF-8).

    Returns:
        All purchase records, concatenated in file order.

    Raises:
        OSError: If a file cannot be opened.
        ValueError: If a file is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``) or its top level is not a JSON object.
    """
    transactions = []
    for path in json_files:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError(f"{path}: expected a JSON object at the top level")
        # Only include purchases, skip other_charges and voluntary_services.
        # `or []` also covers an explicit `"purchases": null`.
        transactions.extend(data.get("purchases") or [])
    return transactions


def load_categories(path: Path) -> dict[str, list[str]]:
    """Load the category -> patterns mapping from a JSON file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid JSON or is not a JSON object.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(f"{path}: expected a JSON object mapping category to patterns")
    return data


def categorize(description: str, categories: dict[str, list[str]]) -> str:
    """Return the first category with a pattern contained in *description*.

    Matching is a case-insensitive substring test; categories are tried in
    mapping order. Returns ``"Other"`` when nothing matches.
    """
    desc_upper = (description or "").upper()
    for category, patterns in categories.items():
        for pattern in patterns:
            # An empty pattern is a substring of everything; ignore it so a
            # stray "" in the categories file cannot swallow all transactions.
            if pattern and pattern.upper() in desc_upper:
                return category
    return "Other"


def _sum_by_key(keyed: list[tuple[str, dict]]) -> dict[str, dict[str, float]]:
    """Sum CRC and USD amounts of ``(key, transaction)`` pairs per key."""
    totals = defaultdict(lambda: {"crc": 0.0, "usd": 0.0})
    for key, txn in keyed:
        for currency in ("crc", "usd"):
            amount = txn.get(f"amount_{currency}")
            # Missing, None and zero amounts contribute nothing.
            if amount:
                totals[key][currency] += amount
    return dict(totals)


def aggregate_by_category(
    transactions: list[dict], categories: dict[str, list[str]]
) -> dict[str, dict[str, float]]:
    """Sum spending per category, separate CRC/USD."""
    return _sum_by_key(
        [(categorize(txn.get("description"), categories), txn) for txn in transactions]
    )


def aggregate_by_month(transactions: list[dict]) -> dict[str, dict[str, float]]:
    """Sum spending per month (first 7 chars of ``date``), separate CRC/USD.

    Transactions without a date are grouped under ``"unknown"``.
    """
    return _sum_by_key(
        [((txn.get("date") or "unknown")[:7], txn) for txn in transactions]
    )


def print_summary(by_category: dict, by_month: dict):
    """Print per-category totals (largest CRC first) and per-month totals."""
    print("=== Spending by Category ===")

    # Sort by CRC amount descending
    sorted_cats = sorted(by_category.items(), key=lambda x: x[1]["crc"], reverse=True)
    total_crc = 0.0
    total_usd = 0.0

    for cat, amounts in sorted_cats:
        crc, usd = amounts["crc"], amounts["usd"]
        total_crc += crc
        total_usd += usd
        print(f"{cat:20} CRC {crc:>12,.2f} USD {usd:>8,.2f}")

    print("-" * 50)
    print(f"{'Total':20} CRC {total_crc:>12,.2f} USD {total_usd:>8,.2f}")

    print("\n=== Monthly Spending ===")
    for month in sorted(by_month):
        amounts = by_month[month]
        print(f"{month}: CRC {amounts['crc']:>12,.2f} USD {amounts['usd']:>8,.2f}")


def _finish_figure(output: Path, show: bool, label: str) -> None:
    """Save the current figure to *output*, optionally show it, then close it."""
    plt.tight_layout()
    plt.savefig(output, dpi=150)
    print(f"Saved {label} to {output}")

    if show:
        plt.show()
    plt.close()


def plot_bar(data: dict, output: Path, show: bool):
    """Horizontal bar chart of category spending (CRC), largest first."""
    sorted_items = sorted(data.items(), key=lambda x: x[1]["crc"], reverse=True)
    categories = [name for name, _ in sorted_items]
    amounts = [totals["crc"] for _, totals in sorted_items]

    _, ax = plt.subplots(figsize=(10, 6))
    bars = ax.barh(categories, amounts, color="steelblue")
    ax.set_xlabel("Amount (CRC)")
    ax.set_title("Spending by Category")
    ax.invert_yaxis()

    # Offset the value labels by 1% of the largest bar; computed once.
    pad = max(amounts, default=0.0) * 0.01
    for bar, amount in zip(bars, amounts):
        ax.text(bar.get_width() + pad, bar.get_y() + bar.get_height() / 2,
                f"{amount:,.0f}", va="center", fontsize=9)

    _finish_figure(output, show, "bar chart")


def plot_pie(data: dict, output: Path, show: bool):
    """Pie chart of category distribution (CRC).

    Categories whose CRC total is zero or negative are left out. When nothing
    remains, a notice is printed and no file is written.
    """
    positive = {name: totals["crc"] for name, totals in data.items() if totals["crc"] > 0}
    if not positive:
        print("No positive CRC spending to plot; skipping pie chart")
        return

    sorted_items = sorted(positive.items(), key=lambda x: x[1], reverse=True)
    categories = [name for name, _ in sorted_items]
    amounts = [amount for _, amount in sorted_items]

    _, ax = plt.subplots(figsize=(10, 8))
    ax.pie(
        amounts, labels=categories, autopct="%1.1f%%",
        startangle=90, pctdistance=0.75
    )
    ax.set_title("Spending Distribution by Category (CRC)")

    _finish_figure(output, show, "pie chart")


def plot_timeline(data: dict, output: Path, show: bool):
    """Line chart of monthly spending (CRC), months in sorted order."""
    months = sorted(data)
    amounts = [data[m]["crc"] for m in months]

    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(months, amounts, marker="o", linewidth=2, markersize=8, color="steelblue")
    ax.fill_between(months, amounts, alpha=0.3, color="steelblue")

    ax.set_xlabel("Month")
    ax.set_ylabel("Amount (CRC)")
    ax.set_title("Monthly Spending")
    ax.tick_params(axis="x", rotation=45)

    # Add value labels
    for month, amount in zip(months, amounts):
        ax.annotate(f"{amount:,.0f}", (month, amount),
                    textcoords="offset points", xytext=(0, 10),
                    ha="center", fontsize=9)

    _finish_figure(output, show, "timeline chart")


def _output_for_kind(base, kind: str) -> Path:
    """Derive the per-graph output path used by ``--graph all``.

    *base* is the user's ``--output`` value or ``None``. The graph kind is
    inserted before the extension and the directory part of *base* is kept,
    e.g. ``out/report.pdf`` -> ``out/report_bar.pdf``.
    """
    default_name = f"spending_{kind}.png"
    if base is None:
        return Path(default_name)
    if not base.name:
        # e.g. "-o ." has no file name component; with_name() would raise.
        return base / default_name
    suffix = base.suffix or ".png"
    return base.with_name(f"{base.stem}_{kind}{suffix}")


def main():
    """Command-line entry point: print a summary and optionally draw graphs."""
    parser = argparse.ArgumentParser(
        description="Analyze spending from BAC transaction JSON files"
    )
    parser.add_argument(
        "json_files", type=Path, nargs="+",
        help="JSON files from bac_extract.py"
    )
    parser.add_argument(
        "--graph", choices=["bar", "pie", "timeline", "all"],
        help="Generate graph type (use 'all' for all graphs)"
    )
    parser.add_argument(
        "-o", "--output", type=Path,
        help="Output file for graph (default: spending_<type>.png)"
    )
    parser.add_argument(
        "--show", action="store_true",
        help="Display graph interactively"
    )
    parser.add_argument(
        "--categories", type=Path, default=Path("categories.json"),
        help="Custom categories file (default: categories.json)"
    )
    args = parser.parse_args()

    # Validate input files
    for path in args.json_files:
        if not path.exists():
            sys.exit(f"Error: File not found: {path}")

    # Check matplotlib early if graph requested
    if args.graph and not HAS_MATPLOTLIB:
        sys.exit("Error: matplotlib is required for graphs. Install with: pip install matplotlib")

    if not args.categories.exists():
        sys.exit(f"Error: Categories file not found: {args.categories}")

    # Unreadable or malformed input becomes a clean error, not a traceback.
    try:
        categories = load_categories(args.categories)
        transactions = load_transactions(args.json_files)
    except (OSError, ValueError) as err:
        sys.exit(f"Error: {err}")

    if not transactions:
        sys.exit("Error: No transactions found in input files")

    # Aggregate data
    by_category = aggregate_by_category(transactions, categories)
    by_month = aggregate_by_month(transactions)

    # Print summary
    print_summary(by_category, by_month)

    # Generate graph if requested
    plotters = {
        "bar": (plot_bar, by_category),
        "pie": (plot_pie, by_category),
        "timeline": (plot_timeline, by_month),
    }
    if args.graph == "all":
        for kind, (plot, data) in plotters.items():
            plot(data, _output_for_kind(args.output, kind), args.show)
    elif args.graph:
        plot, data = plotters[args.graph]
        plot(data, args.output or Path(f"spending_{args.graph}.png"), args.show)


if __name__ == "__main__":
    main()