add analyzer that analyzes spending using the output of bac_extractor

This commit is contained in:
Fabian Montero 2026-03-09 17:04:37 -06:00
parent 12e818b82c
commit 831df98437
Signed by: fabian
GPG key ID: 8036F30EDBAC8447

251
bac_analyze.py Executable file
View file

@ -0,0 +1,251 @@
#!/usr/bin/env python3
"""
BAC Spending Analysis Tool
Analyzes transaction JSON output from bac_extract.py.
Provides spending categorization and visualization.
"""
import argparse
import json
import sys
from collections import defaultdict
from pathlib import Path
try:
import matplotlib.pyplot as plt
HAS_MATPLOTLIB = True
except ImportError:
HAS_MATPLOTLIB = False
def load_transactions(json_files: list[Path]) -> list[dict]:
    """Read every file in *json_files* and concatenate their purchase records.

    Each file is parsed as UTF-8 JSON. Only the list stored under the
    "purchases" key is kept (a file without that key contributes nothing);
    other sections such as other_charges and voluntary_services are ignored.
    Records appear in the order the files were given.
    """
    merged: list[dict] = []
    for json_path in json_files:
        with open(json_path, encoding="utf-8") as handle:
            payload = json.load(handle)
        merged += payload.get("purchases", [])
    return merged
def load_categories(path: Path) -> dict[str, list[str]]:
    """Parse the UTF-8 JSON file at *path* and return its content.

    The annotated shape is a mapping of category name to a list of pattern
    strings; the parsed JSON is returned as-is without validation.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    return json.loads(raw_text)
def categorize(description: str, categories: dict[str, list[str]]) -> str:
    """Pick the category whose patterns match *description*.

    Matching is a case-insensitive substring test (both sides upper-cased).
    Categories are tried in the mapping's iteration order and the first one
    with any matching pattern wins; "Other" is returned when none match.
    """
    haystack = description.upper()
    return next(
        (
            name
            for name, needles in categories.items()
            if any(needle.upper() in haystack for needle in needles)
        ),
        "Other",
    )
def aggregate_by_category(
    transactions: list[dict], categories: dict[str, list[str]]
) -> dict[str, dict[str, float]]:
    """Sum spending per category, keeping CRC and USD totals separate.

    Args:
        transactions: Records with a "description" and optional
            "amount_crc" / "amount_usd" values.
        categories: Category-name -> patterns mapping passed to ``categorize``.

    Returns:
        A plain dict mapping category name to ``{"crc": total, "usd": total}``.
        A category appears only once at least one non-zero amount was added
        to it.

    Raises:
        KeyError: If a transaction has no "description" key.
    """
    totals = defaultdict(lambda: {"crc": 0.0, "usd": 0.0})
    for txn in transactions:
        category = categorize(txn["description"], categories)
        for currency in ("crc", "usd"):
            # .get(): a record that omits one currency entirely is treated
            # like one whose value is null or zero, instead of raising.
            amount = txn.get(f"amount_{currency}")
            if amount:
                totals[category][currency] += amount
    return dict(totals)
def aggregate_by_month(transactions: list[dict]) -> dict[str, dict[str, float]]:
    """Sum spending per month, keeping CRC and USD totals separate.

    The month key is the first seven characters of each record's "date"
    value (YYYY-MM when dates are ISO formatted).

    Args:
        transactions: Records with a "date" string and optional
            "amount_crc" / "amount_usd" values.

    Returns:
        A plain dict mapping month key to ``{"crc": total, "usd": total}``.
        A month appears only once at least one non-zero amount was added
        to it.

    Raises:
        KeyError: If a transaction has no "date" key.
    """
    totals = defaultdict(lambda: {"crc": 0.0, "usd": 0.0})
    for txn in transactions:
        month = txn["date"][:7]  # YYYY-MM
        for currency in ("crc", "usd"):
            # .get(): a record that omits one currency entirely is treated
            # like one whose value is null or zero, instead of raising.
            amount = txn.get(f"amount_{currency}")
            if amount:
                totals[month][currency] += amount
    return dict(totals)
def print_summary(by_category: dict, by_month: dict):
    """Write the per-category and per-month spending tables to stdout.

    Categories are listed from highest to lowest CRC total and followed by a
    grand-total row; months are listed in ascending key order.
    """

    def table_row(label, crc, usd):
        # Shared layout for the category rows and the total row.
        return f"{label:20} CRC {crc:>12,.2f} USD {usd:>8,.2f}"

    print("=== Spending by Category ===")
    ranked = sorted(by_category.items(), key=lambda pair: pair[1]["crc"], reverse=True)
    grand_crc = 0.0
    grand_usd = 0.0
    for name, totals in ranked:
        grand_crc += totals["crc"]
        grand_usd += totals["usd"]
        print(table_row(name, totals["crc"], totals["usd"]))
    print("-" * 50)
    print(table_row("Total", grand_crc, grand_usd))

    print("\n=== Monthly Spending ===")
    for period in sorted(by_month):
        totals = by_month[period]
        print(f"{period}: CRC {totals['crc']:>12,.2f} USD {totals['usd']:>8,.2f}")
def plot_bar(data: dict, output: Path, show: bool):
    """Save a horizontal bar chart of per-category CRC spending.

    Args:
        data: Mapping of category name to a dict holding a "crc" total.
        output: File the figure is written to (150 dpi).
        show: When true, also open the figure with ``plt.show()`` after saving.
    """
    # Largest category first; invert_yaxis() below draws the first entry on top.
    ranked = sorted(data.items(), key=lambda item: item[1]["crc"], reverse=True)
    categories = [name for name, _ in ranked]
    amounts = [totals["crc"] for _, totals in ranked]

    _, ax = plt.subplots(figsize=(10, 6))
    bars = ax.barh(categories, amounts, color="steelblue")
    ax.set_xlabel("Amount (CRC)")
    ax.set_title("Spending by Category")
    ax.invert_yaxis()

    # Value labels sit just past each bar's end. The gap (1% of the largest
    # amount) is loop-invariant, so compute it once; default=0 keeps an empty
    # data set from raising.
    label_gap = max(amounts, default=0) * 0.01
    for bar, amount in zip(bars, amounts):
        ax.text(
            bar.get_width() + label_gap,
            bar.get_y() + bar.get_height() / 2,
            f"{amount:,.0f}",
            va="center",
            fontsize=9,
        )

    plt.tight_layout()
    plt.savefig(output, dpi=150)
    print(f"Saved bar chart to {output}")
    if show:
        plt.show()
    plt.close()
def plot_pie(data: dict, output: Path, show: bool):
    """Save a pie chart showing each category's share of CRC spending.

    Categories whose CRC total is zero or negative are left out; the rest are
    passed to the pie in descending order of amount. The figure is written to
    *output* and additionally shown when *show* is true.
    """
    positive = [(name, totals["crc"]) for name, totals in data.items() if totals["crc"] > 0]
    positive.sort(key=lambda pair: pair[1], reverse=True)
    labels = [name for name, _ in positive]
    sizes = [amount for _, amount in positive]

    _, ax = plt.subplots(figsize=(10, 8))
    ax.pie(
        sizes,
        labels=labels,
        autopct="%1.1f%%",
        startangle=90,
        pctdistance=0.75,
    )
    ax.set_title("Spending Distribution by Category (CRC)")

    plt.tight_layout()
    plt.savefig(output, dpi=150)
    print(f"Saved pie chart to {output}")
    if show:
        plt.show()
    plt.close()
def plot_timeline(data: dict, output: Path, show: bool):
    """Save a filled line chart of CRC spending per month.

    *data* maps a month key to a dict holding a "crc" total; points are drawn
    in ascending key order, each annotated with its amount. The figure is
    written to *output* and additionally shown when *show* is true.
    """
    periods = sorted(data)
    totals = [data[period]["crc"] for period in periods]

    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(periods, totals, marker="o", linewidth=2, markersize=8, color="steelblue")
    ax.fill_between(periods, totals, alpha=0.3, color="steelblue")
    ax.set_xlabel("Month")
    ax.set_ylabel("Amount (CRC)")
    ax.set_title("Monthly Spending")
    ax.tick_params(axis="x", rotation=45)

    # Label every point with its amount, 10 points above the marker.
    for point in zip(periods, totals):
        ax.annotate(
            f"{point[1]:,.0f}",
            point,
            textcoords="offset points",
            xytext=(0, 10),
            ha="center",
            fontsize=9,
        )

    plt.tight_layout()
    plt.savefig(output, dpi=150)
    print(f"Saved timeline chart to {output}")
    if show:
        plt.show()
    plt.close()
def main():
    """Command-line entry point: summarize spending and optionally plot it.

    Parses the arguments, checks that the input files and the categories file
    exist, loads the purchases, prints the text summary and, when ``--graph``
    is given, writes the requested chart(s).

    Exits through ``sys.exit`` with an error message when an input file or
    the categories file is missing, when a graph is requested but matplotlib
    is not installed, or when no transactions were loaded.
    """
    parser = argparse.ArgumentParser(
        description="Analyze spending from BAC transaction JSON files"
    )
    parser.add_argument(
        "json_files", type=Path, nargs="+",
        help="JSON files from bac_extract.py"
    )
    parser.add_argument(
        "--graph", choices=["bar", "pie", "timeline", "all"],
        help="Generate graph type (use 'all' for all graphs)"
    )
    parser.add_argument(
        "-o", "--output", type=Path,
        help="Output file for graph (default: spending_<type>.png)"
    )
    parser.add_argument(
        "--show", action="store_true",
        help="Display graph interactively"
    )
    parser.add_argument(
        "--categories", type=Path, default=Path("categories.json"),
        help="Custom categories file (default: categories.json)"
    )
    args = parser.parse_args()

    # Validate input files
    for path in args.json_files:
        if not path.exists():
            sys.exit(f"Error: File not found: {path}")

    # Check matplotlib early if graph requested
    if args.graph and not HAS_MATPLOTLIB:
        sys.exit("Error: matplotlib is required for graphs. Install with: pip install matplotlib")

    # Load categories
    if not args.categories.exists():
        sys.exit(f"Error: Categories file not found: {args.categories}")
    categories = load_categories(args.categories)

    # Load transactions
    transactions = load_transactions(args.json_files)
    if not transactions:
        sys.exit("Error: No transactions found in input files")

    # Aggregate data
    by_category = aggregate_by_category(transactions, categories)
    by_month = aggregate_by_month(transactions)

    # Print summary
    print_summary(by_category, by_month)

    # Generate graph if requested
    if not args.graph:
        return

    # Graph type -> (plot function, data it consumes); insertion order is the
    # order used for "all".
    plotters = {
        "bar": (plot_bar, by_category),
        "pie": (plot_pie, by_category),
        "timeline": (plot_timeline, by_month),
    }
    if args.graph == "all":
        # Derive one file per graph type from the -o value. Building on
        # base.parent keeps the charts in the directory the user asked for
        # rather than dropping it and writing to the current directory.
        base = args.output or Path("spending.png")
        for kind, (plot, data) in plotters.items():
            plot(data, base.parent / f"{base.stem}_{kind}{base.suffix}", args.show)
    else:
        plot, data = plotters[args.graph]
        plot(data, args.output or Path(f"spending_{args.graph}.png"), args.show)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()