From 62450842c31299f9f1644927d7b00cc46f05c6c2 Mon Sep 17 00:00:00 2001
From: Fabian Montero <fabian@posixlycorrect.com>
Date: Mon, 9 Mar 2026 13:58:34 -0600
Subject: [PATCH 1/5] gitignore: ignore __pycache__/

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 76de699..f04df3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 *.pdf
 *.json
+__pycache__/

From a05f701f1662b83228279fc8f35f2040bc31b728 Mon Sep 17 00:00:00 2001
From: Fabian Montero <fabian@posixlycorrect.com>
Date: Mon, 9 Mar 2026 13:59:03 -0600
Subject: [PATCH 2/5] remove card suffix functionality

---
 CLAUDE.md      |   9 ++--
 bac_extract.py | 124 ++++++++++++++-----------------------------------
 2 files changed, 39 insertions(+), 94 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 08dc084..ee95b39 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -13,11 +13,11 @@ Single-script Python tool that extracts credit card transactions from BAC Costa
 ## Usage
 
 ```bash
-python bac_extract.py <pdf_file> <card_suffix> [options]
+python bac_extract.py <pdf_file> [options]
 
 # Examples
-python bac_extract.py EstadodeCuenta.pdf 1234 --pretty
-python bac_extract.py statement.pdf 1234 -o output.json -v
+python bac_extract.py EstadodeCuenta.pdf --pretty
+python bac_extract.py statement.pdf -o output.json -v
 ```
 
 Options:
@@ -31,8 +31,7 @@ The extraction pipeline:
 1. Validates PDF is a BAC statement (`is_bac_statement`)
 2. Locates section B via regex patterns (`find_section_b_start`, `is_section_end`)
 3. Extracts tables page-by-page using pdfplumber
-4. Filters transactions by card suffix (last 4 digits)
-5. Parses Spanish dates (D-MMM-YY format) and amounts with comma separators
+4. Parses Spanish dates (D-MMM-YY format) and amounts with comma separators
 
 Key parsing functions:
 - `parse_spanish_date`: Converts "15-ENE-25" to "2025-01-15"
diff --git a/bac_extract.py b/bac_extract.py
index 850bef1..adff408 100755
--- a/bac_extract.py
+++ b/bac_extract.py
@@ -72,21 +72,19 @@ def parse_amount(amount_str: str) -> Optional[float]:
     Parse amount string with comma thousands separator.
     Handles trailing '-' for negative values.
     """
-    if not amount_str or not amount_str.strip():
+    if not amount_str:
         return None
 
     amount_str = amount_str.strip()
+    if not amount_str:
+        return None
 
-    # Check for trailing negative sign
     is_negative = amount_str.endswith("-")
     if is_negative:
-        amount_str = amount_str[:-1].strip()
+        amount_str = amount_str[:-1]
 
-    # Remove thousands separators (commas) and handle decimal point
-    # Format: 1,234.56 or 1,234,567.89
     try:
-        amount_str = amount_str.replace(",", "")
-        amount = float(amount_str)
+        amount = float(amount_str.replace(",", ""))
         return -amount if is_negative else amount
     except ValueError:
         return None
@@ -107,19 +105,17 @@ def extract_statement_date(pdf: pdfplumber.PDF) -> Optional[str]:
         return None
 
     first_page_text = pdf.pages[0].extract_text() or ""
-
-    # Look for date patterns in the first page
-    # Common format: "Fecha de corte: DD-MMM-YY" or similar
     date_matches = DATE_PATTERN.findall(first_page_text)
-    if date_matches:
-        # Use the first date found as statement date
-        day, month_abbr, year = date_matches[0]
-        month = SPANISH_MONTHS.get(month_abbr.upper())
-        if month:
-            full_year = 2000 + int(year)
-            return f"{full_year:04d}-{month:02d}-{int(day):02d}"
+    if not date_matches:
+        return None
 
-    return None
+    day, month_abbr, year = date_matches[0]
+    month = SPANISH_MONTHS.get(month_abbr.upper())
+    if not month:
+        return None
+
+    full_year = 2000 + int(year)
+    return f"{full_year:04d}-{month:02d}-{int(day):02d}"
 
 
 def find_section_b_start(page_text: str) -> bool:
@@ -128,10 +124,7 @@ def find_section_b_start(page_text: str) -> bool:
         r"B\)\s*Detalle\s+de\s+compras",
         r"Detalle\s+de\s+compras\s+del\s+periodo",
     ]
-    for pattern in patterns:
-        if re.search(pattern, page_text, re.IGNORECASE):
-            return True
-    return False
+    return any(re.search(p, page_text, re.IGNORECASE) for p in patterns)
 
 
 def is_section_end(text: str) -> bool:
@@ -142,10 +135,7 @@ def is_section_end(text: str) -> bool:
         r"Detalle\s+de\s+intereses",
         r"D\)\s*Detalle",
     ]
-    for pattern in end_patterns:
-        if re.search(pattern, text, re.IGNORECASE):
-            return True
-    return False
+    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
 
 
 def extract_card_holder(row_text: str) -> Optional[tuple[str, str]]:
@@ -176,18 +166,16 @@ def parse_transaction_line(line: str) -> Optional[dict]:
 
     reference = match.group(1)
     date_str = match.group(2)
-    desc_and_loc = match.group(3).strip()
+    description = match.group(3).strip()
     currency = match.group(4).upper()
     amount_str = match.group(5)
     is_negative = match.group(6) == "-"
 
-    # Parse date
     date = parse_spanish_date(date_str)
     if not date:
         logger.warning(f"Could not parse date '{date_str}' for reference {reference}")
         return None
 
-    # Parse amount
     amount = parse_amount(amount_str)
     if amount is None:
         logger.warning(f"Could not parse amount '{amount_str}' for reference {reference}")
@@ -195,34 +183,23 @@ def parse_transaction_line(line: str) -> Optional[dict]:
     if is_negative:
         amount = -amount
 
-    # Split description and location
-    # Location is typically at the end, often a short suffix like "ANILL", "San Jose"
-    # For now, keep everything as description
-    description = desc_and_loc
-    location = None
-
-    # Set amount in appropriate currency field
-    amount_crc = amount if currency == "CRC" else None
-    amount_usd = amount if currency == "USD" else None
-
     return {
         "reference": reference,
         "date": date,
         "description": description,
-        "location": location,
+        "location": None,
         "currency": currency,
-        "amount_crc": amount_crc,
-        "amount_usd": amount_usd,
+        "amount_crc": amount if currency == "CRC" else None,
+        "amount_usd": amount if currency == "USD" else None,
     }
 
 
-def extract_transactions(pdf_path: Path, card_suffix: str, verbose: bool = False) -> dict:
+def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
     """
     Extract transactions from a BAC credit card statement PDF.
 
     Args:
         pdf_path: Path to the PDF file
-        card_suffix: Last 4 digits of card to filter
         verbose: Enable verbose logging
 
     Returns:
@@ -241,11 +218,10 @@ def extract_transactions(pdf_path: Path, card_suffix: str, verbose: bool = False
         statement_date = extract_statement_date(pdf)
 
         transactions = []
-        current_card_suffix = None
-        current_card_name = None
+        card_suffix = None
+        card_holder_name = None
         in_section_b = False
         section_b_found = False
-        card_suffix_found = False
 
         # Start from page 2 (index 1) as page 1 is summary only
         start_page = 1 if len(pdf.pages) > 1 else 0
@@ -261,67 +237,48 @@ def extract_transactions(pdf_path: Path, card_suffix: str, verbose: bool = False
                 section_b_found = True
                 logger.debug(f"Found section B on page {page_num}")
 
-            # Check for section end
-            if in_section_b and is_section_end(page_text):
-                logger.debug(f"Found section end on page {page_num}")
-                # Still process this page, but mark we're ending
-
             if not in_section_b:
                 continue
 
+            # Check for section end (still process this page before breaking)
+            reached_section_end = is_section_end(page_text)
+            if reached_section_end:
+                logger.debug(f"Found section end on page {page_num}")
+
             # Parse text line by line
             for line in page_text.split("\n"):
                 line = line.strip()
                 if not line:
                     continue
 
-                # Check for card holder line
                 card_info = extract_card_holder(line)
                 if card_info:
-                    current_card_suffix, current_card_name = card_info
-                    logger.debug(f"Found card holder: {current_card_suffix} - {current_card_name}")
-                    if current_card_suffix == card_suffix:
-                        card_suffix_found = True
+                    card_suffix, card_holder_name = card_info
+                    logger.debug(f"Found card holder: {card_suffix} - {card_holder_name}")
                     continue
 
-                # Skip if we're not tracking the right card
-                if current_card_suffix != card_suffix:
-                    continue
-
-                # Try to parse as transaction
                 transaction = parse_transaction_line(line)
                 if transaction:
                     transactions.append(transaction)
                     logger.debug(f"Extracted transaction: {transaction['reference']}")
 
-            # Check if we've passed section B
-            if in_section_b and is_section_end(page_text):
+            if reached_section_end:
                 break
 
         if not section_b_found:
             raise ValueError("Section 'B) Detalle de compras del periodo' not found in PDF")
 
-        if not card_suffix_found:
-            raise ValueError(f"Card suffix '{card_suffix}' not found in statement")
-
         # Calculate summary
         total_crc = sum(t["amount_crc"] or 0 for t in transactions)
         total_usd = sum(t["amount_usd"] or 0 for t in transactions)
 
-        # Get card holder info
-        card_holder = None
-        if card_suffix_found:
-            card_holder = {
-                "card_suffix": card_suffix,
-                "name": current_card_name if current_card_suffix == card_suffix else None
-            }
+        card_holder = {"card_suffix": card_suffix, "name": card_holder_name} if card_suffix else None
 
         return {
             "metadata": {
                 "source_file": pdf_path.name,
                 "extraction_date": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                 "statement_date": statement_date,
-                "card_filter": card_suffix,
                 "total_transactions": len(transactions)
             },
             "card_holder": card_holder,
@@ -340,8 +297,8 @@ def main():
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 Examples:
-  python bac_extract.py EstadodeCuenta.pdf 1234 --pretty
-  python bac_extract.py statement.pdf 1234 -o output.json -v
+  python bac_extract.py EstadodeCuenta.pdf --pretty
+  python bac_extract.py statement.pdf -o output.json -v
         """
     )
 
@@ -351,12 +308,6 @@ Examples:
         help="Path to the BAC statement PDF"
     )
 
-    parser.add_argument(
-        "card_suffix",
-        type=str,
-        help="Last 4 digits of card to filter (e.g., 1234)"
-    )
-
     parser.add_argument(
         "-o", "--output",
         type=Path,
@@ -378,11 +329,6 @@ Examples:
 
     args = parser.parse_args()
 
-    # Validate card suffix
-    if not args.card_suffix.isdigit() or len(args.card_suffix) != 4:
-        print(f"Error: Card suffix must be exactly 4 digits, got '{args.card_suffix}'", file=sys.stderr)
-        sys.exit(1)
-
     # Validate PDF file exists
     if not args.pdf_file.exists():
         print(f"Error: File not found: {args.pdf_file}", file=sys.stderr)
@@ -393,7 +339,7 @@ Examples:
         sys.exit(1)
 
     try:
-        result = extract_transactions(args.pdf_file, args.card_suffix, args.verbose)
+        result = extract_transactions(args.pdf_file, args.verbose)
 
         # Write output
         indent = 2 if args.pretty else None

From 69a773e1b31464e492d1fcde2f5d2178629eec12 Mon Sep 17 00:00:00 2001
From: Fabian Montero <fabian@posixlycorrect.com>
Date: Mon, 9 Mar 2026 14:44:47 -0600
Subject: [PATCH 3/5] target sections D and E

---
 bac_extract.py | 179 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 134 insertions(+), 45 deletions(-)

diff --git a/bac_extract.py b/bac_extract.py
index adff408..6e19108 100755
--- a/bac_extract.py
+++ b/bac_extract.py
@@ -3,7 +3,10 @@
 BAC Credit Card Statement Extractor
 
 Extracts transactions from BAC Costa Rica credit card statement PDFs.
-Specifically targets section "B) Detalle de compras del periodo".
+Targets sections:
+  B) Detalle de compras del periodo
+  D) Detalle de otros cargos
+  E) Detalle de productos y servicios de elección voluntaria
 """
 
 import argparse
@@ -118,17 +121,27 @@ def extract_statement_date(pdf: pdfplumber.PDF) -> Optional[str]:
     return f"{full_year:04d}-{month:02d}-{int(day):02d}"
 
 
-def find_section_b_start(page_text: str) -> bool:
-    """Check if page contains start of section B."""
+def find_section_b_start(text: str) -> bool:
+    """Check if text contains start of section B (purchases)."""
     patterns = [
         r"B\)\s*Detalle\s+de\s+compras",
         r"Detalle\s+de\s+compras\s+del\s+periodo",
     ]
-    return any(re.search(p, page_text, re.IGNORECASE) for p in patterns)
+    return any(re.search(p, text, re.IGNORECASE) for p in patterns)
 
 
-def is_section_end(text: str) -> bool:
-    """Check if we've reached the end of section B."""
+def find_section_d_start(text: str) -> bool:
+    """Check if text contains start of section D (other charges)."""
+    return bool(re.search(r"D\)\s*Detalle\s+de\s+otros\s+cargos", text, re.IGNORECASE))
+
+
+def find_section_e_start(text: str) -> bool:
+    """Check if text contains start of section E (voluntary products/services)."""
+    return bool(re.search(r"E\)\s*Detalle\s+de\s+productos\s+y\s+servicios", text, re.IGNORECASE))
+
+
+def is_section_b_end(text: str) -> bool:
+    """Check if text indicates the end of section B."""
     end_patterns = [
         r"Total\s+de\s+compras\s+del\s+periodo",
         r"C\)\s*Detalle\s+de\s+intereses",
@@ -138,6 +151,24 @@ def is_section_end(text: str) -> bool:
     return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
 
 
+def is_section_d_end(text: str) -> bool:
+    """Check if text indicates the end of section D."""
+    end_patterns = [
+        r"Total\s+por\s+concepto\s+otros\s+cargos",
+        r"E\)\s*Detalle",
+    ]
+    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
+
+
+def is_section_e_end(text: str) -> bool:
+    """Check if text indicates the end of section E."""
+    end_patterns = [
+        r"Total\s+por\s+concepto\s+de\s+productos",
+        r"F\)\s*Cargos",
+    ]
+    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
+
+
 def extract_card_holder(row_text: str) -> Optional[tuple[str, str]]:
     """
     Extract card holder info from a row.
@@ -203,7 +234,8 @@ def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
         verbose: Enable verbose logging
 
     Returns:
-        Dictionary with metadata, card_holder, transactions, and summary
+        Dictionary with metadata, card_holders, purchases, other_charges,
+        voluntary_services, and summary
     """
     if verbose:
         logging.basicConfig(level=logging.DEBUG)
@@ -217,11 +249,18 @@ def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
 
         statement_date = extract_statement_date(pdf)
 
-        transactions = []
-        card_suffix = None
-        card_holder_name = None
-        in_section_b = False
-        section_b_found = False
+        # Transactions by section
+        purchases = []           # Section B
+        other_charges = []       # Section D
+        voluntary_services = []  # Section E
+
+        # Track card holders (may have multiple)
+        card_holders = []
+        seen_card_suffixes = set()
+
+        # Section tracking: None, "B", "D", "E"
+        current_section = None
+        sections_found = set()
 
         # Start from page 2 (index 1) as page 1 is summary only
         start_page = 1 if len(pdf.pages) > 1 else 0
@@ -231,19 +270,48 @@ def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
 
             logger.debug(f"Processing page {page_num}")
 
-            # Check for section B start
-            if not in_section_b and find_section_b_start(page_text):
-                in_section_b = True
-                section_b_found = True
+            # Check for section transitions (order matters: check ends before starts)
+            # Section B end
+            if current_section == "B" and is_section_b_end(page_text):
+                logger.debug(f"Section B ended on page {page_num}")
+                current_section = None
+
+            # Section D end
+            if current_section == "D" and is_section_d_end(page_text):
+                logger.debug(f"Section D ended on page {page_num}")
+                current_section = None
+
+            # Section E end
+            if current_section == "E" and is_section_e_end(page_text):
+                logger.debug(f"Section E ended on page {page_num}")
+                current_section = None
+
+            # Check for section starts
+            if current_section is None and find_section_b_start(page_text):
+                current_section = "B"
+                sections_found.add("B")
                 logger.debug(f"Found section B on page {page_num}")
 
-            if not in_section_b:
+            if current_section is None and find_section_d_start(page_text):
+                current_section = "D"
+                sections_found.add("D")
+                logger.debug(f"Found section D on page {page_num}")
+
+            if current_section is None and find_section_e_start(page_text):
+                current_section = "E"
+                sections_found.add("E")
+                logger.debug(f"Found section E on page {page_num}")
+
+            if current_section is None:
                 continue
 
-            # Check for section end (still process this page before breaking)
-            reached_section_end = is_section_end(page_text)
-            if reached_section_end:
-                logger.debug(f"Found section end on page {page_num}")
+            # Select the appropriate transaction list
+            if current_section == "B":
+                target_list = purchases
+            elif current_section == "D":
+                target_list = other_charges
+            else:  # "E"
+                target_list = voluntary_services
 
             # Parse text line by line
             for line in page_text.split("\n"):
@@ -251,42 +319,55 @@ def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
                 if not line:
                     continue
 
-                card_info = extract_card_holder(line)
-                if card_info:
-                    card_suffix, card_holder_name = card_info
-                    logger.debug(f"Found card holder: {card_suffix} - {card_holder_name}")
-                    continue
+                # Extract card holder info (only in section B)
+                if current_section == "B":
+                    card_info = extract_card_holder(line)
+                    if card_info:
+                        card_suffix, card_holder_name = card_info
+                        if card_suffix not in seen_card_suffixes:
+                            card_holders.append({
+                                "card_suffix": card_suffix,
+                                "name": card_holder_name
+                            })
+                            seen_card_suffixes.add(card_suffix)
+                            logger.debug(f"Found card holder: {card_suffix} - {card_holder_name}")
+                        continue
 
                 transaction = parse_transaction_line(line)
                 if transaction:
-                    transactions.append(transaction)
-                    logger.debug(f"Extracted transaction: {transaction['reference']}")
+                    target_list.append(transaction)
+                    logger.debug(f"Extracted {current_section} transaction: {transaction['reference']}")
 
-            if reached_section_end:
-                break
-
-        if not section_b_found:
+        if "B" not in sections_found:
             raise ValueError("Section 'B) Detalle de compras del periodo' not found in PDF")
 
-        # Calculate summary
-        total_crc = sum(t["amount_crc"] or 0 for t in transactions)
-        total_usd = sum(t["amount_usd"] or 0 for t in transactions)
+        # Calculate summaries
+        def calculate_summary(txns):
+            total_crc = sum(t["amount_crc"] or 0 for t in txns)
+            total_usd = sum(t["amount_usd"] or 0 for t in txns)
+            return {
+                "total_crc": round(total_crc, 2),
+                "total_usd": round(total_usd, 2),
+                "count": len(txns)
+            }
 
-        card_holder = {"card_suffix": card_suffix, "name": card_holder_name} if card_suffix else None
+        total_transactions = len(purchases) + len(other_charges) + len(voluntary_services)
 
         return {
             "metadata": {
                 "source_file": pdf_path.name,
                 "extraction_date": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                 "statement_date": statement_date,
-                "total_transactions": len(transactions)
+                "total_transactions": total_transactions
             },
-            "card_holder": card_holder,
-            "transactions": transactions,
+            "card_holders": card_holders,
+            "purchases": purchases,
+            "other_charges": other_charges,
+            "voluntary_services": voluntary_services,
             "summary": {
-                "total_crc": round(total_crc, 2),
-                "total_usd": round(total_usd, 2),
-                "transaction_count": len(transactions)
+                "purchases": calculate_summary(purchases),
+                "other_charges": calculate_summary(other_charges),
+                "voluntary_services": calculate_summary(voluntary_services)
             }
         }
 
@@ -346,9 +427,17 @@ Examples:
         with open(args.output, "w", encoding="utf-8") as f:
             json.dump(result, f, indent=indent, ensure_ascii=False)
 
-        print(f"Extracted {result['summary']['transaction_count']} transactions to {args.output}")
-        print(f"Total CRC: {result['summary']['total_crc']:,.2f}")
-        print(f"Total USD: {result['summary']['total_usd']:,.2f}")
+        summary = result['summary']
+        print(f"Extracted {result['metadata']['total_transactions']} transactions to {args.output}")
+        print(f"  Purchases (B):          {summary['purchases']['count']:3d}  "
+              f"CRC {summary['purchases']['total_crc']:>12,.2f}  "
+              f"USD {summary['purchases']['total_usd']:>10,.2f}")
+        print(f"  Other charges (D):      {summary['other_charges']['count']:3d}  "
+              f"CRC {summary['other_charges']['total_crc']:>12,.2f}  "
+              f"USD {summary['other_charges']['total_usd']:>10,.2f}")
+        print(f"  Voluntary services (E): {summary['voluntary_services']['count']:3d}  "
+              f"CRC {summary['voluntary_services']['total_crc']:>12,.2f}  "
+              f"USD {summary['voluntary_services']['total_usd']:>10,.2f}")
 
     except ValueError as e:
         print(f"Error: {e}", file=sys.stderr)

From 6fc7da8899ad69a2e358a6ffbd40ee6f2e343ab5 Mon Sep 17 00:00:00 2001
From: Fabian Montero <fabian@posixlycorrect.com>
Date: Mon, 9 Mar 2026 15:39:02 -0600
Subject: [PATCH 4/5] gitignore: ignore testStatements/

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index f04df3a..fcd11ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 *.pdf
 *.json
 __pycache__/
+testStatements/

From 14734d3125dd9da48bda8c55731ffd7abf68912c Mon Sep 17 00:00:00 2001
From: Fabian Montero <fabian@posixlycorrect.com>
Date: Mon, 9 Mar 2026 15:39:16 -0600
Subject: [PATCH 5/5] fix bugs and simplify with table-driven section matching

---
 CLAUDE.md      |  20 ++-
 bac_extract.py | 393 ++++++++++++-------------------------------------
 2 files changed, 111 insertions(+), 302 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index ee95b39..5b058a9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,15 +4,19 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 
 ## Project Overview
 
-Single-script Python tool that extracts credit card transactions from BAC Costa Rica statement PDFs. Parses section "B) Detalle de compras del periodo" and outputs JSON.
+Single-script Python tool that extracts credit card transactions from BAC Costa Rica statement PDFs. Parses sections B (purchases), D (other charges), and E (voluntary services) and outputs JSON.
 
 ## Dependencies
 
 - pdfplumber (>=0.10.0)
 
-## Usage
+## Commands
 
 ```bash
+# Run tests
+python testStatements/run_tests.py
+
+# Run extractor
 python bac_extract.py <pdf_file> [options]
 
 # Examples
@@ -29,11 +33,15 @@ Options:
 
 The extraction pipeline:
 1. Validates PDF is a BAC statement (`is_bac_statement`)
-2. Locates section B via regex patterns (`find_section_b_start`, `is_section_end`)
-3. Extracts tables page-by-page using pdfplumber
-4. Parses Spanish dates (D-MMM-YY format) and amounts with comma separators
+2. Iterates pages line-by-line, detecting section boundaries via `SECTIONS` dict patterns
+3. Parses transactions matching `TRANSACTION_PATTERN` regex
+4. Outputs card holders, transactions by section, and summaries
+
+Key data structures:
+- `SECTIONS`: Maps section IDs (B/D/E) to start/end regex patterns and output keys
+- `SPANISH_MONTHS`: Spanish month abbreviations for date parsing
 
 Key parsing functions:
 - `parse_spanish_date`: Converts "15-ENE-25" to "2025-01-15"
 - `parse_amount`: Handles "1,234.56" and trailing negatives "100.00-"
-- `extract_card_holder`: Matches "************1234 NAME" pattern
+- `matches_patterns`: Generic regex pattern matcher for section detection
diff --git a/bac_extract.py b/bac_extract.py
index 6e19108..4757911 100755
--- a/bac_extract.py
+++ b/bac_extract.py
@@ -20,74 +20,63 @@ from typing import Optional
 
 import pdfplumber
 
-# Spanish month abbreviations to month numbers
 SPANISH_MONTHS = {
     "ENE": 1, "FEB": 2, "MAR": 3, "ABR": 4, "MAY": 5, "JUN": 6,
     "JUL": 7, "AGO": 8, "SEP": 9, "OCT": 10, "NOV": 11, "DIC": 12
 }
 
-# Card holder pattern: ************XXXX NAME
 CARD_HOLDER_PATTERN = re.compile(r"\*{12}(\d{4})\s+(.+)")
-
-# Date pattern: D-MMM-YY or DD-MMM-YY
 DATE_PATTERN = re.compile(r"(\d{1,2})-([A-Z]{3})-(\d{2})", re.IGNORECASE)
-
-# Transaction line pattern:
-# Reference  Date        Description         Location (optional)  Currency  Amount
-# 123456789012 9-ENE-26 EXAMPLE STORE                              CRC      1,234.56
 TRANSACTION_PATTERN = re.compile(
-    r"^(\d{12})\s+"                          # Reference (12 digits)
-    r"(\d{1,2}-[A-Z]{3}-\d{2})\s+"           # Date
-    r"(.+?)\s+"                               # Description
-    r"(CRC|USD)\s+"                           # Currency
-    r"([\d,]+\.\d{2})(-)?$",                 # Amount (with optional trailing minus)
+    r"^(\d{12,13})\s+"
+    r"(\d{1,2}-[A-Z]{3}-\d{2})\s+"
+    r"(.+?)\s+"
+    r"(CRC|USD)\s+"
+    r"([\d,]+\.\d{2})(-)?$",
     re.IGNORECASE
 )
 
+# Section definitions: start patterns, end patterns, output key
+SECTIONS = {
+    "B": {
+        "start": [r"B\)\s*Detalle\s+de\s+compras", r"Detalle\s+de\s+compras\s+del\s+periodo"],
+        "end": [r"Total\s+de\s+compras\s+del\s+periodo", r"C\)\s*Detalle", r"D\)\s*Detalle", r"E\)\s*Detalle"],
+        "key": "purchases",
+    },
+    "D": {
+        "start": [r"D\)\s*Detalle\s+de\s+otros\s+cargos"],
+        "end": [r"Total\s+por\s+concepto\s+otros\s+cargos", r"E\)\s*Detalle"],
+        "key": "other_charges",
+    },
+    "E": {
+        "start": [r"E\)\s*Detalle\s+de\s+productos\s+y\s+servicios"],
+        "end": [r"Total\s+por\s+concepto\s+de\s+productos", r"F\)\s*Cargos"],
+        "key": "voluntary_services",
+    },
+}
+
 logger = logging.getLogger(__name__)
 
 
 def parse_spanish_date(date_str: str) -> Optional[str]:
     """Parse Spanish date format (D-MMM-YY) to ISO format (YYYY-MM-DD)."""
-    if not date_str:
-        return None
-
-    match = DATE_PATTERN.match(date_str.strip())
+    match = DATE_PATTERN.match(date_str.strip()) if date_str else None
     if not match:
         return None
-
     day, month_abbr, year = match.groups()
     month = SPANISH_MONTHS.get(month_abbr.upper())
     if not month:
         return None
-
-    # Assume 2000s for 2-digit year
-    full_year = 2000 + int(year)
-
-    try:
-        return f"{full_year:04d}-{month:02d}-{int(day):02d}"
-    except ValueError:
-        return None
+    return f"{2000 + int(year):04d}-{month:02d}-{int(day):02d}"
 
 
 def parse_amount(amount_str: str) -> Optional[float]:
-    """
-    Parse amount string with comma thousands separator.
-    Handles trailing '-' for negative values.
-    """
-    if not amount_str:
+    """Parse amount with comma thousands separator. Handles trailing '-' for negatives."""
+    if not amount_str or not (amount_str := amount_str.strip()):
         return None
-
-    amount_str = amount_str.strip()
-    if not amount_str:
-        return None
-
     is_negative = amount_str.endswith("-")
-    if is_negative:
-        amount_str = amount_str[:-1]
-
     try:
-        amount = float(amount_str.replace(",", ""))
+        amount = float(amount_str.rstrip("-").replace(",", ""))
         return -amount if is_negative else amount
     except ValueError:
         return None
@@ -97,7 +86,6 @@ def is_bac_statement(pdf: pdfplumber.PDF) -> bool:
     """Check if the PDF is a BAC credit card statement."""
     if not pdf.pages:
         return False
-
     first_page_text = pdf.pages[0].extract_text() or ""
     return "BAC" in first_page_text and "TARJETA" in first_page_text.upper()
 
@@ -106,118 +94,43 @@ def extract_statement_date(pdf: pdfplumber.PDF) -> Optional[str]:
     """Extract the statement date from the PDF."""
     if not pdf.pages:
         return None
-
     first_page_text = pdf.pages[0].extract_text() or ""
     date_matches = DATE_PATTERN.findall(first_page_text)
     if not date_matches:
         return None
-
     day, month_abbr, year = date_matches[0]
     month = SPANISH_MONTHS.get(month_abbr.upper())
     if not month:
         return None
-
-    full_year = 2000 + int(year)
-    return f"{full_year:04d}-{month:02d}-{int(day):02d}"
+    return f"{2000 + int(year):04d}-{month:02d}-{int(day):02d}"
 
 
-def find_section_b_start(text: str) -> bool:
-    """Check if text contains start of section B (purchases)."""
-    patterns = [
-        r"B\)\s*Detalle\s+de\s+compras",
-        r"Detalle\s+de\s+compras\s+del\s+periodo",
-    ]
+def matches_patterns(text: str, patterns: list[str]) -> bool:
+    """Check if text matches any of the given regex patterns."""
     return any(re.search(p, text, re.IGNORECASE) for p in patterns)
 
 
-def find_section_d_start(text: str) -> bool:
-    """Check if text contains start of section D (other charges)."""
-    return bool(re.search(r"D\)\s*Detalle\s+de\s+otros\s+cargos", text, re.IGNORECASE))
-
-
-def find_section_e_start(text: str) -> bool:
-    """Check if text contains start of section E (voluntary products/services)."""
-    return bool(re.search(r"E\)\s*Detalle\s+de\s+productos\s+y\s+servicios", text, re.IGNORECASE))
-
-
-def is_section_b_end(text: str) -> bool:
-    """Check if text indicates the end of section B."""
-    end_patterns = [
-        r"Total\s+de\s+compras\s+del\s+periodo",
-        r"C\)\s*Detalle\s+de\s+intereses",
-        r"Detalle\s+de\s+intereses",
-        r"D\)\s*Detalle",
-    ]
-    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
-
-
-def is_section_d_end(text: str) -> bool:
-    """Check if text indicates the end of section D."""
-    end_patterns = [
-        r"Total\s+por\s+concepto\s+otros\s+cargos",
-        r"E\)\s*Detalle",
-    ]
-    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
-
-
-def is_section_e_end(text: str) -> bool:
-    """Check if text indicates the end of section E."""
-    end_patterns = [
-        r"Total\s+por\s+concepto\s+de\s+productos",
-        r"F\)\s*Cargos",
-    ]
-    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
-
-
-def extract_card_holder(row_text: str) -> Optional[tuple[str, str]]:
-    """
-    Extract card holder info from a row.
-    Returns (card_suffix, name) or None.
-    """
-    match = CARD_HOLDER_PATTERN.search(row_text)
-    if match:
-        return match.group(1), match.group(2).strip()
-    return None
-
-
 def parse_transaction_line(line: str) -> Optional[dict]:
-    """
-    Parse a text-based transaction line.
-
-    Format: Reference Date Description [Location] Currency Amount
-    Example: 123456789012 9-ENE-26 EXAMPLE STORE CRC 1,234.56
-    """
-    line = line.strip()
-    if not line:
-        return None
-
-    match = TRANSACTION_PATTERN.match(line)
+    """Parse a transaction line into a dict, or return None if not a transaction."""
+    match = TRANSACTION_PATTERN.match(line.strip())
     if not match:
         return None
 
-    reference = match.group(1)
-    date_str = match.group(2)
-    description = match.group(3).strip()
-    currency = match.group(4).upper()
-    amount_str = match.group(5)
-    is_negative = match.group(6) == "-"
+    reference, date_str, description, currency, amount_str, neg = match.groups()
+    currency = currency.upper()
 
     date = parse_spanish_date(date_str)
-    if not date:
-        logger.warning(f"Could not parse date '{date_str}' for reference {reference}")
-        return None
-
     amount = parse_amount(amount_str)
-    if amount is None:
-        logger.warning(f"Could not parse amount '{amount_str}' for reference {reference}")
+    if not date or amount is None:
+        logger.warning(f"Could not parse transaction: {line}")
         return None
-    if is_negative:
+    if neg:
         amount = -amount
 
     return {
         "reference": reference,
         "date": date,
-        "description": description,
+        "description": description.strip(),
         "location": None,
         "currency": currency,
         "amount_crc": amount if currency == "CRC" else None,
@@ -226,228 +139,116 @@ def parse_transaction_line(line: str) -> Optional[dict]:
 
 
 def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
-    """
-    Extract transactions from a BAC credit card statement PDF.
-
-    Args:
-        pdf_path: Path to the PDF file
-        verbose: Enable verbose logging
-
-    Returns:
-        Dictionary with metadata, card_holders, purchases, other_charges,
-        voluntary_services, and summary
-    """
-    if verbose:
-        logging.basicConfig(level=logging.DEBUG)
-    else:
-        logging.basicConfig(level=logging.INFO)
+    """Extract transactions from a BAC credit card statement PDF."""
+    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)
 
     with pdfplumber.open(pdf_path) as pdf:
-        # Validate this is a BAC statement
         if not is_bac_statement(pdf):
             raise ValueError("PDF does not appear to be a BAC credit card statement")
 
         statement_date = extract_statement_date(pdf)
-
-        # Transactions by section
-        purchases = []           # Section B
-        other_charges = []       # Section D
-        voluntary_services = []  # Section E
-
-        # Track card holders (may have multiple)
+        transactions = {s["key"]: [] for s in SECTIONS.values()}
         card_holders = []
         seen_card_suffixes = set()
-
-        # Section tracking: None, "B", "D", "E"
         current_section = None
-        sections_found = set()
+        sections_completed = set()
 
-        # Start from page 2 (index 1) as page 1 is summary only
         start_page = 1 if len(pdf.pages) > 1 else 0
-
         for page_num, page in enumerate(pdf.pages[start_page:], start=start_page + 1):
             page_text = page.extract_text() or ""
-
             logger.debug(f"Processing page {page_num}")
 
-            # Check for section transitions (order matters: check ends before starts)
-            # Section B end
-            if current_section == "B" and is_section_b_end(page_text):
-                logger.debug(f"Section B ended on page {page_num}")
-                current_section = None
-
-            # Section D end
-            if current_section == "D" and is_section_d_end(page_text):
-                logger.debug(f"Section D ended on page {page_num}")
-                current_section = None
-
-            # Section E end
-            if current_section == "E" and is_section_e_end(page_text):
-                logger.debug(f"Section E ended on page {page_num}")
-                current_section = None
-
-            # Check for section starts
-            if current_section is None and find_section_b_start(page_text):
-                current_section = "B"
-                sections_found.add("B")
-                logger.debug(f"Found section B on page {page_num}")
-
-            if current_section is None and find_section_d_start(page_text):
-                current_section = "D"
-                sections_found.add("D")
-                logger.debug(f"Found section D on page {page_num}")
-
-            if current_section is None and find_section_e_start(page_text):
-                current_section = "E"
-                sections_found.add("E")
-                logger.debug(f"Found section E on page {page_num}")
-
-            if current_section is None:
-                continue
-
-            # Select the appropriate transaction list
-            if current_section == "B":
-                target_list = purchases
-            elif current_section == "D":
-                target_list = other_charges
-            else:  # "E"
-                target_list = voluntary_services
-
-            # Parse text line by line
             for line in page_text.split("\n"):
                 line = line.strip()
                 if not line:
                     continue
 
-                # Extract card holder info (only in section B)
-                if current_section == "B":
-                    card_info = extract_card_holder(line)
-                    if card_info:
-                        card_suffix, card_holder_name = card_info
-                        if card_suffix not in seen_card_suffixes:
-                            card_holders.append({
-                                "card_suffix": card_suffix,
-                                "name": card_holder_name
-                            })
-                            seen_card_suffixes.add(card_suffix)
-                            logger.debug(f"Found card holder: {card_suffix} - {card_holder_name}")
-                        continue
+                # Check for section end
+                if current_section and matches_patterns(line, SECTIONS[current_section]["end"]):
+                    logger.debug(f"Section {current_section} ended on page {page_num}")
+                    sections_completed.add(current_section)
+                    current_section = None
 
-                transaction = parse_transaction_line(line)
-                if transaction:
-                    target_list.append(transaction)
-                    logger.debug(f"Extracted {current_section} transaction: {transaction['reference']}")
+                # Check for section start
+                if current_section is None:
+                    for sec_id, sec in SECTIONS.items():
+                        if sec_id not in sections_completed and matches_patterns(line, sec["start"]):
+                            current_section = sec_id
+                            logger.debug(f"Found section {sec_id} on page {page_num}")
+                            break
+                    continue
 
-        if "B" not in sections_found:
+                # Extract card holder
+                match = CARD_HOLDER_PATTERN.search(line)
+                if match:
+                    suffix, name = match.group(1), match.group(2).strip()
+                    if suffix not in seen_card_suffixes:
+                        card_holders.append({"card_suffix": suffix, "name": name})
+                        seen_card_suffixes.add(suffix)
+                        logger.debug(f"Found card holder: {suffix} - {name}")
+                    continue
+
+                # Parse transaction
+                txn = parse_transaction_line(line)
+                if txn:
+                    transactions[SECTIONS[current_section]["key"]].append(txn)
+                    logger.debug(f"Extracted {current_section} transaction: {txn['reference']}")
+
+        if "B" not in sections_completed and not transactions["purchases"]:
             raise ValueError("Section 'B) Detalle de compras del periodo' not found in PDF")
 
-        # Calculate summaries
-        def calculate_summary(txns):
-            total_crc = sum(t["amount_crc"] or 0 for t in txns)
-            total_usd = sum(t["amount_usd"] or 0 for t in txns)
+        def summarize(txns):
             return {
-                "total_crc": round(total_crc, 2),
-                "total_usd": round(total_usd, 2),
-                "count": len(txns)
+                "total_crc": round(sum(t["amount_crc"] or 0 for t in txns), 2),
+                "total_usd": round(sum(t["amount_usd"] or 0 for t in txns), 2),
+                "count": len(txns),
             }
 
-        total_transactions = len(purchases) + len(other_charges) + len(voluntary_services)
-
         return {
             "metadata": {
                 "source_file": pdf_path.name,
                 "extraction_date": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                 "statement_date": statement_date,
-                "total_transactions": total_transactions
+                "total_transactions": sum(len(t) for t in transactions.values()),
             },
             "card_holders": card_holders,
-            "purchases": purchases,
-            "other_charges": other_charges,
-            "voluntary_services": voluntary_services,
-            "summary": {
-                "purchases": calculate_summary(purchases),
-                "other_charges": calculate_summary(other_charges),
-                "voluntary_services": calculate_summary(voluntary_services)
-            }
+            **transactions,
+            "summary": {key: summarize(txns) for key, txns in transactions.items()},
         }
 
 
 def main():
-    parser = argparse.ArgumentParser(
-        description="Extract transactions from BAC Costa Rica credit card statement PDFs",
-        formatter_class=argparse.RawDescriptionHelpFormatter,
-        epilog="""
-Examples:
-  python bac_extract.py EstadodeCuenta.pdf --pretty
-  python bac_extract.py statement.pdf -o output.json -v
-        """
-    )
-
-    parser.add_argument(
-        "pdf_file",
-        type=Path,
-        help="Path to the BAC statement PDF"
-    )
-
-    parser.add_argument(
-        "-o", "--output",
-        type=Path,
-        default=Path("transactions.json"),
-        help="Output JSON file path (default: transactions.json)"
-    )
-
-    parser.add_argument(
-        "--pretty",
-        action="store_true",
-        help="Pretty-print JSON output"
-    )
-
-    parser.add_argument(
-        "-v", "--verbose",
-        action="store_true",
-        help="Enable verbose logging"
-    )
-
+    parser = argparse.ArgumentParser(description="Extract transactions from BAC CR statement PDFs")
+    parser.add_argument("pdf_file", type=Path, help="Path to the BAC statement PDF")
+    parser.add_argument("-o", "--output", type=Path, default=Path("transactions.json"))
+    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging")
     args = parser.parse_args()
 
-    # Validate PDF file exists
     if not args.pdf_file.exists():
-        print(f"Error: File not found: {args.pdf_file}", file=sys.stderr)
-        sys.exit(1)
-
-    if not args.pdf_file.suffix.lower() == ".pdf":
-        print(f"Error: File must be a PDF: {args.pdf_file}", file=sys.stderr)
-        sys.exit(1)
+        sys.exit(f"Error: File not found: {args.pdf_file}")
+    if args.pdf_file.suffix.lower() != ".pdf":
+        sys.exit(f"Error: File must be a PDF: {args.pdf_file}")
 
     try:
         result = extract_transactions(args.pdf_file, args.verbose)
-
-        # Write output
-        indent = 2 if args.pretty else None
         with open(args.output, "w", encoding="utf-8") as f:
-            json.dump(result, f, indent=indent, ensure_ascii=False)
+            json.dump(result, f, indent=2 if args.pretty else None, ensure_ascii=False)
 
-        summary = result['summary']
+        summary = result["summary"]
         print(f"Extracted {result['metadata']['total_transactions']} transactions to {args.output}")
-        print(f"  Purchases (B):          {summary['purchases']['count']:3d}  "
-              f"CRC {summary['purchases']['total_crc']:>12,.2f}  "
-              f"USD {summary['purchases']['total_usd']:>10,.2f}")
-        print(f"  Other charges (D):      {summary['other_charges']['count']:3d}  "
-              f"CRC {summary['other_charges']['total_crc']:>12,.2f}  "
-              f"USD {summary['other_charges']['total_usd']:>10,.2f}")
-        print(f"  Voluntary services (E): {summary['voluntary_services']['count']:3d}  "
-              f"CRC {summary['voluntary_services']['total_crc']:>12,.2f}  "
-              f"USD {summary['voluntary_services']['total_usd']:>10,.2f}")
+        for key, label in [("purchases", "Purchases (B)"), ("other_charges", "Other charges (D)"),
+                           ("voluntary_services", "Voluntary services (E)")]:
+            s = summary[key]
+            print(f"  {label:25} {s['count']:3d}  CRC {s['total_crc']:>12,.2f}  USD {s['total_usd']:>10,.2f}")
 
     except ValueError as e:
-        print(f"Error: {e}", file=sys.stderr)
-        sys.exit(1)
+        sys.exit(f"Error: {e}")
     except Exception as e:
-        print(f"Error processing PDF: {e}", file=sys.stderr)
         if args.verbose:
             import traceback
             traceback.print_exc()
-        sys.exit(1)
+        sys.exit(f"Error processing PDF: {e}")
 
 
 if __name__ == "__main__":