From a05f701f1662b83228279fc8f35f2040bc31b728 Mon Sep 17 00:00:00 2001
From: Fabian Montero <fabian@posixlycorrect.com>
Date: Mon, 9 Mar 2026 13:59:03 -0600
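Subject: [PATCH] Remove card suffix filter; extract transactions for all cards

Drop the required <card_suffix> CLI argument and its 4-digit validation.
extract_transactions() no longer takes card_suffix and no longer filters
by card: every transaction line parsed in section B is returned.

Output changes:
- metadata.card_filter is removed.
- card_holder now reflects the last card-holder line encountered while
  scanning (None if no such line was found), not a requested card.
- A card suffix that is absent from the statement no longer raises
  ValueError.

Also tidy parse_amount, extract_statement_date, find_section_b_start,
is_section_end and parse_transaction_line, and evaluate the section-end
check once per page instead of twice. CLAUDE.md is updated to match the
new usage.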
---
 CLAUDE.md      |   9 ++--
 bac_extract.py | 124 ++++++++++++++-----------------------------------
 2 files changed, 39 insertions(+), 94 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 08dc084..ee95b39 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -13,11 +13,11 @@ Single-script Python tool that extracts credit card transactions from BAC Costa
 ## Usage
 
 ```bash
-python bac_extract.py <pdf_file> <card_suffix> [options]
+python bac_extract.py <pdf_file> [options]
 
 # Examples
-python bac_extract.py EstadodeCuenta.pdf 1234 --pretty
-python bac_extract.py statement.pdf 1234 -o output.json -v
+python bac_extract.py EstadodeCuenta.pdf --pretty
+python bac_extract.py statement.pdf -o output.json -v
 ```
 
 Options:
@@ -31,8 +31,7 @@ The extraction pipeline:
 1. Validates PDF is a BAC statement (`is_bac_statement`)
 2. Locates section B via regex patterns (`find_section_b_start`, `is_section_end`)
 3. Extracts tables page-by-page using pdfplumber
-4. Filters transactions by card suffix (last 4 digits)
-5. Parses Spanish dates (D-MMM-YY format) and amounts with comma separators
+4. Parses Spanish dates (D-MMM-YY format) and amounts with comma separators
 
 Key parsing functions:
 - `parse_spanish_date`: Converts "15-ENE-25" to "2025-01-15"
diff --git a/bac_extract.py b/bac_extract.py
index 850bef1..adff408 100755
--- a/bac_extract.py
+++ b/bac_extract.py
@@ -72,21 +72,19 @@ def parse_amount(amount_str: str) -> Optional[float]:
     Parse amount string with comma thousands separator.
     Handles trailing '-' for negative values.
     """
-    if not amount_str or not amount_str.strip():
+    if not amount_str:
         return None
 
     amount_str = amount_str.strip()
+    if not amount_str:
+        return None
 
-    # Check for trailing negative sign
     is_negative = amount_str.endswith("-")
     if is_negative:
-        amount_str = amount_str[:-1].strip()
+        amount_str = amount_str[:-1]
 
-    # Remove thousands separators (commas) and handle decimal point
-    # Format: 1,234.56 or 1,234,567.89
     try:
-        amount_str = amount_str.replace(",", "")
-        amount = float(amount_str)
+        amount = float(amount_str.replace(",", ""))
         return -amount if is_negative else amount
     except ValueError:
         return None
@@ -107,19 +105,17 @@ def extract_statement_date(pdf: pdfplumber.PDF) -> Optional[str]:
         return None
 
     first_page_text = pdf.pages[0].extract_text() or ""
-
-    # Look for date patterns in the first page
-    # Common format: "Fecha de corte: DD-MMM-YY" or similar
     date_matches = DATE_PATTERN.findall(first_page_text)
-    if date_matches:
-        # Use the first date found as statement date
-        day, month_abbr, year = date_matches[0]
-        month = SPANISH_MONTHS.get(month_abbr.upper())
-        if month:
-            full_year = 2000 + int(year)
-            return f"{full_year:04d}-{month:02d}-{int(day):02d}"
+    if not date_matches:
+        return None
 
-    return None
+    day, month_abbr, year = date_matches[0]
+    month = SPANISH_MONTHS.get(month_abbr.upper())
+    if not month:
+        return None
+
+    full_year = 2000 + int(year)
+    return f"{full_year:04d}-{month:02d}-{int(day):02d}"
 
 
 def find_section_b_start(page_text: str) -> bool:
@@ -128,10 +124,7 @@ def find_section_b_start(page_text: str) -> bool:
         r"B\)\s*Detalle\s+de\s+compras",
         r"Detalle\s+de\s+compras\s+del\s+periodo",
     ]
-    for pattern in patterns:
-        if re.search(pattern, page_text, re.IGNORECASE):
-            return True
-    return False
+    return any(re.search(p, page_text, re.IGNORECASE) for p in patterns)
 
 
 def is_section_end(text: str) -> bool:
@@ -142,10 +135,7 @@ def is_section_end(text: str) -> bool:
         r"Detalle\s+de\s+intereses",
         r"D\)\s*Detalle",
     ]
-    for pattern in end_patterns:
-        if re.search(pattern, text, re.IGNORECASE):
-            return True
-    return False
+    return any(re.search(p, text, re.IGNORECASE) for p in end_patterns)
 
 
 def extract_card_holder(row_text: str) -> Optional[tuple[str, str]]:
@@ -176,18 +166,16 @@ def parse_transaction_line(line: str) -> Optional[dict]:
 
     reference = match.group(1)
     date_str = match.group(2)
-    desc_and_loc = match.group(3).strip()
+    description = match.group(3).strip()
     currency = match.group(4).upper()
     amount_str = match.group(5)
     is_negative = match.group(6) == "-"
 
-    # Parse date
     date = parse_spanish_date(date_str)
     if not date:
         logger.warning(f"Could not parse date '{date_str}' for reference {reference}")
         return None
 
-    # Parse amount
     amount = parse_amount(amount_str)
     if amount is None:
         logger.warning(f"Could not parse amount '{amount_str}' for reference {reference}")
@@ -195,34 +183,23 @@ def parse_transaction_line(line: str) -> Optional[dict]:
     if is_negative:
         amount = -amount
 
-    # Split description and location
-    # Location is typically at the end, often a short suffix like "ANILL", "San Jose"
-    # For now, keep everything as description
-    description = desc_and_loc
-    location = None
-
-    # Set amount in appropriate currency field
-    amount_crc = amount if currency == "CRC" else None
-    amount_usd = amount if currency == "USD" else None
-
     return {
         "reference": reference,
         "date": date,
         "description": description,
-        "location": location,
+        "location": None,
         "currency": currency,
-        "amount_crc": amount_crc,
-        "amount_usd": amount_usd,
+        "amount_crc": amount if currency == "CRC" else None,
+        "amount_usd": amount if currency == "USD" else None,
     }
 
 
-def extract_transactions(pdf_path: Path, card_suffix: str, verbose: bool = False) -> dict:
+def extract_transactions(pdf_path: Path, verbose: bool = False) -> dict:
     """
     Extract transactions from a BAC credit card statement PDF.
 
     Args:
         pdf_path: Path to the PDF file
-        card_suffix: Last 4 digits of card to filter
         verbose: Enable verbose logging
 
     Returns:
@@ -241,11 +218,10 @@ def extract_transactions(pdf_path: Path, card_suffix: str, verbose: bool = False
         statement_date = extract_statement_date(pdf)
 
         transactions = []
-        current_card_suffix = None
-        current_card_name = None
+        card_suffix = None
+        card_holder_name = None
         in_section_b = False
         section_b_found = False
-        card_suffix_found = False
 
         # Start from page 2 (index 1) as page 1 is summary only
         start_page = 1 if len(pdf.pages) > 1 else 0
@@ -261,67 +237,48 @@ def extract_transactions(pdf_path: Path, card_suffix: str, verbose: bool = False
                 section_b_found = True
                 logger.debug(f"Found section B on page {page_num}")
 
-            # Check for section end
-            if in_section_b and is_section_end(page_text):
-                logger.debug(f"Found section end on page {page_num}")
-                # Still process this page, but mark we're ending
-
             if not in_section_b:
                 continue
 
+            # Check for section end (still process this page before breaking)
+            reached_section_end = is_section_end(page_text)
+            if reached_section_end:
+                logger.debug(f"Found section end on page {page_num}")
+
             # Parse text line by line
             for line in page_text.split("\n"):
                 line = line.strip()
                 if not line:
                     continue
 
-                # Check for card holder line
                 card_info = extract_card_holder(line)
                 if card_info:
-                    current_card_suffix, current_card_name = card_info
-                    logger.debug(f"Found card holder: {current_card_suffix} - {current_card_name}")
-                    if current_card_suffix == card_suffix:
-                        card_suffix_found = True
+                    card_suffix, card_holder_name = card_info
+                    logger.debug(f"Found card holder: {card_suffix} - {card_holder_name}")
                     continue
 
-                # Skip if we're not tracking the right card
-                if current_card_suffix != card_suffix:
-                    continue
-
-                # Try to parse as transaction
                 transaction = parse_transaction_line(line)
                 if transaction:
                     transactions.append(transaction)
                     logger.debug(f"Extracted transaction: {transaction['reference']}")
 
-            # Check if we've passed section B
-            if in_section_b and is_section_end(page_text):
+            if reached_section_end:
                 break
 
         if not section_b_found:
             raise ValueError("Section 'B) Detalle de compras del periodo' not found in PDF")
 
-        if not card_suffix_found:
-            raise ValueError(f"Card suffix '{card_suffix}' not found in statement")
-
         # Calculate summary
         total_crc = sum(t["amount_crc"] or 0 for t in transactions)
         total_usd = sum(t["amount_usd"] or 0 for t in transactions)
 
-        # Get card holder info
-        card_holder = None
-        if card_suffix_found:
-            card_holder = {
-                "card_suffix": card_suffix,
-                "name": current_card_name if current_card_suffix == card_suffix else None
-            }
+        card_holder = {"card_suffix": card_suffix, "name": card_holder_name} if card_suffix else None
 
         return {
             "metadata": {
                 "source_file": pdf_path.name,
                 "extraction_date": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
                 "statement_date": statement_date,
-                "card_filter": card_suffix,
                 "total_transactions": len(transactions)
             },
             "card_holder": card_holder,
@@ -340,8 +297,8 @@ def main():
         formatter_class=argparse.RawDescriptionHelpFormatter,
         epilog="""
 Examples:
-  python bac_extract.py EstadodeCuenta.pdf 1234 --pretty
-  python bac_extract.py statement.pdf 1234 -o output.json -v
+  python bac_extract.py EstadodeCuenta.pdf --pretty
+  python bac_extract.py statement.pdf -o output.json -v
         """
     )
 
@@ -351,12 +308,6 @@ Examples:
         help="Path to the BAC statement PDF"
     )
 
-    parser.add_argument(
-        "card_suffix",
-        type=str,
-        help="Last 4 digits of card to filter (e.g., 1234)"
-    )
-
     parser.add_argument(
         "-o", "--output",
         type=Path,
@@ -378,11 +329,6 @@ Examples:
 
     args = parser.parse_args()
 
-    # Validate card suffix
-    if not args.card_suffix.isdigit() or len(args.card_suffix) != 4:
-        print(f"Error: Card suffix must be exactly 4 digits, got '{args.card_suffix}'", file=sys.stderr)
-        sys.exit(1)
-
     # Validate PDF file exists
     if not args.pdf_file.exists():
         print(f"Error: File not found: {args.pdf_file}", file=sys.stderr)
@@ -393,7 +339,7 @@ Examples:
         sys.exit(1)
 
     try:
-        result = extract_transactions(args.pdf_file, args.card_suffix, args.verbose)
+        result = extract_transactions(args.pdf_file, args.verbose)
 
         # Write output
         indent = 2 if args.pretty else None