[
  {
    "task_id": "easy_perfect_match",
    "name": "Perfect Three-Way Match",
    "difficulty": "easy",
    "description": "All three documents agree exactly. Confirm and approve."
  },
  {
    "task_id": "easy_no_po_found",
    "name": "No Purchase Order Found",
    "difficulty": "easy",
    "description": "Invoice references a PO that does not exist. Reject immediately."
  },
  {
    "task_id": "medium_quantity_shortfall",
    "name": "Quantity Shortfall Reconciliation",
    "difficulty": "medium",
    "description": "GRN shows fewer items received than invoiced. Recalculate and partial-approve."
  },
  {
    "task_id": "medium_price_discrepancy",
    "name": "Unit Price Discrepancy",
    "difficulty": "medium",
    "description": "Invoice unit price deviates from agreed PO price beyond the policy threshold. Reject."
  },
  {
    "task_id": "hard_policy_violation",
    "name": "Policy Violation — Unauthorized Freight",
    "difficulty": "hard",
    "description": "Freight charge exceeds the episode-specific policy cap. Detect violation and reject."
  },
  {
    "task_id": "hard_duplicate_invoice",
    "name": "Duplicate Invoice Detection",
    "difficulty": "hard",
    "description": "Invoice ID already appears in the paid ledger. Block the duplicate."
  },
  {
    "task_id": "medium_split_delivery",
    "name": "Split Delivery Reconciliation",
    "difficulty": "medium",
    "description": "Goods arrived in two shipments across two GRNs. Sum quantities and approve full."
  },
  {
    "task_id": "medium_vendor_mismatch",
    "name": "Vendor Name Mismatch",
    "difficulty": "medium",
    "description": "Invoice vendor name does not exactly match the PO vendor. Reject per policy."
  },
  {
    "task_id": "hard_partial_po_match",
    "name": "Partial PO Coverage",
    "difficulty": "hard",
    "description": "Invoice includes line items not covered by the PO. Partial-approve only authorised items."
  },
  {
    "task_id": "hard_tax_discrepancy",
    "name": "Unauthorized Tax Charge",
    "difficulty": "hard",
    "description": "Vendor adds a tax charge with no PO authorisation. Detect and reject."
  },
  {
    "task_id": "hard_currency_conversion",
    "name": "Foreign Currency Invoice Reconciliation",
    "difficulty": "hard",
    "description": "Invoice is in EUR; PO is in USD. Convert using the policy exchange rate and decide."
  },
  {
    "task_id": "hard_manager_preapproval",
    "name": "Manager Pre-Approved Freight Override",
    "difficulty": "hard",
    "description": "Freight exceeds cap but manager pre-approval may exist. Escalate before deciding."
  },
  {
    "task_id": "hard_credit_memo",
    "name": "Credit Memo Processing",
    "difficulty": "hard",
    "description": "Vendor issues a credit memo (negative invoice). Verify PO and process correctly."
  },
  {
    "task_id": "long_invoice_dispute",
    "name": "Invoice Dispute Resolution",
    "difficulty": "long-horizon",
    "description": "Price-inflated invoice triggers vendor dispute. Multi-step process: query, escalate, compliance review, then reject and request corrected invoice."
  },
  {
    "task_id": "long_policy_migration",
    "name": "Policy Migration — Mid-Episode Rule Change",
    "difficulty": "long-horizon",
    "description": "Freight within new policy cap but over old cap. Agent must detect mid-episode policy update and approve correctly."
  },
  {
    "task_id": "long_batch_reconciliation",
    "name": "Batch Invoice Reconciliation",
    "difficulty": "long-horizon",
    "description": "Process invoice within a batch context. Apply three-way match independently regardless of other batch invoices."
  },
  {
    "task_id": "long_manager_chain",
    "name": "Manager Unavailable — VP Finance Escalation Chain",
    "difficulty": "long-horizon",
    "description": "Freight over cap; manager is OOO. Must escalate up chain to VP Finance who has pre-approved. Then approve."
  },
  {
    "task_id": "long_fraud_investigation",
    "name": "Fraud Investigation — Duplicate Payment Attempt",
    "difficulty": "long-horizon",
    "description": "Vendor falsely disputes duplicate invoice status. Multi-step investigation: query vendor, escalate for ledger audit, then reject."
  },
  {
    "task_id": "long_audit_trail",
    "name": "SOX Audit Trail — Compliance Documentation",
    "difficulty": "long-horizon",
    "description": "Standard approval with SOX audit requirements. Explanation must cite PO, GRN, amounts, and policy basis for compliance."
  },
  {
    "task_id": "long_multi_vendor_split",
    "name": "Multi-Shipment Split — Partial Delivery Tranche",
    "difficulty": "long-horizon",
    "description": "PO split into 3 delivery tranches. Approve only the first tranche invoice amount; remaining covered by future invoices."
  },
  {
    "task_id": "oversight_fraud_detection",
    "name": "Oversight: Fraud Detection",
    "difficulty": "oversight",
    "description": "Review batch of AP Clerk decisions, identify fraudulent approvals, flag with specific numeric evidence. Use POST /oversight/reset to start."
  },
  {
    "task_id": "oversight_pattern_recognition",
    "name": "Oversight: Pattern Recognition",
    "difficulty": "oversight",
    "description": "Identify recurring violation pattern across multiple clerk decisions. Use POST /oversight/reset to start."
  },
  {
    "task_id": "oversight_false_positive_trap",
    "name": "Oversight: False Positive Trap",
    "difficulty": "oversight",
    "description": "All episodes are clean — agent must CLEAR all without over-flagging. Use POST /oversight/reset to start."
  },
  {
    "task_id": "oversight_explanation_quality",
    "name": "Oversight: Explanation Quality",
    "difficulty": "oversight",
    "description": "Single fraudulent episode — agent must provide explanation with specific numeric citations. Use POST /oversight/reset to start."
  }
]