""" Book Arbitrage Opportunity Scanner (test version) ------------------------------------------------- Scans a list of ISBN-13 codes or ASINs with the Keepa API, estimates profit, and prints a pandas table of promising Amazon book arbitrage opportunities. This is intentionally beginner-friendly and modular so it can be extended later with: - eBay or local sourcing inputs - Telegram/email alerts - CSV exports - richer fee calculations - restriction checks from Seller Central tools """ from __future__ import annotations from datetime import datetime, timedelta, timezone from getpass import getpass import random import sys import time from typing import Dict, Iterable, List, Optional, Sequence, Tuple import keepa import pandas as pd # --------------------------------------------------------------------------- # Sample inputs # --------------------------------------------------------------------------- # Replace these with your own ISBN-13 values or ASINs. # These are common textbook-style titles for testing. Keepa accepts ASINs, # ISBN-10s, and ISBN-13s. We split ISBN-13 vs ASIN automatically below. SAMPLE_CODES: List[str] = [ "9781285740621", # Calculus: Early Transcendentals (Stewart) "9780134763640", # Chemistry: The Central Science "9780134093413", # Campbell Biology "9780321973610", # University Physics with Modern Physics "9781259911156", # Fundamentals of Physics "9780134462017", # Organic Chemistry "9780135988042", # Starting Out with Python "9781260247787", # Principles of Economics "9781319102593", # Calculus, Volume 1 "9780393614176", # Give Me Liberty! ] # --------------------------------------------------------------------------- # Scanner settings # --------------------------------------------------------------------------- AMAZON_REFERRAL_RATE = 0.15 FBM_PER_ORDER_FEE = 1.00 SHIPPING_ESTIMATE = 4.00 ROI_THRESHOLD = 0.25 MAX_SALES_RANK = 1_000_000 LOOKBACK_DAYS = 90 DEFAULT_BUY_PRICE = 7.50 SIMULATE_BUY_PRICE_RANGE = False BUY_PRICE_RANGE = (5.00, 10.00) BATCH_SIZE = 10 SLEEP_BETWEEN_BATCHES = 1.0 # --------------------------------------------------------------------------- # Small helpers # --------------------------------------------------------------------------- def prompt_keepa_key() -> str: """Prompt for the Keepa API key without echoing it to the screen.""" print("Enter your Keepa API key.") print("If you do not have one yet, get it from https://keepa.com/#!api") key = getpass("Keepa API key: ").strip() if not key: raise ValueError("A Keepa API key is required to run this script.") return key def prompt_buy_price() -> float: """ Ask the user for a simple fixed buy price. Press Enter to use the default. """ raw = input(f"Estimated source buy price per book [default {DEFAULT_BUY_PRICE:.2f}]: ").strip() if not raw: return DEFAULT_BUY_PRICE return float(raw) def clean_code(code: str) -> str: """Normalize a product code by stripping spaces and hyphens.""" return code.replace("-", "").replace(" ", "").strip().upper() def is_isbn13(code: str) -> bool: """Very simple ISBN-13 detector.""" return len(code) == 13 and code.isdigit() def split_codes(codes: Sequence[str]) -> Tuple[List[str], List[str]]: """ Split codes into ISBN-13 values and ASIN/other product identifiers. Keepa needs `product_code_is_asin=False` for ISBN-13 lookups. """ normalized = [clean_code(code) for code in codes if clean_code(code)] isbn13_codes = [code for code in normalized if is_isbn13(code)] asin_codes = [code for code in normalized if not is_isbn13(code)] return isbn13_codes, asin_codes def chunked(items: Sequence[str], size: int) -> Iterable[List[str]]: """Yield list chunks so large scans are easier on API tokens.""" for index in range(0, len(items), size): yield list(items[index : index + size]) def latest_positive(values: Sequence[float]) -> Optional[float]: """ Return the latest positive value from a Keepa history array. Keepa uses -1 or 0-like placeholders for missing states in some streams. """ for value in reversed(list(values)): if value is not None and value > 0: return float(value) return None def recent_positive_values( values: Sequence[float], times: Sequence[datetime], lookback_days: int = LOOKBACK_DAYS, ) -> List[float]: """Return positive values from the last N days for simple price stats.""" cutoff = datetime.now(timezone.utc) - timedelta(days=lookback_days) recent: List[float] = [] for value, timestamp in zip(values, times): if timestamp is None: continue if timestamp.tzinfo is None: timestamp = timestamp.replace(tzinfo=timezone.utc) if timestamp >= cutoff and value is not None and value > 0: recent.append(float(value)) return recent def get_buy_price(base_buy_price: float) -> float: """Use either a fixed buy price or a simulated range for testing.""" if SIMULATE_BUY_PRICE_RANGE: return round(random.uniform(*BUY_PRICE_RANGE), 2) return round(base_buy_price, 2) def estimate_profit(used_sell_price: float, buy_price: float) -> Tuple[float, float]: """ Estimate FBM profit using the user's requested simplified model: payout_after_referral = used_sell_price * 0.85 profit = payout_after_referral - buy_price - shipping_estimate - $1 FBM fee roi = profit / buy_price """ payout_after_referral = used_sell_price * (1 - AMAZON_REFERRAL_RATE) profit = payout_after_referral - buy_price - SHIPPING_ESTIMATE - FBM_PER_ORDER_FEE roi = (profit / buy_price) if buy_price else 0.0 return round(profit, 2), round(roi, 4) def keepa_product_url(asin: str, domain_id: int = 1) -> str: """Build a direct Keepa product page link.""" return f"https://keepa.com/#!product/{domain_id}-{asin}" def infer_major_restriction(product: Dict) -> bool: """ Keepa does not provide a full Amazon ungating/restriction decision the way Seller Central does, so this function only flags obvious edge cases. Later you can replace this with: - SellerAmp / ScoutIQ / InventoryLab checks - Amazon SP-API listing restrictions endpoints - your own allowed category / hazmat rules """ obvious_flags = [ product.get("isAdultProduct"), product.get("hazardousMaterials") not in (None, "", []), ] return any(obvious_flags) def select_used_price(product: Dict) -> Optional[float]: """ Choose a used sell price with sensible fallbacks. Priority: 1. Current USED history value 2. Used condition shipping-inclusive histories 3. Buy box shipping history """ history = product.get("data", {}) candidate_streams = [ "USED", "USED_GOOD_SHIPPING", "USED_VERY_GOOD_SHIPPING", "USED_ACCEPTABLE_SHIPPING", "USED_NEW_SHIPPING", "BUY_BOX_SHIPPING", ] for stream_name in candidate_streams: values = history.get(stream_name, []) price = latest_positive(values) if price is not None: return round(price, 2) return None def summarize_used_price_history(product: Dict) -> Dict[str, Optional[float]]: """ Build simple history stats from the used price stream over the last N days. """ history = product.get("data", {}) used_values = history.get("USED", []) used_times = history.get("USED_time", []) recent_used = recent_positive_values(used_values, used_times, LOOKBACK_DAYS) if not recent_used: return { "used_avg_90d": None, "used_min_90d": None, "used_max_90d": None, } return { "used_avg_90d": round(sum(recent_used) / len(recent_used), 2), "used_min_90d": round(min(recent_used), 2), "used_max_90d": round(max(recent_used), 2), } def latest_sales_rank(product: Dict) -> Optional[int]: """ Use the latest positive sales rank from the SALES history. Lower is generally better. We only keep rank values below our threshold. """ history = product.get("data", {}) rank = latest_positive(history.get("SALES", [])) return int(rank) if rank is not None else None def analyze_product(product: Dict, base_buy_price: float) -> Optional[Dict]: """ Turn a Keepa product payload into a single opportunity record. Returns None if the product does not have enough data. """ asin = product.get("asin") title = product.get("title", "Unknown title") used_price = select_used_price(product) sales_rank = latest_sales_rank(product) restricted = infer_major_restriction(product) if not asin or used_price is None or sales_rank is None: return None buy_price = get_buy_price(base_buy_price) est_profit, roi = estimate_profit(used_price, buy_price) price_stats = summarize_used_price_history(product) return { "ISBN/ASIN": product.get("ean") or asin, "ASIN": asin, "Title": title, "Amazon Used Price": round(used_price, 2), "Sales Rank": sales_rank, "Est Buy Price": round(buy_price, 2), "Est Profit": est_profit, "ROI %": round(roi * 100, 2), "Keepa link": keepa_product_url(asin), "restricted": restricted, "used_avg_90d": price_stats["used_avg_90d"], "used_min_90d": price_stats["used_min_90d"], "used_max_90d": price_stats["used_max_90d"], } def query_keepa_products( api: keepa.Keepa, codes: Sequence[str], *, product_code_is_asin: bool, ) -> List[Dict]: """ Query Keepa in small batches with simple retry logic. We let Keepa wait for tokens automatically, and we add a retry sleep for transient API or rate-limit issues. """ products: List[Dict] = [] for batch in chunked(list(codes), BATCH_SIZE): for attempt in range(3): try: api.update_status() result = api.query( batch, domain="US", history=True, stats=LOOKBACK_DAYS, progress_bar=False, product_code_is_asin=product_code_is_asin, wait=True, ) products.extend(result) time.sleep(SLEEP_BETWEEN_BATCHES) break except Exception as exc: message = str(exc).lower() is_rate_limited = "token" in message or "rate" in message or "429" in message if attempt == 2: print(f"Skipping batch after repeated errors: {batch}") print(f"Last error: {exc}") break sleep_seconds = 30 if is_rate_limited else 10 print(f"Keepa request issue ({exc}). Sleeping {sleep_seconds}s and retrying...") time.sleep(sleep_seconds) return products def filter_opportunities(rows: Sequence[Dict]) -> pd.DataFrame: """ Keep only opportunities that pass the user's test filters: - ROI > 25% - sales rank < 1,000,000 - no obvious restrictions """ filtered = [ row for row in rows if row["ROI %"] > ROI_THRESHOLD * 100 and row["Sales Rank"] < MAX_SALES_RANK and not row["restricted"] ] if not filtered: return pd.DataFrame( columns=[ "ISBN/ASIN", "Title", "Amazon Used Price", "Sales Rank", "Est Buy Price", "Est Profit", "ROI %", "Keepa link", ] ) df = pd.DataFrame(filtered) df = df.sort_values(by=["ROI %", "Est Profit"], ascending=False).reset_index(drop=True) return df[ [ "ISBN/ASIN", "Title", "Amazon Used Price", "Sales Rank", "Est Buy Price", "Est Profit", "ROI %", "Keepa link", ] ] def scan_books(codes: Sequence[str], base_buy_price: float) -> pd.DataFrame: """Main scanner workflow.""" key = prompt_keepa_key() api = keepa.Keepa(key) isbn13_codes, asin_codes = split_codes(codes) all_products: List[Dict] = [] if isbn13_codes: print(f"Querying {len(isbn13_codes)} ISBN-13 codes...") all_products.extend( query_keepa_products(api, isbn13_codes, product_code_is_asin=False) ) if asin_codes: print(f"Querying {len(asin_codes)} ASINs/ISBN-10s...") all_products.extend( query_keepa_products(api, asin_codes, product_code_is_asin=True) ) analyzed_rows = [] for product in all_products: row = analyze_product(product, base_buy_price) if row: analyzed_rows.append(row) return filter_opportunities(analyzed_rows) def main() -> None: """ Entry point. This is written to work cleanly in: - local Python - Google Colab - a beginner's first script-editing workflow """ print("=" * 72) print("Book Arbitrage Opportunity Scanner (Keepa test version)") print("=" * 72) print(f"Run started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") print() try: buy_price = prompt_buy_price() results_df = scan_books(SAMPLE_CODES, buy_price) print() print("Qualified opportunities:") print("- ROI > 25%") print("- Sales rank < 1,000,000") print("- No obvious restriction flags") print() pd.set_option("display.max_colwidth", 120) pd.set_option("display.width", 220) pd.set_option("display.max_columns", None) if results_df.empty: print("No qualifying opportunities found with the current inputs.") else: print(results_df.to_string(index=False)) print() print("Notes:") print("- This is a test scanner only; it does not buy, list, or repricer anything.") print("- Keepa access usually requires a paid API subscription, not a broad free tier.") print("- Exact Amazon restriction/ungating checks usually need Seller Central or third-party tools.") print("- If Keepa access is too limited, an alternative is manual Keepa CSV export parsing.") print() print("To run: Get free Keepa key -> pip install keepa pandas -> replace API key and ISBN list -> run.") except KeyboardInterrupt: print("\nStopped by user.") sys.exit(1) except Exception as exc: print(f"\nScanner failed: {exc}") sys.exit(1) if __name__ == "__main__": main()