bookshelf/config/models.default.yaml

# AI model configurations — each model references a credential and provides
# the model string, optional openrouter routing (extra_body), and the prompt.
# ${OUTPUT_FORMAT} is injected by the plugin from its hardcoded schema constant.
# Override individual models in models.user.yaml.
models:
  vl_detect_shelves:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${OUTPUT_FORMAT} — JSON schema injected by BoundaryDetectorShelvesPlugin
      Look at this photo of a bookcase/shelf unit.
      Count the number of horizontal shelves visible.
      For each interior boundary between adjacent shelves, give its vertical position
      as a fraction 0-1 (0=top of image, 1=bottom). Do NOT include 0 or 1 themselves.
      Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

  vl_detect_books:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${OUTPUT_FORMAT} — JSON schema injected by BoundaryDetectorBooksPlugin
      Look at this shelf photo. Identify every book spine visible left-to-right.
      For each interior boundary between adjacent books, give its horizontal position
      as a fraction 0-1 (0=left edge of image, 1=right edge). Do NOT include 0 or 1.
      Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

  vl_recognize:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${OUTPUT_FORMAT} — JSON schema injected by TextRecognizerPlugin
      Look at this book spine image. Read all visible text exactly as it appears,
      preserving line breaks between distinct text blocks.
      Then use visual cues (font size, position, layout) to identify which part is the title,
      author, publisher, year, and any other notable text.
      Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

  ai_identify:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${RAW_TEXT}           — text read from the book spine (multi-line)
      # ${ARCHIVE_RESULTS}    — JSON array of candidate records from library archives
      # ${OUTPUT_FORMAT}      — JSON schema injected by BookIdentifierPlugin
      Text read from the book spine:
      ${RAW_TEXT}

      Archive search results (may be empty):
      ${ARCHIVE_RESULTS}

      Your task:
      1. Search the web for this book if needed to find additional information.
      2. Combine the spine text, archive results, and your web search into identification candidates.
      3. Collapse candidates that are clearly the same book (same title + author + year + publisher) into one entry, listing all contributing sources.
      4. Rank candidates by confidence (highest first). Assign a score 0.0-1.0.
      5. Remove any candidates you believe are irrelevant or clearly wrong.

      IMPORTANT — confidence scoring rules:
      - The score must reflect how well the found information matches the spine text and recognized data.
      - If the only available evidence is a title with no author, year, publisher, or corroborating archive results, the score must not exceed 0.5.
      - Base confidence on: quality of spine text match, number of matching fields, archive result corroboration, and completeness of the identified record.
      - A record with title + author + year that appears in multiple archive sources warrants a high score; a record with only a guessed title warrants a low score.

      IMPORTANT — output format rules:
      - The JSON schema below is a format specification only. Do NOT use it as a source of example data.
      - Do NOT return placeholder values such as "The Great Gatsby", "Unknown Author", "Example Publisher", or any other generic example text unless that exact text literally appears on the spine.
      - Return only real books that could plausibly match what is shown on this spine.
      - If you cannot identify the book with reasonable confidence, return an empty array [].

      Return ONLY valid JSON matching the schema below, no explanation:
      ${OUTPUT_FORMAT}