Initial commit

Photo-based book cataloger with AI identification. Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend; vanilla JS SPA; OpenAI-compatible plugin system for boundary detection, text recognition, and archive search.
2026-03-09 14:17:13 +03:00
commit 084d1aebd5
64 changed files with 8605 additions and 0 deletions
--- a/config/models.default.yaml
+++ b/config/models.default.yaml
@@ -0,0 +1,50 @@
+# AI model configurations — each model references a credential and provides
+# the model string, optional openrouter routing (extra_body), and the prompt.
+# ${OUTPUT_FORMAT} is injected by the plugin from its hardcoded schema constant.
+# Override individual models in models.user.yaml.
+models:
+  vl_detect_shelves:
+    credentials: openrouter
+    model: "google/gemini-flash-1.5"
+    # ${OUTPUT_FORMAT}: JSON schema injected by BoundaryDetectorShelvesPlugin (YAML comment, not prompt text).
+    prompt: |
+      Look at this photo of a bookcase/shelf unit.
+      Count the number of horizontal shelves visible.
+      For each interior boundary between adjacent shelves, give its vertical position
+      as a fraction 0-1 (0=top of image, 1=bottom). Do NOT include 0 or 1 themselves.
+      Return ONLY valid JSON, no explanation:
+      ${OUTPUT_FORMAT}
+
+  vl_detect_books:
+    credentials: openrouter
+    model: "google/gemini-flash-1.5"
+    # ${OUTPUT_FORMAT}: JSON schema injected by BoundaryDetectorBooksPlugin (YAML comment, not prompt text).
+    prompt: |
+      Look at this shelf photo. Identify every book spine visible left-to-right.
+      For each interior boundary between adjacent books, give its horizontal position
+      as a fraction 0-1 (0=left edge of image, 1=right edge). Do NOT include 0 or 1.
+      Return ONLY valid JSON, no explanation:
+      ${OUTPUT_FORMAT}
+
+  vl_recognize:
+    credentials: openrouter
+    model: "google/gemini-flash-1.5"
+    # ${OUTPUT_FORMAT}: JSON schema injected by TextRecognizerPlugin (YAML comment, not prompt text).
+    prompt: |
+      Look at this book spine image. Read all visible text exactly as it appears,
+      preserving line breaks between distinct text blocks.
+      Then use visual cues (font size, position, layout) to identify which part is the title,
+      author, publisher, year, and any other notable text.
+      Return ONLY valid JSON, no explanation:
+      ${OUTPUT_FORMAT}
+
+  ai_identify:
+    credentials: openrouter
+    model: "google/gemini-flash-1.5"
+    # ${RAW_TEXT}: text read from the book spine (multi-line). YAML comment, not prompt text.
+    # ${OUTPUT_FORMAT}: JSON schema injected by BookIdentifierPlugin. YAML comment, not prompt text.
+    prompt: |
+      The following text was read from a book spine:
+      ${RAW_TEXT}
+      Identify this book. Search for it if needed. Return ONLY valid JSON, no explanation:
+      ${OUTPUT_FORMAT}