002_create_knowledge_base.sql 1.3 KB

123456789101112131415161718192021222324
  1. -- ============================================================
  2. -- 002_create_knowledge_base.sql
  3. --
  4. -- Knowledge base for RAG (Retrieval-Augmented Generation).
  5. -- Stores chunked text from soil science books (Albrecht et al.)
  6. -- plus their vector embeddings from Ollama nomic-embed-text.
  7. --
  8. -- Run once:
  9. -- mysql -u <user> -p cropmonitor < database/migrations/002_create_knowledge_base.sql
  10. -- ============================================================
  11. CREATE TABLE IF NOT EXISTS `knowledge_chunks` (
  12. `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
  13. `source` VARCHAR(255) NOT NULL COMMENT 'Book title or filename',
  14. `author` VARCHAR(255) NOT NULL DEFAULT '' COMMENT 'Author name',
  15. `page` SMALLINT NOT NULL DEFAULT 0 COMMENT 'Source PDF page number',
  16. `chunk_index` SMALLINT NOT NULL DEFAULT 0 COMMENT 'Chunk position within the source',
  17. `chunk_text` TEXT NOT NULL COMMENT 'Raw text of this chunk',
  18. `embedding` JSON NOT NULL COMMENT 'Float array from nomic-embed-text (768 dims)',
  19. `created_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
  20. PRIMARY KEY (`id`),
  21. FULLTEXT KEY `ft_chunk_text` (`chunk_text`),
  22. KEY `idx_source` (`source`(100))
  23. ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;