| 123456789101112131415161718192021222324 |
- -- ============================================================
- -- 002_create_knowledge_base.sql
- --
- -- Knowledge base for RAG (Retrieval-Augmented Generation).
- -- Stores chunked text from soil science books (Albrecht et al.)
- -- plus their vector embeddings from Ollama nomic-embed-text.
- --
- -- Run once:
- -- mysql -u <user> -p cropmonitor < database/migrations/002_create_knowledge_base.sql
- -- ============================================================
- CREATE TABLE IF NOT EXISTS `knowledge_chunks` (
- `id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
- `source` VARCHAR(255) NOT NULL COMMENT 'Book title or filename',
- `author` VARCHAR(255) NOT NULL DEFAULT '' COMMENT 'Author name',
- `page` SMALLINT NOT NULL DEFAULT 0 COMMENT 'Source PDF page number',
- `chunk_index` SMALLINT NOT NULL DEFAULT 0 COMMENT 'Chunk position within the source',
- `chunk_text` TEXT NOT NULL COMMENT 'Raw text of this chunk',
- `embedding` JSON NOT NULL COMMENT 'Float array from nomic-embed-text (768 dims)',
- `created_at` DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
- PRIMARY KEY (`id`),
- FULLTEXT KEY `ft_chunk_text` (`chunk_text`),
- KEY `idx_source` (`source`(100))
- ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|