false, 'error' => 'Not authenticated']); exit; } if ($_SERVER['REQUEST_METHOD'] !== 'POST') { http_response_code(405); echo json_encode(['success' => false, 'error' => 'Method not allowed']); exit; } if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) { http_response_code(403); echo json_encode(['success' => false, 'error' => 'Invalid CSRF token']); exit; } $recordId = (int)trim($_POST['rid'] ?? ''); $randId = trim($_POST['rand'] ?? ''); $section = trim($_POST['section'] ?? ''); $validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial']; if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) { http_response_code(400); echo json_encode(['success' => false, 'error' => 'Invalid parameters']); exit; } // ── Load soil record + spec ─────────────────────────────────────────────────── try { $pdo = getDBConnection(); $stmt = $pdo->prepare('SELECT * FROM soil_records WHERE id = ? AND rand = ?'); $stmt->execute([$recordId, $randId]); $row = $stmt->fetch(PDO::FETCH_ASSOC); if (!$row) { http_response_code(404); echo json_encode(['success' => false, 'error' => 'Record not found']); exit; } $spec = []; if (!empty($row['soil_type'])) { $stmtSpec = $pdo->prepare('SELECT * FROM soil_specifications WHERE soil_type = ? LIMIT 1'); $stmtSpec->execute([$row['soil_type']]); $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: []; } } catch (PDOException $e) { error_log('DB error in ollamaGenerate.php: ' . $e->getMessage()); http_response_code(500); echo json_encode(['success' => false, 'error' => 'Database error']); exit; } // ── Helper: safe float format ──────────────────────────────────────────────── function fv(mixed $v, int $dp = 2): string { if ($v === null || $v === '') return 'N/A'; return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v; } // ── Helper: status vs spec range ───────────────────────────────────────────── function rangeStatus(mixed $value, mixed $min, mixed $max): string { if (!is_numeric($value)) return ''; $v = (float)$value; $lo = is_numeric($min) ? (float)$min : null; $hi = is_numeric($max) ? (float)$max : null; if ($lo !== null && $v < $lo) return '[DEFICIENT]'; if ($hi !== null && $v > $hi) return '[EXCESS]'; if ($lo !== null || $hi !== null) return '[IDEAL]'; return ''; } // ── Helper: resolve spec value from spec row then record row ───────────────── function sv(array $spec, array $row, string $col): mixed { if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col]; if (isset($row[$col]) && $row[$col] !== '' && $row[$col] !== null) return $row[$col]; return null; } $r = $row; $s = $spec; // ── Build comprehensive soil data block ─────────────────────────────────────── // Includes ALL measured elements with status against spec targets $soilData = << (float)$hi) $excesses[] = $label; } $soilData .= "\nDeficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)); $soilData .= "\nIn Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)); $soilData .= "\n=====================================\n"; // ── RAG: embed the soil data query, retrieve relevant book passages ─────────── $knowledgeContext = ''; $ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K); if (!empty($ragChunks)) { $knowledgeContext = "\n\n===================================================\n" . "RELEVANT PASSAGES FROM SOIL SCIENCE LITERATURE\n" . "(William A. Albrecht and other authorities)\n" . "===================================================\n"; foreach ($ragChunks as $i => $chunk) { $knowledgeContext .= sprintf( "\n[%d] \"%s\" — %s (p.%d)\n%s\n", $i + 1, $chunk['source'], $chunk['author'], $chunk['page'], $chunk['chunk_text'] ); } } // ── Section-specific system prompts ────────────────────────────────────────── $systemInstruction = "You are a certified agronomist specialising in soil fertility, " . "trained in the Albrecht method of soil balancing. " . "You have deep knowledge of soil chemistry, plant nutrition, and the relationship " . "between soil mineral balance and crop/livestock health. " . "Always ground your recommendations in the measured data. " . "For Australian conditions, reference typical soil types and climate where relevant. " . "Write in a professional but accessible tone suitable for a farmer-facing report. " . "When the knowledge passages conflict with your training, prefer the passages — they " . "are from authoritative soil science texts."; $baseContext = $soilData . $knowledgeContext; $prompts = [ 'overview' => $systemInstruction . "\n\n" . $baseContext . "\n\nTASK: Write an executive overview of these soil test results (3–4 paragraphs). " . "Cover: (1) overall soil health and fertility level, " . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, " . "(3) any positive attributes of this soil. " . "Use the Albrecht philosophy as a framework where applicable. " . "Do not list specific product names in this section.", 'ai_interpretation' => $systemInstruction . "\n\n" . $baseContext . "\n\nTASK: Write a detailed technical interpretation of ALL elements in this soil test. " . "Structure your response with these sections:\n" . "1. SOIL REACTION (pH, EC, Paramagnetic)\n" . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n" . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n" . "4. MAJOR ELEMENTS (Ca, Mg, K, Na, P — ppm and saturation %)\n" . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n" . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n" . "7. OVERALL SOIL BALANCE ASSESSMENT\n" . "For each element marked [DEFICIENT] or [EXCESS], explain the agronomic significance " . "and interactions with other elements. Reference the Albrecht literature where relevant.", 'foliar' => $systemInstruction . "\n\n" . $baseContext . "\n\nTASK: Design a foliar nutrition program to address the deficiencies shown. " . "Format the program as a table or numbered list with: " . "Growth Stage | Product Type | Active Element | Rate (L or kg/ha) | Timing/Frequency. " . "Prioritise elements marked [DEFICIENT]. " . "Note any antagonisms (e.g. Ca/Mg competition, Zn/P interaction, K/Mg lockout). " . "Keep product recommendations generic (e.g. 'chelated zinc', 'calcium nitrate') " . "rather than brand names. " . "Add a note on carrier water pH and adjuvant recommendations.", 'microbial' => $systemInstruction . "\n\n" . $baseContext . "\n\nTASK: Design a biological/microbial soil improvement program. " . "Consider the organic matter level, C:N ratio, pH, and base saturation balance shown. " . "Structure your response:\n" . "1. CURRENT BIOLOGY ASSESSMENT (based on OM, C:N, pH)\n" . "2. RECOMMENDED INOCULANTS (e.g. mycorrhizae, rhizobia, EM, compost tea)\n" . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n" . "4. TIMING & INTEGRATION with the soil balancing program\n" . "Reference Albrecht's work on the relationship between mineral balance and soil biology.", ]; // ── Call Ollama ─────────────────────────────────────────────────────────────── $payload = json_encode([ 'model' => OLLAMA_MODEL, 'prompt' => $prompts[$section], 'stream' => false, 'options' => [ 'temperature' => 0.3, // lower = more factual / less creative 'num_predict' => 2048, ], ]); $ch = curl_init(OLLAMA_HOST . '/api/generate'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => $payload, CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => OLLAMA_TIMEOUT, CURLOPT_CONNECTTIMEOUT => 5, ]); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); $curlErr = curl_error($ch); curl_close($ch); if ($curlErr || $response === false) { http_response_code(502); echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]); exit; } if ($httpCode !== 200) { http_response_code(502); echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]); exit; } $ollamaData = json_decode($response, true); $text = trim($ollamaData['response'] ?? ''); if ($text === '') { http_response_code(502); echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']); exit; } echo json_encode([ 'success' => true, 'text' => $text, 'rag_chunks_used' => count($ragChunks), ]); exit; // ── RAG retrieval ──────────────────────────────────────────────────────────── /** * Embed a query string, then retrieve the top-K most similar knowledge chunks. * Falls back to MySQL FULLTEXT search if no embeddings are in the table or * if the embedding API is unavailable. * * @param PDO $pdo * @param string $queryText The soil data summary used as the retrieval query * @param string $section Current section (used to build keyword fallback) * @param int $topK * @return array Array of row arrays (source, author, page, chunk_text) */ function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array { // Check if we have any chunks at all $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn(); if ($count === 0) { return []; // Knowledge base not yet populated } // ── Try vector similarity search first ────────────────────────────────── $queryEmbedding = getQueryEmbedding($queryText); if ($queryEmbedding !== null) { return vectorSearch($pdo, $queryEmbedding, $topK); } // ── Fallback: MySQL FULLTEXT search ───────────────────────────────────── return fulltextSearch($pdo, $section, $topK); } /** * Embed text via Ollama. Tries new /api/embed first, falls back to legacy * /api/embeddings. Returns float[] or null on failure. */ function getQueryEmbedding(string $text): ?array { $queryText = substr($text, 0, 2000); // ── New API (/api/embed, Ollama >= 0.1.26) ─────────────────────────────── $ch = curl_init(OLLAMA_HOST . '/api/embed'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]), CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 3, ]); $resp = curl_exec($ch); $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($resp && $code === 200) { $data = json_decode($resp, true); $emb = $data['embeddings'][0] ?? null; if (is_array($emb) && count($emb) > 0) return $emb; } // ── Legacy API (/api/embeddings) ───────────────────────────────────────── $ch = curl_init(OLLAMA_HOST . '/api/embeddings'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]), CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 3, ]); $resp2 = curl_exec($ch); $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($resp2 && $code2 === 200) { $data2 = json_decode($resp2, true); $emb2 = $data2['embedding'] ?? null; if (is_array($emb2) && count($emb2) > 0) return $emb2; } return null; } /** * Load all chunk embeddings from DB, compute cosine similarity, return top-K. * For corpora up to ~10k chunks this is fast enough in PHP. */ function vectorSearch(PDO $pdo, array $queryVec, int $topK): array { $stmt = $pdo->query( 'SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks' ); $scores = []; while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) { $chunkVec = json_decode($row['embedding'], true); if (!is_array($chunkVec)) continue; $sim = cosineSimilarity($queryVec, $chunkVec); $scores[] = [ 'score' => $sim, 'source' => $row['source'], 'author' => $row['author'], 'page' => $row['page'], 'chunk_text' => $row['chunk_text'], ]; } // Sort descending by score, return top-K usort($scores, fn($a, $b) => $b['score'] <=> $a['score']); return array_slice($scores, 0, $topK); } /** * MySQL FULLTEXT fallback when embeddings aren't available. */ function fulltextSearch(PDO $pdo, string $section, int $topK): array { // Section-specific keyword hints for the search $keywords = [ 'overview' => 'soil fertility mineral balance calcium magnesium', 'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter', 'foliar' => 'foliar nutrition trace elements deficiency correction spray', 'microbial' => 'soil biology microbial organic matter carbon nitrogen humus', ]; $query = $keywords[$section] ?? 'soil fertility mineral nutrition'; try { $stmt = $pdo->prepare( 'SELECT source, author, page, chunk_text, MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) AS score FROM knowledge_chunks WHERE MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) ORDER BY score DESC LIMIT ?' ); $stmt->execute([$query, $query, $topK]); return $stmt->fetchAll(PDO::FETCH_ASSOC); } catch (PDOException $e) { error_log('RAG fulltext search failed: ' . $e->getMessage()); return []; } } /** * Cosine similarity between two equal-length float vectors. */ function cosineSimilarity(array $a, array $b): float { $dot = 0.0; $normA = 0.0; $normB = 0.0; $len = min(count($a), count($b)); for ($i = 0; $i < $len; $i++) { $dot += $a[$i] * $b[$i]; $normA += $a[$i] * $a[$i]; $normB += $b[$i] * $b[$i]; } $denom = sqrt($normA) * sqrt($normB); return $denom > 0 ? $dot / $denom : 0.0; }