false, 'error' => 'Not authenticated']); exit; } if ($_SERVER['REQUEST_METHOD'] !== 'POST') { http_response_code(405); echo json_encode(['success' => false, 'error' => 'Method not allowed']); exit; } if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) { http_response_code(403); echo json_encode(['success' => false, 'error' => 'Invalid CSRF token']); exit; } $recordId = (int)trim($_POST['rid'] ?? ''); $randId = trim($_POST['rand'] ?? ''); $section = trim($_POST['section'] ?? ''); $recordType = trim($_POST['record_type'] ?? 'soil'); // soil | plant $validSoilSections = ['overview', 'ai_interpretation', 'foliar', 'microbial']; $validPlantSections = ['general', 'ai_interpretation', 'recommended', 'foliar']; $validSections = $recordType === 'plant' ? $validPlantSections : $validSoilSections; if (!$recordId || $randId === '' || !in_array($section, $validSections, true) || !in_array($recordType, ['soil', 'plant'], true)) { http_response_code(400); echo json_encode(['success' => false, 'error' => 'Invalid parameters']); exit; } // ── Load record + spec ──────────────────────────────────────────────────────── try { $pdo = getDBConnection(); if ($recordType === 'plant') { $stmt = $pdo->prepare('SELECT * FROM plant_records WHERE id = ? AND rand = ?'); $stmt->execute([$recordId, $randId]); $row = $stmt->fetch(PDO::FETCH_ASSOC); if (!$row) { http_response_code(404); echo json_encode(['success' => false, 'error' => 'Record not found']); exit; } $spec = []; if (!empty($row['crop_type'])) { $stmtSpec = $pdo->prepare('SELECT * FROM plant_specifications WHERE plant_type = ? LIMIT 1'); $stmtSpec->execute([$row['crop_type']]); $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: []; } } else { $stmt = $pdo->prepare('SELECT * FROM soil_records WHERE id = ? AND rand = ?'); $stmt->execute([$recordId, $randId]); $row = $stmt->fetch(PDO::FETCH_ASSOC); if (!$row) { http_response_code(404); echo json_encode(['success' => false, 'error' => 'Record not found']); exit; } $spec = []; if (!empty($row['soil_type'])) { $stmtSpec = $pdo->prepare('SELECT * FROM soil_specifications WHERE soil_type = ? LIMIT 1'); $stmtSpec->execute([$row['soil_type']]); $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: []; } } } catch (PDOException $e) { error_log('DB error in ollamaGenerate.php: ' . $e->getMessage()); http_response_code(500); echo json_encode(['success' => false, 'error' => 'Database error']); exit; } // ── Helpers ─────────────────────────────────────────────────────────────────── function fv(mixed $v, int $dp = 2): string { if ($v === null || $v === '') return 'N/A'; return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v; } function rangeStatus(mixed $value, mixed $min, mixed $max): string { if (!is_numeric($value)) return ''; $v = (float)$value; $lo = is_numeric($min) ? (float)$min : null; $hi = is_numeric($max) ? (float)$max : null; if ($lo !== null && $v < $lo) return '[DEFICIENT]'; if ($hi !== null && $v > $hi) return '[EXCESS]'; if ($lo !== null || $hi !== null) return '[IDEAL]'; return ''; } /** Resolve a value — check spec row first, then soil record row */ function sv(array $spec, array $row, string $col): mixed { if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col]; if (isset($row[$col]) && $row[$col] !== '' && $row[$col] !== null) return $row[$col]; return null; } $r = $row; $s = $spec; // ── Pre-compute all display values (heredocs don't support function calls) ──── $d_ph_h2o = fv($r['ph_h2o'], 1); $d_ph_h2o_st = rangeStatus($r['ph_h2o'], 6.2, 6.8); $d_ph_cacl2 = fv($r['ph_cacl2'], 1); $d_ec = fv($r['ec'], 2); $d_colour = $r['colour'] ?? 'N/A'; $d_texture = $r['texture'] ?? 'N/A'; $d_gravel = fv($r['gravel'], 1); $d_ocarbon = fv($r['ocarbon'], 1); $d_omatter = fv($r['omatter'], 1); $d_cec = fv($r['cec'], 2); $d_tec = fv($r['tec'], 2); $d_paramag = fv($r['paramag'], 0); $d_no3 = fv($r['NO3_N'], 0); $d_no3_st = rangeStatus($r['NO3_N'], 10, 20); $d_nh3 = fv($r['NH3_N'], 0); $d_cn_ratio = fv($r['c_n_ratio'], 1); $d_p_colwell = fv($r['p_colwell'], 0); $d_p_morgan = fv($r['p_morgan'], 0); $d_p_mehlick = fv($r['p_mehlick'], 0); $d_p_bray2 = fv($r['p_bray2'], 0); // Major cations $d_ca_ppm = fv($r['BS_ca_ppm'], 0); $d_ca_min = fv(sv($s,$r,'ca_ppm_min'), 0); $d_ca_max = fv(sv($s,$r,'ca_ppm_max'), 0); $d_ca_st = rangeStatus($r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')); $d_mg_ppm = fv($r['BS_mg_ppm'], 0); $d_mg_min = fv(sv($s,$r,'mg_ppm_min'), 0); $d_mg_max = fv(sv($s,$r,'mg_ppm_max'), 0); $d_mg_st = rangeStatus($r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')); $d_k_ppm = fv($r['BS_k_ppm'], 0); $d_k_min = fv(sv($s,$r,'k_ppm_min'), 0); $d_k_max = fv(sv($s,$r,'k_ppm_max'), 0); $d_k_st = rangeStatus($r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max')); $d_na_ppm = fv($r['BS_na_ppm'], 0); $d_na_min = fv(sv($s,$r,'na_ppm_min'), 0); $d_na_max = fv(sv($s,$r,'na_ppm_max'), 0); $d_na_st = rangeStatus($r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')); // Base saturations $d_ca_bs = fv($r['BS_ca2'], 2); $d_ca_bs_min = fv(sv($s,$r,'cabs_min'), 1); $d_ca_bs_max = fv(sv($s,$r,'cabs_max'), 1); $d_ca_bs_st = rangeStatus($r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max')); $d_mg_bs = fv($r['BS_mg2'], 2); $d_mg_bs_min = fv(sv($s,$r,'mgbs_min'), 1); $d_mg_bs_max = fv(sv($s,$r,'mgbs_max'), 1); $d_mg_bs_st = rangeStatus($r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max')); $d_k_bs = fv($r['BS_k'], 2); $d_k_bs_min = fv(sv($s,$r,'kbs_min'), 1); $d_k_bs_max = fv(sv($s,$r,'kbs_max'), 1); $d_k_bs_st = rangeStatus($r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max')); $d_na_bs = fv($r['BS_na'], 2); $d_na_bs_min = fv(sv($s,$r,'nabs_min'), 1); $d_na_bs_max = fv(sv($s,$r,'nabs_max'), 1); $d_na_bs_st = rangeStatus($r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max')); $d_ob = fv($r['BS_ob'], 2); $d_ob_rec = fv(sv($s,$r,'ob_rec'), 1); $d_h = fv($r['BS_h'], 2); $d_h_rec = fv(sv($s,$r,'h_rec'), 1); $d_al3 = fv($r['BS_al3'], 2); // Morgans $d_ca_m = fv($r['ca_morgan'], 2); $d_mg_m = fv($r['mg_morgan'], 2); $d_k_m = fv($r['k_morgan'], 2); $d_na_m = fv($r['na_morgan'], 2); // Mehlick-3 $d_ca_me = fv($r['ca_mehlick3'], 2); $d_mg_me = fv($r['mg_mehlick3'], 2); $d_k_me = fv($r['k_mehlick3'], 2); $d_na_me = fv($r['na_mehlick3'], 2); $d_al_me = fv($r['al_mehlick3'], 2); // Trace elements $d_s = fv($r['s_morgan'], 2); $d_b = fv($r['b_cacl2'], 2); $d_mn = fv($r['mn_dtpa'], 2); $d_cu = fv($r['cu_dtpa'], 2); $d_zn = fv($r['zn_dtpa'], 2); $d_fe = fv($r['fe_dtpa'], 2); $d_fe_tot = fv($r['fe'], 2); $d_al = fv($r['al'], 2); $d_si = fv($r['sl_cacl2'], 2); $d_co = fv($r['co_dtpa'], 2); $d_mo = fv($r['m_dtpa'], 2); $d_se = fv($r['se'], 2); // Ratios $_ca_me_v = is_numeric($r['ca_mehlick3']) ? (float)$r['ca_mehlick3'] : 0; $_mg_me_v = is_numeric($r['mg_mehlick3']) ? (float)$r['mg_mehlick3'] : 0; $d_ca_mg_ratio = fv($_mg_me_v != 0 ? round($_ca_me_v / $_mg_me_v, 1) : null, 1); $d_ca_mg_ratio_rec = fv(sv($s,$r,'ca_mg_ratio'), 1); // Build comprehensive soil data block (ALL elements) $soilData = << (float)$hi) $excesses[] = $label; } $soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n"; $soilData .= "In Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)) . "\n"; $soilData .= "=====================================\n"; // ── System prompt per record type ──────────────────────────────────────────── $systemPrompts = [ 'soil' => "You are a certified agronomist and soil scientist specialising in mineral soil balancing, " . "trained in the Albrecht method of base saturation and cation exchange chemistry. " . "You have deep knowledge of soil chemistry, soil biology, and the relationship between " . "soil mineral balance and crop productivity, livestock health, and human nutrition. " . "You understand Australian soil types — Vertosols, Chromosols, Sodosols, Tenosols, " . "Dermosols and others — and how climate and rainfall influence nutrient behaviour. " . "Always ground your recommendations in the measured data. " . "Write in a professional but accessible tone suitable for a farmer-facing report. " . "When knowledge passages from the Albrecht literature are provided, " . "prefer them over your general training — they are the authoritative reference.", 'plant' => "You are a certified agronomist specialising in plant tissue analysis and crop nutrition. " . "You have deep knowledge of plant physiology, nutrient uptake mechanisms, and the " . "relationship between tissue nutrient levels and crop yield, quality, and disease resistance. " . "You are familiar with the CSIRO Plant Analysis Handbook, Hill Laboratories guidelines, " . "and PIRSA soil and plant analysis standards used in Australia. " . "You understand how soil mineral imbalances, pH, and antagonisms (e.g. Ca/Mg, Zn/P, " . "K/Mg) translate into plant deficiency or excess symptoms. " . "Always base your interpretation on the measured tissue values and specified ranges. " . "Write in a professional but accessible tone suitable for a farmer-facing report.", 'water' => "You are a certified irrigation agronomist and water quality specialist. " . "You have deep knowledge of water chemistry, salinity, sodicity, and the effects of " . "irrigation water quality on soil structure, nutrient availability, and crop health. " . "You are familiar with Australian irrigation guidelines (ANZECC/ARMCANZ), SAR and " . "EC thresholds for different soil types and crops, and bicarbonate/carbonate effects " . "on calcium and magnesium availability. " . "Always base your interpretation on the measured water analysis values. " . "Write in a professional but accessible tone suitable for a farmer-facing report.", 'animal' => "You are a certified animal nutritionist and ruminant dietitian with expertise in " . "livestock dietary mineral balance for Australian conditions. " . "You have deep knowledge of macro and trace mineral requirements for cattle, sheep, " . "and other livestock — including the relationship between pasture/feed mineral levels " . "and animal health, reproduction, and production outcomes. " . "You understand antagonisms such as Cu/Mo/S, Se/S, Zn/Fe, and the role of " . "Albrecht-balanced soils in producing nutritionally complete feed. " . "Always base your interpretation on the measured dietary analysis values. " . "Write in a professional but accessible tone suitable for a farmer or veterinarian.", 'compost' => "You are a certified agronomist and composting specialist with expertise in organic " . "matter management, compost maturity assessment, and the use of compost to restore " . "soil biology and mineral balance. " . "You have deep knowledge of C:N ratios, microbial succession, humus formation, " . "and how compost inputs interact with existing soil chemistry. " "You are familiar with the Elaine Ingham and Arden Anderson's compost guidelines, " . "You understand Australian composting standards and the role of biologically active " . "compost in supporting the Albrecht soil balancing philosophy. " . "Always base your interpretation on the measured compost analysis values. " . "Write in a professional but accessible tone suitable for a farmer-facing report.", ]; $system = $systemPrompts[$recordType] ?? $systemPrompts['soil']; // ── Build analysis data block + prompts depending on record type ────────────── if ($recordType === 'plant') { // ── Plant data summary ──────────────────────────────────────────────────── $p = $row; $ps = $spec; // Spec column map (matches plant_specifications DB schema) $pSpecCols = [ 'n' => ['n_min', 'n_max'], 'p' => ['P_Min', 'P_Max'], 'k' => ['K_Min', 'K_Max'], 's' => ['S_Min', 'S_Max'], 'mg' => ['Mg_Min', 'Mg_Max'], 'ca' => ['Ca_Min', 'Ca_Max'], 'na' => ['Na_Min', 'Na_Max'], 'fe' => ['Fe_Min', 'Fe_Max'], 'mn' => ['Mn_Min', 'Mn_Max'], 'zn' => ['Zn_Min', 'Zn_Max'], 'cu' => ['Cu_Min', 'Cu_Max'], 'b' => ['B_Min', 'B_Max'], 'm' => ['M_Min', 'M_Max'], 'co' => ['Co_min', 'Co_max'], 'se' => ['se_min', 'se_max'], 'cl' => ['cl_min', 'cl_max'], 'c' => ['c_min', 'c_max'], ]; $plantElements = [ ['n', 'Nitrogen', '%'], ['p', 'Phosphorus', '%'], ['k', 'Potassium', '%'], ['s', 'Sulfur', '%'], ['mg', 'Magnesium', '%'], ['ca', 'Calcium', '%'], ['na', 'Sodium', '%'], ['fe', 'Iron', 'ppm'], ['mn', 'Manganese', 'ppm'], ['zn', 'Zinc', 'ppm'], ['cu', 'Copper', 'ppm'], ['b', 'Boron', 'ppm'], ['m', 'Molybdenum','ppm'], ['co', 'Cobalt', 'ppm'], ['se', 'Selenium', 'ppm'], ['cl', 'Chloride', 'ppm'], ['c', 'Carbon', '%'], ]; $plantDeficiencies = []; $plantExcesses = []; $elementLines = ''; foreach ($plantElements as [$el, $name, $unit]) { $val = (float)($p[$el] ?? 0); [$minCol, $maxCol] = $pSpecCols[$el]; $min = (float)($ps[$minCol] ?? 0); $max = (float)($ps[$maxCol] ?? 0); $range = ($min > 0 || $max > 0) ? number_format($min, 3) . '–' . number_format($max, 3) : 'N/A'; $found = $val > 0 ? number_format($val, 3) : 'N/A'; $status = ''; if ($val > 0 && ($min > 0 || $max > 0)) { if ($min > 0 && $val < $min) { $status = '[DEFICIENT]'; $plantDeficiencies[] = $name; } elseif ($max > 0 && $val > $max) { $status = '[EXCESS]'; $plantExcesses[] = $name; } else { $status = '[IDEAL]'; } } $elementLines .= sprintf("%-20s %s found: %-8s range: %-15s %s\n", "$name ($unit):", '', $found, $range, $status); } $plantData = "=====================================\n" . "PLANT TISSUE ANALYSIS — COMPLETE\n" . "=====================================\n" . "Client: {$p['client_name']}\n" . "Crop Type: {$p['crop_type']}\n" . "Sample ID: {$p['sample_id']}\n" . "Site ID: {$p['site_id']}\n" . "Lab No: {$p['lab_no']}\n" . "Date Sampled: {$p['date_sampled']}\n\n" . "--- ELEMENT RESULTS ---\n" . $elementLines . "\n" . "Deficient: " . (empty($plantDeficiencies) ? 'None detected' : implode(', ', $plantDeficiencies)) . "\n" . "In Excess: " . (empty($plantExcesses) ? 'None detected' : implode(', ', $plantExcesses)) . "\n" . "=====================================\n"; $ragChunks = retrieveRelevantChunks($pdo, $plantData, $section, RAG_TOP_K); $knowledgeContext = ''; if (!empty($ragChunks)) { $knowledgeContext = "\n\n===================================================\n" . "RELEVANT PASSAGES FROM SOIL & PLANT SCIENCE LITERATURE\n" . "===================================================\n"; foreach ($ragChunks as $i => $chunk) { $knowledgeContext .= sprintf("\n[%d] \"%s\" — %s (p.%d)\n%s\n", $i + 1, $chunk['source'], $chunk['author'], $chunk['page'], $chunk['chunk_text']); } } $ctx = $plantData . $knowledgeContext; $prompts = [ 'general' => "{$system}\n\n{$ctx}\n\n" . "TASK: Write a concise general comment on these plant tissue analysis results (2–3 paragraphs). " . "Cover: (1) the overall nutritional status of the crop, " . "(2) the most significant deficiencies or excesses and their likely effect on crop yield and quality, " . "(3) any elements in good balance. Do not recommend specific product names.", 'ai_interpretation' => "{$system}\n\n{$ctx}\n\n" . "TASK: Write a detailed technical interpretation with these headings:\n" . "1. MAJOR ELEMENTS (N, P, K, S, Mg, Ca, Na)\n" . "2. TRACE ELEMENTS (Fe, Mn, Zn, Cu, B)\n" . "3. OTHER ELEMENTS (Mo, Co, Se, Cl, C)\n" . "4. NUTRIENT INTERACTIONS & ANTAGONISMS\n" . "5. OVERALL NUTRITIONAL ASSESSMENT\n" . "For each element marked [DEFICIENT] or [EXCESS], explain the agronomic significance, " . "likely causes, and interactions with other nutrients.", 'recommended' => "{$system}\n\n{$ctx}\n\n" . "TASK: Design a recommended remedial soil and fertiliser program to correct the deficiencies shown. " . "Format as a numbered list or table: " . "Element | Current Status | Recommended Action | Product Type (generic) | Rate | Timing. " . "Prioritise elements marked [DEFICIENT]. " . "Note any nutrient antagonisms that may be limiting uptake.", 'foliar' => "{$system}\n\n{$ctx}\n\n" . "TASK: Design a foliar spray program to rapidly correct deficiencies shown in the tissue test. " . "Format as a numbered list or table: " . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. " . "Prioritise elements marked [DEFICIENT]. " . "Note carrier water pH requirements and any tank-mix incompatibilities.", ]; } else { // ── Soil: RAG + prompts ─────────────────────────────────────────────────── $ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K); $knowledgeContext = ''; if (!empty($ragChunks)) { $knowledgeContext = "\n\n===================================================\n" . "RELEVANT PASSAGES FROM SOIL SCIENCE LITERATURE\n" . "(William A. Albrecht and other authorities)\n" . "===================================================\n"; foreach ($ragChunks as $i => $chunk) { $knowledgeContext .= sprintf("\n[%d] \"%s\" — %s (p.%d)\n%s\n", $i + 1, $chunk['source'], $chunk['author'], $chunk['page'], $chunk['chunk_text']); } } $ctx = $soilData . $knowledgeContext; $prompts = [ 'overview' => "{$system}\n\n{$ctx}\n\n" . "TASK: Write an executive overview of these soil test results (3–4 paragraphs). " . "Cover: (1) overall soil health and fertility level, " . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, " . "(3) any positive attributes. " . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.", 'ai_interpretation' => "{$system}\n\n{$ctx}\n\n" . "TASK: Write a detailed technical interpretation structured with these headings:\n" . "1. SOIL REACTION (pH, EC, Paramagnetic)\n" . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n" . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n" . "4. MAJOR ELEMENTS (Ca, Mg, K, Na, P — ppm and saturation %)\n" . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n" . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n" . "7. OVERALL SOIL BALANCE ASSESSMENT\n" . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance " . "and interactions with other elements. Reference the Albrecht literature where relevant.", 'foliar' => "{$system}\n\n{$ctx}\n\n" . "TASK: Design a foliar nutrition program to address the deficiencies shown. " . "Format as a numbered list or table: " . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. " . "Prioritise elements marked [DEFICIENT]. " . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). " . "Add a note on carrier water pH and adjuvant recommendations.", 'microbial' => "{$system}\n\n{$ctx}\n\n" . "TASK: Design a biological/microbial soil improvement program. " . "Structure your response:\n" . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n" . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n" . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n" . "4. TIMING & INTEGRATION with the mineral balancing program\n" . "Reference Albrecht's work on the relationship between mineral balance and soil biology.", ]; } // ── Call Ollama ─────────────────────────────────────────────────────────────── $payload = json_encode([ 'model' => OLLAMA_MODEL, 'prompt' => $prompts[$section], 'stream' => false, 'options' => [ 'temperature' => 0.3, 'num_predict' => 2048, 'num_ctx' => 6144, 'repeat_penalty' => 1.1, 'keep_alive' => -1, // keep model resident between requests ], ]); $ch = curl_init(OLLAMA_HOST . '/api/generate'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => $payload, CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => OLLAMA_TIMEOUT, CURLOPT_CONNECTTIMEOUT => 5, ]); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); $curlErr = curl_error($ch); curl_close($ch); if ($curlErr || $response === false) { http_response_code(502); echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]); exit; } if ($httpCode !== 200) { http_response_code(502); echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]); exit; } $ollamaData = json_decode($response, true); $text = trim($ollamaData['response'] ?? ''); if ($text === '') { http_response_code(502); echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']); exit; } echo json_encode([ 'success' => true, 'text' => $text, 'rag_chunks_used' => count($ragChunks), ]); exit; // ── RAG: retrieve relevant knowledge chunks from MySQL ──────────────────────── function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array { try { $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn(); } catch (PDOException $e) { return []; // Table doesn't exist yet } if ($count === 0) { return []; // Knowledge base not yet populated — run ingest_knowledge.php } // Try vector similarity first $queryEmbedding = getQueryEmbedding($queryText); if ($queryEmbedding !== null) { return vectorSearch($pdo, $queryEmbedding, $topK); } // Fallback: MySQL FULLTEXT search return fulltextSearch($pdo, $section, $topK); } function getQueryEmbedding(string $text): ?array { $queryText = substr($text, 0, 2000); // Try new /api/embed (Ollama >= 0.1.26) first $ch = curl_init(OLLAMA_HOST . '/api/embed'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]), CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 3, ]); $resp = curl_exec($ch); $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($resp && $code === 200) { $data = json_decode($resp, true); $emb = $data['embeddings'][0] ?? null; if (is_array($emb) && count($emb) > 0) return $emb; } // Fallback: legacy /api/embeddings $ch = curl_init(OLLAMA_HOST . '/api/embeddings'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]), CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => 15, CURLOPT_CONNECTTIMEOUT => 3, ]); $resp2 = curl_exec($ch); $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($resp2 && $code2 === 200) { $data2 = json_decode($resp2, true); $emb2 = $data2['embedding'] ?? null; if (is_array($emb2) && count($emb2) > 0) return $emb2; } return null; } function vectorSearch(PDO $pdo, array $queryVec, int $topK): array { $stmt = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks'); $scores = []; while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) { $chunkVec = json_decode($row['embedding'], true); if (!is_array($chunkVec)) continue; $scores[] = [ 'score' => cosineSimilarity($queryVec, $chunkVec), 'source' => $row['source'], 'author' => $row['author'], 'page' => $row['page'], 'chunk_text' => $row['chunk_text'], ]; } usort($scores, fn($a, $b) => $b['score'] <=> $a['score']); return array_slice($scores, 0, $topK); } function fulltextSearch(PDO $pdo, string $section, int $topK): array { $keywords = [ 'overview' => 'soil fertility mineral balance calcium magnesium albrecht', 'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter', 'foliar' => 'foliar nutrition trace elements deficiency correction spray', 'microbial' => 'soil biology microbial organic matter carbon nitrogen humus', ]; $query = $keywords[$section] ?? 'soil fertility mineral nutrition'; try { $stmt = $pdo->prepare( 'SELECT source, author, page, chunk_text, MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) AS score FROM knowledge_chunks WHERE MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) ORDER BY score DESC LIMIT ?' ); $stmt->execute([$query, $query, $topK]); return $stmt->fetchAll(PDO::FETCH_ASSOC); } catch (PDOException $e) { error_log('RAG fulltext search failed: ' . $e->getMessage()); return []; } } function cosineSimilarity(array $a, array $b): float { $dot = $normA = $normB = 0.0; $len = min(count($a), count($b)); for ($i = 0; $i < $len; $i++) { $dot += $a[$i] * $b[$i]; $normA += $a[$i] * $a[$i]; $normB += $b[$i] * $b[$i]; } $denom = sqrt($normA) * sqrt($normB); return $denom > 0 ? $dot / $denom : 0.0; }