| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589 |
- <?php
// Report every error, but send it to the server log instead of the response:
// this endpoint emits JSON, and an inline PHP warning would corrupt the body
// and expose file paths to the browser.
error_reporting(E_ALL);
ini_set('display_errors', '0');
ini_set('log_errors', '1');
- /**
- * controllers/ollamaGenerate.php
- *
- * AJAX POST handler: generates AI agronomic text using Ollama, grounded
- * with relevant passages retrieved from the soil science knowledge base
- * (William A. Albrecht et al.) stored in MySQL knowledge_chunks.
- *
- * Flow:
- * 1. Load full soil record + specification ranges from DB
- * 2. Build a structured data summary covering ALL measured elements
- * 3. Embed that summary via nomic-embed-text → cosine search over knowledge_chunks
- * 4. Inject retrieved passages + data into a section-specific prompt
- * 5. Send to llama3.1 and return the generated text
- *
- * POST params:
- * csrf_token string
- * rid int soil_records.id
- * rand string soil_records.rand
- * section string overview | ai_interpretation | foliar | microbial
- *
- * Note: run ingestion from Windows where Ollama is accessible:
- * php tools/ingest_knowledge.php --test
- * php tools/ingest_knowledge.php --file="book.pdf" --author="William A. Albrecht"
- */
if (session_status() === PHP_SESSION_NONE) {
    session_start();
}

require_once __DIR__ . '/../config/database.php';
require_once __DIR__ . '/../lib/auth.php';
require_once __DIR__ . '/../lib/csrf.php';

header('Content-Type: application/json');

// ── Config ───────────────────────────────────────────────────────────────────
define('OLLAMA_HOST', 'http://192.168.8.73:11434');
define('OLLAMA_MODEL', 'llama3.1:8b-instruct-q4_K_M');
define('EMBED_MODEL', 'nomic-embed-text');
define('RAG_TOP_K', 6); // book passages injected per request
define('OLLAMA_TIMEOUT', 180); // seconds — LLM can be slow

// ── Auth + CSRF ───────────────────────────────────────────────────────────────
if (!isLoggedIn()) {
    http_response_code(401);
    echo json_encode(['success' => false, 'error' => 'Not authenticated']);
    exit;
}

if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
    http_response_code(405);
    echo json_encode(['success' => false, 'error' => 'Method not allowed']);
    exit;
}

// $_POST entries are strings or arrays (e.g. "csrf_token[]=x"). Reject arrays
// up front so they fail as an ordinary 403 rather than reaching the verifier.
$csrfToken = $_POST['csrf_token'] ?? '';
if (!is_string($csrfToken) || !verifyCsrfToken($csrfToken)) {
    http_response_code(403);
    echo json_encode(['success' => false, 'error' => 'Invalid CSRF token']);
    exit;
}

// ── Request parameters ────────────────────────────────────────────────────────
// Same guard as above: trim() throws a TypeError on arrays, so a non-string
// field is mapped to an empty/zero value and rejected by the check below.
$rawRid     = $_POST['rid'] ?? '';
$rawRand    = $_POST['rand'] ?? '';
$rawSection = $_POST['section'] ?? '';

$recordId = is_string($rawRid) ? (int)trim($rawRid) : 0;
$randId   = is_string($rawRand) ? trim($rawRand) : '';
$section  = is_string($rawSection) ? trim($rawSection) : '';

$validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
    http_response_code(400);
    echo json_encode(['success' => false, 'error' => 'Invalid parameters']);
    exit;
}

// ── Load soil record + spec ───────────────────────────────────────────────────
try {
    $pdo = getDBConnection();

    // The record is addressed by both its id and its "rand" value.
    $stmt = $pdo->prepare('SELECT * FROM soil_records WHERE id = ? AND rand = ?');
    $stmt->execute([$recordId, $randId]);
    $row = $stmt->fetch(PDO::FETCH_ASSOC);
    if (!$row) {
        http_response_code(404);
        echo json_encode(['success' => false, 'error' => 'Record not found']);
        exit;
    }

    // Specification ranges are optional: without a soil type (or a matching
    // specification row) $spec stays an empty array.
    $spec = [];
    if (!empty($row['soil_type'])) {
        $stmtSpec = $pdo->prepare('SELECT * FROM soil_specifications WHERE soil_type = ? LIMIT 1');
        $stmtSpec->execute([$row['soil_type']]);
        $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: [];
    }
} catch (PDOException $e) {
    // Log the detail server-side; the client only gets a generic message.
    error_log('DB error in ollamaGenerate.php: ' . $e->getMessage());
    http_response_code(500);
    echo json_encode(['success' => false, 'error' => 'Database error']);
    exit;
}
- // ── Helpers ───────────────────────────────────────────────────────────────────
/**
 * Format a value for display in the soil data block.
 *
 * null and the empty string become "N/A"; numeric values are rendered with
 * number_format() at $dp decimal places; anything else is cast to string.
 */
function fv(mixed $v, int $dp = 2): string
{
    if ($v === null || $v === '') {
        return 'N/A';
    }

    if (!is_numeric($v)) {
        return (string)$v;
    }

    return number_format((float)$v, $dp);
}
/**
 * Classify a reading against an optional [min, max] range.
 *
 * Returns "[DEFICIENT]" below a numeric $min, "[EXCESS]" above a numeric $max,
 * "[IDEAL]" when at least one bound is numeric and neither is violated, and an
 * empty string when the value is not numeric or no bound is numeric.
 */
function rangeStatus(mixed $value, mixed $min, mixed $max): string
{
    if (!is_numeric($value)) {
        return '';
    }

    $reading    = (float)$value;
    $hasFloor   = is_numeric($min);
    $hasCeiling = is_numeric($max);

    // Arms are evaluated in order, so the lower bound is checked first.
    return match (true) {
        $hasFloor && $reading < (float)$min   => '[DEFICIENT]',
        $hasCeiling && $reading > (float)$max => '[EXCESS]',
        $hasFloor || $hasCeiling              => '[IDEAL]',
        default                               => '',
    };
}
/**
 * Resolve a column value, preferring the specification row over the soil
 * record row. A source is skipped when the column is missing, null, or an
 * empty string; null is returned when neither source has a usable value.
 */
function sv(array $spec, array $row, string $col): mixed
{
    foreach ([$spec, $row] as $source) {
        $candidate = $source[$col] ?? null;
        if ($candidate !== null && $candidate !== '') {
            return $candidate;
        }
    }

    return null;
}
// Short aliases for the soil record and its specification row.
$r = $row;
$s = $spec;

// ── Pre-compute all display values (heredocs don't support function calls) ────
// Every $d_* variable is a ready-to-print string; the *_st variables carry the
// [DEFICIENT] / [EXCESS] / [IDEAL] tag from rangeStatus().

// Soil reaction and physical properties (pH range is fixed at 6.2-6.8)
$d_ph_h2o    = fv($r['ph_h2o'], 1);
$d_ph_h2o_st = rangeStatus($r['ph_h2o'], 6.2, 6.8);
$d_ph_cacl2  = fv($r['ph_cacl2'], 1);
$d_ec        = fv($r['ec'], 2);
$d_colour    = $r['colour'] ?? 'N/A';
$d_texture   = $r['texture'] ?? 'N/A';
$d_gravel    = fv($r['gravel'], 1);

// Organic matter and exchange capacity
$d_ocarbon = fv($r['ocarbon'], 1);
$d_omatter = fv($r['omatter'], 1);
$d_cec     = fv($r['cec'], 2);
$d_tec     = fv($r['tec'], 2);
$d_paramag = fv($r['paramag'], 0);

// Nitrogen (nitrate range is fixed at 10-20 ppm)
$d_no3      = fv($r['NO3_N'], 0);
$d_no3_st   = rangeStatus($r['NO3_N'], 10, 20);
$d_nh3      = fv($r['NH3_N'], 0);
$d_cn_ratio = fv($r['c_n_ratio'], 1);

// Phosphorus
$d_p_colwell = fv($r['p_colwell'], 0);
$d_p_morgan  = fv($r['p_morgan'], 0);
$d_p_mehlick = fv($r['p_mehlick'], 0);
$d_p_bray2   = fv($r['p_bray2'], 0);

// Major cations (ppm) — bounds are resolved once via sv() and reused for both
// the printed range and the status tag.
$floor    = sv($s, $r, 'ca_ppm_min');
$ceiling  = sv($s, $r, 'ca_ppm_max');
$d_ca_ppm = fv($r['BS_ca_ppm'], 0);
$d_ca_min = fv($floor, 0);
$d_ca_max = fv($ceiling, 0);
$d_ca_st  = rangeStatus($r['BS_ca_ppm'], $floor, $ceiling);

$floor    = sv($s, $r, 'mg_ppm_min');
$ceiling  = sv($s, $r, 'mg_ppm_max');
$d_mg_ppm = fv($r['BS_mg_ppm'], 0);
$d_mg_min = fv($floor, 0);
$d_mg_max = fv($ceiling, 0);
$d_mg_st  = rangeStatus($r['BS_mg_ppm'], $floor, $ceiling);

$floor   = sv($s, $r, 'k_ppm_min');
$ceiling = sv($s, $r, 'k_ppm_max');
$d_k_ppm = fv($r['BS_k_ppm'], 0);
$d_k_min = fv($floor, 0);
$d_k_max = fv($ceiling, 0);
$d_k_st  = rangeStatus($r['BS_k_ppm'], $floor, $ceiling);

$floor    = sv($s, $r, 'na_ppm_min');
$ceiling  = sv($s, $r, 'na_ppm_max');
$d_na_ppm = fv($r['BS_na_ppm'], 0);
$d_na_min = fv($floor, 0);
$d_na_max = fv($ceiling, 0);
$d_na_st  = rangeStatus($r['BS_na_ppm'], $floor, $ceiling);

// Base saturations (%)
$floor       = sv($s, $r, 'cabs_min');
$ceiling     = sv($s, $r, 'cabs_max');
$d_ca_bs     = fv($r['BS_ca2'], 2);
$d_ca_bs_min = fv($floor, 1);
$d_ca_bs_max = fv($ceiling, 1);
$d_ca_bs_st  = rangeStatus($r['BS_ca2'], $floor, $ceiling);

$floor       = sv($s, $r, 'mgbs_min');
$ceiling     = sv($s, $r, 'mgbs_max');
$d_mg_bs     = fv($r['BS_mg2'], 2);
$d_mg_bs_min = fv($floor, 1);
$d_mg_bs_max = fv($ceiling, 1);
$d_mg_bs_st  = rangeStatus($r['BS_mg2'], $floor, $ceiling);

$floor      = sv($s, $r, 'kbs_min');
$ceiling    = sv($s, $r, 'kbs_max');
$d_k_bs     = fv($r['BS_k'], 2);
$d_k_bs_min = fv($floor, 1);
$d_k_bs_max = fv($ceiling, 1);
$d_k_bs_st  = rangeStatus($r['BS_k'], $floor, $ceiling);

$floor       = sv($s, $r, 'nabs_min');
$ceiling     = sv($s, $r, 'nabs_max');
$d_na_bs     = fv($r['BS_na'], 2);
$d_na_bs_min = fv($floor, 1);
$d_na_bs_max = fv($ceiling, 1);
$d_na_bs_st  = rangeStatus($r['BS_na'], $floor, $ceiling);

$d_ob     = fv($r['BS_ob'], 2);
$d_ob_rec = fv(sv($s, $r, 'ob_rec'), 1);
$d_h      = fv($r['BS_h'], 2);
$d_h_rec  = fv(sv($s, $r, 'h_rec'), 1);
$d_al3    = fv($r['BS_al3'], 2);

// Morgans
$d_ca_m = fv($r['ca_morgan'], 2);
$d_mg_m = fv($r['mg_morgan'], 2);
$d_k_m  = fv($r['k_morgan'], 2);
$d_na_m = fv($r['na_morgan'], 2);

// Mehlick-3
$d_ca_me = fv($r['ca_mehlick3'], 2);
$d_mg_me = fv($r['mg_mehlick3'], 2);
$d_k_me  = fv($r['k_mehlick3'], 2);
$d_na_me = fv($r['na_mehlick3'], 2);
$d_al_me = fv($r['al_mehlick3'], 2);

// Trace elements
$d_s      = fv($r['s_morgan'], 2);
$d_b      = fv($r['b_cacl2'], 2);
$d_mn     = fv($r['mn_dtpa'], 2);
$d_cu     = fv($r['cu_dtpa'], 2);
$d_zn     = fv($r['zn_dtpa'], 2);
$d_fe     = fv($r['fe_dtpa'], 2);
$d_fe_tot = fv($r['fe'], 2);
$d_al     = fv($r['al'], 2);
$d_si     = fv($r['sl_cacl2'], 2);
$d_co     = fv($r['co_dtpa'], 2);
$d_mo     = fv($r['m_dtpa'], 2);
$d_se     = fv($r['se'], 2);

// Ratios
// Ca:Mg is derived from the Mehlick-3 readings. It is only computed when BOTH
// readings are numeric and Mg is non-zero; a missing Ca reading must yield
// "N/A" rather than a misleading ratio of 0.0.
$caMgRatio = null;
if (
    is_numeric($r['ca_mehlick3'])
    && is_numeric($r['mg_mehlick3'])
    && (float)$r['mg_mehlick3'] !== 0.0
) {
    $caMgRatio = round((float)$r['ca_mehlick3'] / (float)$r['mg_mehlick3'], 1);
}
$d_ca_mg_ratio     = fv($caMgRatio, 1);
$d_ca_mg_ratio_rec = fv(sv($s, $r, 'ca_mg_ratio'), 1);
// Build comprehensive soil data block (ALL elements).
// This text is used both as the embedding query for retrieval and as the
// measured-data section of the LLM prompt.
$soilData = <<<TEXT
=====================================
SOIL TEST DATA — COMPLETE ANALYSIS
=====================================
Client: {$r['client_name']}
Location: {$r['site_address']}, {$r['state_postcode']}
Crop: {$r['sample_id']}
Crop Type: {$r['crop_type']}
Soil Type: {$r['soil_type']}
Lab No: {$r['lab_no']}
Date Sampled: {$r['date_sampled']}
--- SOIL PHYSICAL / REACTION ---
pH (H2O): $d_ph_h2o [target: 6.2-6.8] $d_ph_h2o_st
pH (CaCl2): $d_ph_cacl2
EC (mS/cm): $d_ec
Colour: $d_colour
Texture: $d_texture
Gravel (%): $d_gravel
--- ORGANIC MATTER ---
Organic Carbon (%): $d_ocarbon
Organic Matter (%): $d_omatter
--- CATION EXCHANGE ---
CEC (meq/100g): $d_cec
TEC (meq/100g): $d_tec
Paramagnetic: $d_paramag
--- NITROGEN ---
Nitrate-N (NO3-N ppm): $d_no3 [target: 10-20 ppm] $d_no3_st
Ammonium-N (NH3-N ppm): $d_nh3
C:N ratio: $d_cn_ratio
--- PHOSPHORUS ---
P Colwell (ppm): $d_p_colwell
P Morgan (ppm): $d_p_morgan
P Mehlick (ppm): $d_p_mehlick
P Bray2 (ppm): $d_p_bray2
--- MAJOR CATIONS (ppm) ---
Calcium Ca (ppm): $d_ca_ppm [min: $d_ca_min, max: $d_ca_max] $d_ca_st
Magnesium Mg (ppm): $d_mg_ppm [min: $d_mg_min, max: $d_mg_max] $d_mg_st
Potassium K (ppm): $d_k_ppm [min: $d_k_min, max: $d_k_max] $d_k_st
Sodium Na (ppm): $d_na_ppm [min: $d_na_min, max: $d_na_max] $d_na_st
--- BASE SATURATIONS (%) ---
Calcium Ca (%): $d_ca_bs% [min: $d_ca_bs_min, max: $d_ca_bs_max] $d_ca_bs_st
Magnesium Mg (%): $d_mg_bs% [min: $d_mg_bs_min, max: $d_mg_bs_max] $d_mg_bs_st
Potassium K (%): $d_k_bs% [min: $d_k_bs_min, max: $d_k_bs_max] $d_k_bs_st
Sodium Na (%): $d_na_bs% [min: $d_na_bs_min, max: $d_na_bs_max] $d_na_bs_st
Other Bases (%): $d_ob% [recommended: $d_ob_rec]
Hydrogen (%): $d_h% [recommended: $d_h_rec]
Aluminium Al3 (%): $d_al3%
--- MORGANS EXTRACT (ppm) ---
Ca Morgan: $d_ca_m
Mg Morgan: $d_mg_m
K Morgan: $d_k_m
Na Morgan: $d_na_m
--- MEHLICK-3 EXTRACT (ppm) ---
Ca Mehlick3: $d_ca_me
Mg Mehlick3: $d_mg_me
K Mehlick3: $d_k_me
Na Mehlick3: $d_na_me
Al Mehlick3: $d_al_me
--- TRACE ELEMENTS (ppm) ---
Sulfur S (ppm): $d_s
Boron B (ppm): $d_b
Manganese Mn (ppm): $d_mn
Copper Cu (ppm): $d_cu
Zinc Zn (ppm): $d_zn
Iron Fe (ppm): $d_fe
Iron Fe (total): $d_fe_tot
Aluminium Al (ppm): $d_al
Silicon Si (ppm): $d_si
Cobalt Co (ppm): $d_co
Molybdenum Mo (ppm): $d_mo
Selenium Se (ppm): $d_se
--- RATIOS ---
Ca:Mg ratio: $d_ca_mg_ratio [recommended: $d_ca_mg_ratio_rec]
C:N ratio: $d_cn_ratio
TEXT;

// A heredoc does not include the newline that precedes its closing marker, so
// the text above ends without a line break. Terminate that line explicitly;
// otherwise the summary would be glued onto the final "C:N ratio" line.
$soilData .= "\n";

// Append quick deficiency/excess summary.
// Each entry is [label, measured value, lower bound, upper bound]; pH and
// nitrate use fixed targets, the rest take their bounds from sv().
$deficiencies = [];
$excesses = [];
$checkElements = [
    ['pH (H2O)', $r['ph_h2o'], 6.2, 6.8],
    ['Nitrate-N', $r['NO3_N'], 10, 20],
    ['Calcium (ppm)', $r['BS_ca_ppm'], sv($s, $r, 'ca_ppm_min'), sv($s, $r, 'ca_ppm_max')],
    ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s, $r, 'mg_ppm_min'), sv($s, $r, 'mg_ppm_max')],
    ['Potassium (ppm)', $r['BS_k_ppm'], sv($s, $r, 'k_ppm_min'), sv($s, $r, 'k_ppm_max')],
    ['Sodium (ppm)', $r['BS_na_ppm'], sv($s, $r, 'na_ppm_min'), sv($s, $r, 'na_ppm_max')],
    ['Ca sat (%)', $r['BS_ca2'], sv($s, $r, 'cabs_min'), sv($s, $r, 'cabs_max')],
    ['Mg sat (%)', $r['BS_mg2'], sv($s, $r, 'mgbs_min'), sv($s, $r, 'mgbs_max')],
    ['K sat (%)', $r['BS_k'], sv($s, $r, 'kbs_min'), sv($s, $r, 'kbs_max')],
    ['Na sat (%)', $r['BS_na'], sv($s, $r, 'nabs_min'), sv($s, $r, 'nabs_max')],
];

foreach ($checkElements as [$label, $val, $lo, $hi]) {
    // Readings that were not measured cannot be classified.
    if (!is_numeric($val)) {
        continue;
    }
    $v = (float)$val;
    if (is_numeric($lo) && $v < (float)$lo) {
        $deficiencies[] = $label;
    }
    if (is_numeric($hi) && $v > (float)$hi) {
        $excesses[] = $label;
    }
}

$soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n";
$soilData .= "In Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)) . "\n";
$soilData .= "=====================================\n";
// ── RAG: retrieve relevant passages from knowledge_chunks ─────────────────────
// The soil data block doubles as the retrieval query. When nothing is
// retrieved, $knowledgeContext stays empty and the prompt carries data only.
$ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);

$knowledgeContext = '';
if (!empty($ragChunks)) {
    $banner = "\n\n===================================================\n"
        . "RELEVANT PASSAGES FROM SOIL SCIENCE LITERATURE\n"
        . "(William A. Albrecht and other authorities)\n"
        . "===================================================\n";

    // One numbered citation per passage: [n] "source" — author (p.N), then the text.
    $citations = [];
    foreach ($ragChunks as $position => $passage) {
        $citations[] = sprintf(
            "\n[%d] \"%s\" — %s (p.%d)\n%s\n",
            $position + 1,
            $passage['source'],
            $passage['author'],
            $passage['page'],
            $passage['chunk_text']
        );
    }

    $knowledgeContext = $banner . implode('', $citations);
}
// ── Section-specific prompts ──────────────────────────────────────────────────
// Every prompt has the same shape: persona, then the measured data plus any
// retrieved passages, then a task that differs per report section.
$system = "You are a certified agronomist specialising in soil fertility, trained in the "
    . "Albrecht method of mineral soil balancing. You have deep knowledge of soil chemistry, "
    . "plant nutrition, and the relationship between soil mineral balance and crop and livestock health. "
    . "Always ground your recommendations in the measured data provided. "
    . "For Australian conditions, reference typical soil types and climate where relevant. "
    . "Write in a professional but accessible tone suitable for a farmer-facing report. "
    . "When the knowledge passages conflict with your training, prefer the passages — "
    . "they are from authoritative soil science texts.";

$ctx = $soilData . $knowledgeContext;

// Task text keyed by section name.
$sectionTasks = [
    'overview' =>
        "TASK: Write an executive overview of these soil test results (3–4 paragraphs). "
        . "Cover: (1) overall soil health and fertility level, "
        . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
        . "(3) any positive attributes. "
        . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.",

    'ai_interpretation' =>
        "TASK: Write a detailed technical interpretation structured with these headings:\n"
        . "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
        . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
        . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
        . "4. MAJOR ELEMENTS (Ca, Mg, K, Na, P — ppm and saturation %)\n"
        . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
        . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
        . "7. OVERALL SOIL BALANCE ASSESSMENT\n"
        . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance "
        . "and interactions with other elements. Reference the Albrecht literature where relevant.",

    'foliar' =>
        "TASK: Design a foliar nutrition program to address the deficiencies shown. "
        . "Format as a numbered list or table: "
        . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
        . "Prioritise elements marked [DEFICIENT]. "
        . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). "
        . "Add a note on carrier water pH and adjuvant recommendations.",

    'microbial' =>
        "TASK: Design a biological/microbial soil improvement program. "
        . "Structure your response:\n"
        . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n"
        . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n"
        . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
        . "4. TIMING & INTEGRATION with the mineral balancing program\n"
        . "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
];

// Prefix every task with the shared persona + context preamble.
$preamble = "{$system}\n\n{$ctx}\n\n";
$prompts = [];
foreach ($sectionTasks as $sectionName => $taskText) {
    $prompts[$sectionName] = $preamble . $taskText;
}
// ── Call Ollama ───────────────────────────────────────────────────────────────
// Non-streaming request to /api/generate: the whole completion arrives in one
// JSON document whose "response" field holds the generated text.
$payload = json_encode([
    'model' => OLLAMA_MODEL,
    'prompt' => $prompts[$section],
    'stream' => false,
    // keep_alive is a top-level request field, not a model option; inside
    // "options" it does not control how long the model stays loaded.
    'keep_alive' => -1, // keep model resident between requests
    'options' => [
        'temperature' => 0.3,
        'num_predict' => 2048,
        'num_ctx' => 6144,
        'repeat_penalty' => 1.1,
    ],
], JSON_INVALID_UTF8_SUBSTITUTE); // DB text with bad bytes must not make encoding fail

if ($payload === false) {
    error_log('ollamaGenerate.php: could not encode request: ' . json_last_error_msg());
    http_response_code(500);
    echo json_encode(['success' => false, 'error' => 'Could not encode request']);
    exit;
}

$ch = curl_init(OLLAMA_HOST . '/api/generate');
curl_setopt_array($ch, [
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => $payload,
    CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_TIMEOUT => OLLAMA_TIMEOUT,
    CURLOPT_CONNECTTIMEOUT => 5,
]);
$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$curlErr = curl_error($ch);
curl_close($ch);

// Transport failure (connect error, timeout, ...).
if ($curlErr || $response === false) {
    http_response_code(502);
    echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]);
    exit;
}

// Ollama answered, but not with success.
if ($httpCode !== 200) {
    http_response_code(502);
    echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]);
    exit;
}

// A body that is not a JSON object, or whose "response" is not a string, is
// treated the same as an empty completion.
$ollamaData = json_decode($response, true);
$generated = is_array($ollamaData) ? ($ollamaData['response'] ?? '') : '';
$text = is_string($generated) ? trim($generated) : '';
if ($text === '') {
    http_response_code(502);
    echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']);
    exit;
}

echo json_encode([
    'success' => true,
    'text' => $text,
    'rag_chunks_used' => count($ragChunks),
]);
exit;
// ── RAG: retrieve relevant knowledge chunks from MySQL ────────────────────────
/**
 * Fetch up to $topK knowledge passages for the given query text.
 *
 * Retrieval is best-effort: any failure yields fewer (or zero) passages
 * instead of aborting the request. Vector similarity is tried first; the
 * per-section FULLTEXT query is used when no embedding is available, when the
 * vector search fails, or when it finds nothing.
 *
 * @param PDO    $pdo       Open database connection.
 * @param string $queryText Text to embed for the similarity search.
 * @param string $section   Report section, used to pick FULLTEXT keywords.
 * @param int    $topK      Maximum number of passages to return.
 * @return array Rows carrying source, author, page, chunk_text and score.
 */
function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
{
    try {
        $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
    } catch (PDOException $e) {
        // Typically the table doesn't exist yet; log it so a misconfigured
        // knowledge base is visible rather than silently ignored.
        error_log('RAG disabled, knowledge_chunks not readable: ' . $e->getMessage());
        return [];
    }

    if ($count === 0) {
        return []; // Knowledge base not yet populated — run ingest_knowledge.php
    }

    // Try vector similarity first
    $queryEmbedding = getQueryEmbedding($queryText);
    if ($queryEmbedding !== null) {
        try {
            $matches = vectorSearch($pdo, $queryEmbedding, $topK);
            if ($matches !== []) {
                return $matches;
            }
        } catch (PDOException $e) {
            error_log('RAG vector search failed: ' . $e->getMessage());
        }
    }

    // Fallback: MySQL FULLTEXT search
    return fulltextSearch($pdo, $section, $topK);
}
/**
 * Embed the query text with the configured embedding model.
 *
 * Only the first 2000 bytes of $text are sent. The newer /api/embed endpoint
 * is tried first, then the legacy /api/embeddings endpoint.
 *
 * @param string $text Text to embed.
 * @return array|null The embedding vector, or null when neither endpoint
 *                    returned a non-empty vector.
 */
function getQueryEmbedding(string $text): ?array
{
    // substr() counts bytes, so the cut may land inside a multibyte character;
    // the request helper drops such a partial sequence when encoding.
    $queryText = substr($text, 0, 2000);

    // Try new /api/embed (Ollama >= 0.1.26) first
    $data = ollamaEmbedRequest('/api/embed', ['model' => EMBED_MODEL, 'input' => $queryText]);
    $emb = $data['embeddings'][0] ?? null;
    if (is_array($emb) && count($emb) > 0) {
        return $emb;
    }

    // Fallback: legacy /api/embeddings
    $data = ollamaEmbedRequest('/api/embeddings', ['model' => EMBED_MODEL, 'prompt' => $queryText]);
    $emb = $data['embedding'] ?? null;
    if (is_array($emb) && count($emb) > 0) {
        return $emb;
    }

    return null;
}

/**
 * POST a JSON body to an Ollama embedding endpoint and decode the reply.
 *
 * @param string $path Endpoint path appended to OLLAMA_HOST.
 * @param array  $body Request fields.
 * @return array|null Decoded JSON object, or null on any transport, HTTP or
 *                    decoding failure.
 */
function ollamaEmbedRequest(string $path, array $body): ?array
{
    // JSON_INVALID_UTF8_IGNORE drops malformed byte sequences, so a truncated
    // trailing character cannot make json_encode() fail.
    $json = json_encode($body, JSON_INVALID_UTF8_IGNORE);
    if ($json === false) {
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . $path);
    curl_setopt_array($ch, [
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $json,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT => 15,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);
    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if (!is_string($resp) || $resp === '' || $code !== 200) {
        return null;
    }

    $decoded = json_decode($resp, true);
    return is_array($decoded) ? $decoded : null;
}
/**
 * Rank every row of knowledge_chunks by cosine similarity to $queryVec and
 * return the $topK best matches.
 *
 * Rows whose stored embedding does not decode to an array are skipped. Each
 * returned entry carries score, source, author, page and chunk_text.
 */
function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
{
    $statement = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks');

    $ranked = [];
    while ($record = $statement->fetch(PDO::FETCH_ASSOC)) {
        $storedVec = json_decode($record['embedding'], true);
        if (is_array($storedVec)) {
            $ranked[] = [
                'score' => cosineSimilarity($queryVec, $storedVec),
                'source' => $record['source'],
                'author' => $record['author'],
                'page' => $record['page'],
                'chunk_text' => $record['chunk_text'],
            ];
        }
    }

    // Highest similarity first.
    usort(
        $ranked,
        static fn(array $left, array $right): int => $right['score'] <=> $left['score']
    );

    return array_slice($ranked, 0, $topK);
}
/**
 * Keyword fallback: run a MySQL FULLTEXT query over knowledge_chunks using a
 * fixed keyword set per report section.
 *
 * @param PDO    $pdo     Open database connection.
 * @param string $section Report section; unknown values use a generic keyword set.
 * @param int    $topK    Maximum number of rows to return.
 * @return array Rows with source, author, page, chunk_text and score; empty
 *               when the query fails (the failure is logged).
 */
function fulltextSearch(PDO $pdo, string $section, int $topK): array
{
    $keywords = [
        'overview' => 'soil fertility mineral balance calcium magnesium albrecht',
        'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
        'foliar' => 'foliar nutrition trace elements deficiency correction spray',
        'microbial' => 'soil biology microbial organic matter carbon nitrogen humus',
    ];
    $query = $keywords[$section] ?? 'soil fertility mineral nutrition';

    try {
        $stmt = $pdo->prepare(
            'SELECT source, author, page, chunk_text,
                    MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) AS score
             FROM knowledge_chunks
             WHERE MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE)
             ORDER BY score DESC
             LIMIT ?'
        );

        // execute([...]) binds every value as a string. With emulated prepares
        // that produces LIMIT '6', which MySQL rejects, so the limit is bound
        // explicitly as an integer. A negative limit is clamped to zero.
        $stmt->bindValue(1, $query, PDO::PARAM_STR);
        $stmt->bindValue(2, $query, PDO::PARAM_STR);
        $stmt->bindValue(3, max(0, $topK), PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll(PDO::FETCH_ASSOC);
    } catch (PDOException $e) {
        error_log('RAG fulltext search failed: ' . $e->getMessage());
        return [];
    }
}
/**
 * Cosine similarity of two numeric vectors.
 *
 * Only the first min(count($a), count($b)) components are compared. Returns
 * 0.0 when the product of the two norms is not greater than zero.
 */
function cosineSimilarity(array $a, array $b): float
{
    $dims = min(count($a), count($b));

    $dotProduct = 0.0;
    $sumSquaresA = 0.0;
    $sumSquaresB = 0.0;

    $idx = 0;
    while ($idx < $dims) {
        $x = $a[$idx];
        $y = $b[$idx];
        $dotProduct += $x * $y;
        $sumSquaresA += $x * $x;
        $sumSquaresB += $y * $y;
        $idx++;
    }

    $magnitude = sqrt($sumSquaresA) * sqrt($sumSquaresB);
    if ($magnitude > 0) {
        return $dotProduct / $magnitude;
    }

    return 0.0;
}
|