ollamaGenerate.php 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. <?php
  2. error_reporting(E_ALL);
  3. ini_set('display_errors', 1);
  4. /**
  5. * controllers/ollamaGenerate.php
  6. *
  7. * AJAX POST handler: generates AI agronomic text using Ollama, grounded
  8. * with relevant passages retrieved from the soil science knowledge base
  9. * (William A. Albrecht et al.) stored in MySQL knowledge_chunks.
  10. *
  11. * Flow:
  12. * 1. Load full soil record + specification ranges from DB
  13. * 2. Build a structured data summary covering ALL measured elements
  14. * 3. Embed that summary via nomic-embed-text → cosine search over knowledge_chunks
  15. * 4. Inject retrieved passages + data into a section-specific prompt
  16. * 5. Send to llama3.1 and return the generated text
  17. *
  18. * POST params:
  19. * csrf_token string
  20. * rid int soil_records.id
  21. * rand string soil_records.rand
  22. * section string overview | ai_interpretation | foliar | microbial
  23. *
  24. * Note: run ingestion from Windows where Ollama is accessible:
  25. * php tools/ingest_knowledge.php --test
  26. * php tools/ingest_knowledge.php --file="book.pdf" --author="William A. Albrecht"
  27. */
  28. if (session_status() === PHP_SESSION_NONE) {
  29. session_start();
  30. }
  31. require_once __DIR__ . '/../config/database.php';
  32. require_once __DIR__ . '/../lib/auth.php';
  33. require_once __DIR__ . '/../lib/csrf.php';
  34. header('Content-Type: application/json');
  35. // ── Config ───────────────────────────────────────────────────────────────────
  36. define('OLLAMA_HOST', 'http://192.168.8.73:11434');
  37. define('OLLAMA_MODEL', 'llama3.1:8b-instruct-q4_K_M');
  38. define('EMBED_MODEL', 'nomic-embed-text');
  39. define('RAG_TOP_K', 6); // book passages injected per request
  40. define('OLLAMA_TIMEOUT', 180); // seconds — LLM can be slow
  41. // ── Auth + CSRF ───────────────────────────────────────────────────────────────
  42. if (!isLoggedIn()) {
  43. http_response_code(401);
  44. echo json_encode(['success' => false, 'error' => 'Not authenticated']);
  45. exit;
  46. }
  47. if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
  48. http_response_code(405);
  49. echo json_encode(['success' => false, 'error' => 'Method not allowed']);
  50. exit;
  51. }
  52. if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) {
  53. http_response_code(403);
  54. echo json_encode(['success' => false, 'error' => 'Invalid CSRF token']);
  55. exit;
  56. }
  57. $recordId = (int)trim($_POST['rid'] ?? '');
  58. $randId = trim($_POST['rand'] ?? '');
  59. $section = trim($_POST['section'] ?? '');
  60. $validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
  61. if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
  62. http_response_code(400);
  63. echo json_encode(['success' => false, 'error' => 'Invalid parameters']);
  64. exit;
  65. }
  66. // ── Load soil record + spec ───────────────────────────────────────────────────
  67. try {
  68. $pdo = getDBConnection();
  69. $stmt = $pdo->prepare('SELECT * FROM soil_records WHERE id = ? AND rand = ?');
  70. $stmt->execute([$recordId, $randId]);
  71. $row = $stmt->fetch(PDO::FETCH_ASSOC);
  72. if (!$row) {
  73. http_response_code(404);
  74. echo json_encode(['success' => false, 'error' => 'Record not found']);
  75. exit;
  76. }
  77. $spec = [];
  78. if (!empty($row['soil_type'])) {
  79. $stmtSpec = $pdo->prepare('SELECT * FROM soil_specifications WHERE soil_type = ? LIMIT 1');
  80. $stmtSpec->execute([$row['soil_type']]);
  81. $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: [];
  82. }
  83. } catch (PDOException $e) {
  84. error_log('DB error in ollamaGenerate.php: ' . $e->getMessage());
  85. http_response_code(500);
  86. echo json_encode(['success' => false, 'error' => 'Database error']);
  87. exit;
  88. }
  89. // ── Helpers ───────────────────────────────────────────────────────────────────
  90. function fv(mixed $v, int $dp = 2): string
  91. {
  92. if ($v === null || $v === '') return 'N/A';
  93. return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
  94. }
  95. function rangeStatus(mixed $value, mixed $min, mixed $max): string
  96. {
  97. if (!is_numeric($value)) return '';
  98. $v = (float)$value;
  99. $lo = is_numeric($min) ? (float)$min : null;
  100. $hi = is_numeric($max) ? (float)$max : null;
  101. if ($lo !== null && $v < $lo) return '[DEFICIENT]';
  102. if ($hi !== null && $v > $hi) return '[EXCESS]';
  103. if ($lo !== null || $hi !== null) return '[IDEAL]';
  104. return '';
  105. }
  106. /** Resolve a value — check spec row first, then soil record row */
  107. function sv(array $spec, array $row, string $col): mixed
  108. {
  109. if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
  110. if (isset($row[$col]) && $row[$col] !== '' && $row[$col] !== null) return $row[$col];
  111. return null;
  112. }
  113. $r = $row;
  114. $s = $spec;
  115. // ── Pre-compute all display values (heredocs don't support function calls) ────
  116. $d_ph_h2o = fv($r['ph_h2o'], 1); $d_ph_h2o_st = rangeStatus($r['ph_h2o'], 6.2, 6.8);
  117. $d_ph_cacl2 = fv($r['ph_cacl2'], 1);
  118. $d_ec = fv($r['ec'], 2);
  119. $d_colour = $r['colour'] ?? 'N/A';
  120. $d_texture = $r['texture'] ?? 'N/A';
  121. $d_gravel = fv($r['gravel'], 1);
  122. $d_ocarbon = fv($r['ocarbon'], 1);
  123. $d_omatter = fv($r['omatter'], 1);
  124. $d_cec = fv($r['cec'], 2);
  125. $d_tec = fv($r['tec'], 2);
  126. $d_paramag = fv($r['paramag'], 0);
  127. $d_no3 = fv($r['NO3_N'], 0); $d_no3_st = rangeStatus($r['NO3_N'], 10, 20);
  128. $d_nh3 = fv($r['NH3_N'], 0);
  129. $d_cn_ratio = fv($r['c_n_ratio'], 1);
  130. $d_p_colwell = fv($r['p_colwell'], 0);
  131. $d_p_morgan = fv($r['p_morgan'], 0);
  132. $d_p_mehlick = fv($r['p_mehlick'], 0);
  133. $d_p_bray2 = fv($r['p_bray2'], 0);
  134. // Major cations
  135. $d_ca_ppm = fv($r['BS_ca_ppm'], 0);
  136. $d_ca_min = fv(sv($s,$r,'ca_ppm_min'), 0); $d_ca_max = fv(sv($s,$r,'ca_ppm_max'), 0);
  137. $d_ca_st = rangeStatus($r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max'));
  138. $d_mg_ppm = fv($r['BS_mg_ppm'], 0);
  139. $d_mg_min = fv(sv($s,$r,'mg_ppm_min'), 0); $d_mg_max = fv(sv($s,$r,'mg_ppm_max'), 0);
  140. $d_mg_st = rangeStatus($r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max'));
  141. $d_k_ppm = fv($r['BS_k_ppm'], 0);
  142. $d_k_min = fv(sv($s,$r,'k_ppm_min'), 0); $d_k_max = fv(sv($s,$r,'k_ppm_max'), 0);
  143. $d_k_st = rangeStatus($r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max'));
  144. $d_na_ppm = fv($r['BS_na_ppm'], 0);
  145. $d_na_min = fv(sv($s,$r,'na_ppm_min'), 0); $d_na_max = fv(sv($s,$r,'na_ppm_max'), 0);
  146. $d_na_st = rangeStatus($r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max'));
  147. // Base saturations
  148. $d_ca_bs = fv($r['BS_ca2'], 2);
  149. $d_ca_bs_min = fv(sv($s,$r,'cabs_min'), 1); $d_ca_bs_max = fv(sv($s,$r,'cabs_max'), 1);
  150. $d_ca_bs_st = rangeStatus($r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max'));
  151. $d_mg_bs = fv($r['BS_mg2'], 2);
  152. $d_mg_bs_min = fv(sv($s,$r,'mgbs_min'), 1); $d_mg_bs_max = fv(sv($s,$r,'mgbs_max'), 1);
  153. $d_mg_bs_st = rangeStatus($r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max'));
  154. $d_k_bs = fv($r['BS_k'], 2);
  155. $d_k_bs_min = fv(sv($s,$r,'kbs_min'), 1); $d_k_bs_max = fv(sv($s,$r,'kbs_max'), 1);
  156. $d_k_bs_st = rangeStatus($r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max'));
  157. $d_na_bs = fv($r['BS_na'], 2);
  158. $d_na_bs_min = fv(sv($s,$r,'nabs_min'), 1); $d_na_bs_max = fv(sv($s,$r,'nabs_max'), 1);
  159. $d_na_bs_st = rangeStatus($r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max'));
  160. $d_ob = fv($r['BS_ob'], 2); $d_ob_rec = fv(sv($s,$r,'ob_rec'), 1);
  161. $d_h = fv($r['BS_h'], 2); $d_h_rec = fv(sv($s,$r,'h_rec'), 1);
  162. $d_al3 = fv($r['BS_al3'], 2);
  163. // Morgans
  164. $d_ca_m = fv($r['ca_morgan'], 2); $d_mg_m = fv($r['mg_morgan'], 2);
  165. $d_k_m = fv($r['k_morgan'], 2); $d_na_m = fv($r['na_morgan'], 2);
  166. // Mehlick-3
  167. $d_ca_me = fv($r['ca_mehlick3'], 2); $d_mg_me = fv($r['mg_mehlick3'], 2);
  168. $d_k_me = fv($r['k_mehlick3'], 2); $d_na_me = fv($r['na_mehlick3'], 2);
  169. $d_al_me = fv($r['al_mehlick3'], 2);
  170. // Trace elements
  171. $d_s = fv($r['s_morgan'], 2); $d_b = fv($r['b_cacl2'], 2);
  172. $d_mn = fv($r['mn_dtpa'], 2); $d_cu = fv($r['cu_dtpa'], 2);
  173. $d_zn = fv($r['zn_dtpa'], 2); $d_fe = fv($r['fe_dtpa'], 2);
  174. $d_fe_tot = fv($r['fe'], 2); $d_al = fv($r['al'], 2);
  175. $d_si = fv($r['sl_cacl2'], 2); $d_co = fv($r['co_dtpa'], 2);
  176. $d_mo = fv($r['m_dtpa'], 2); $d_se = fv($r['se'], 2);
  177. // Ratios
  178. $_ca_me_v = is_numeric($r['ca_mehlick3']) ? (float)$r['ca_mehlick3'] : 0;
  179. $_mg_me_v = is_numeric($r['mg_mehlick3']) ? (float)$r['mg_mehlick3'] : 0;
  180. $d_ca_mg_ratio = fv($_mg_me_v != 0 ? round($_ca_me_v / $_mg_me_v, 1) : null, 1);
  181. $d_ca_mg_ratio_rec = fv(sv($s,$r,'ca_mg_ratio'), 1);
  182. // Build comprehensive soil data block (ALL elements)
  183. $soilData = <<<TEXT
  184. =====================================
  185. SOIL TEST DATA — COMPLETE ANALYSIS
  186. =====================================
  187. Client: {$r['client_name']}
  188. Location: {$r['site_address']}, {$r['state_postcode']}
  189. Crop: {$r['sample_id']}
  190. Crop Type: {$r['crop_type']}
  191. Soil Type: {$r['soil_type']}
  192. Lab No: {$r['lab_no']}
  193. Date Sampled: {$r['date_sampled']}
  194. --- SOIL PHYSICAL / REACTION ---
  195. pH (H2O): $d_ph_h2o [target: 6.2-6.8] $d_ph_h2o_st
  196. pH (CaCl2): $d_ph_cacl2
  197. EC (mS/cm): $d_ec
  198. Colour: $d_colour
  199. Texture: $d_texture
  200. Gravel (%): $d_gravel
  201. --- ORGANIC MATTER ---
  202. Organic Carbon (%): $d_ocarbon
  203. Organic Matter (%): $d_omatter
  204. --- CATION EXCHANGE ---
  205. CEC (meq/100g): $d_cec
  206. TEC (meq/100g): $d_tec
  207. Paramagnetic: $d_paramag
  208. --- NITROGEN ---
  209. Nitrate-N (NO3-N ppm): $d_no3 [target: 10-20 ppm] $d_no3_st
  210. Ammonium-N (NH3-N ppm): $d_nh3
  211. C:N ratio: $d_cn_ratio
  212. --- PHOSPHORUS ---
  213. P Colwell (ppm): $d_p_colwell
  214. P Morgan (ppm): $d_p_morgan
  215. P Mehlick (ppm): $d_p_mehlick
  216. P Bray2 (ppm): $d_p_bray2
  217. --- MAJOR CATIONS (ppm) ---
  218. Calcium Ca (ppm): $d_ca_ppm [min: $d_ca_min, max: $d_ca_max] $d_ca_st
  219. Magnesium Mg (ppm): $d_mg_ppm [min: $d_mg_min, max: $d_mg_max] $d_mg_st
  220. Potassium K (ppm): $d_k_ppm [min: $d_k_min, max: $d_k_max] $d_k_st
  221. Sodium Na (ppm): $d_na_ppm [min: $d_na_min, max: $d_na_max] $d_na_st
  222. --- BASE SATURATIONS (%) ---
  223. Calcium Ca (%): $d_ca_bs% [min: $d_ca_bs_min, max: $d_ca_bs_max] $d_ca_bs_st
  224. Magnesium Mg (%): $d_mg_bs% [min: $d_mg_bs_min, max: $d_mg_bs_max] $d_mg_bs_st
  225. Potassium K (%): $d_k_bs% [min: $d_k_bs_min, max: $d_k_bs_max] $d_k_bs_st
  226. Sodium Na (%): $d_na_bs% [min: $d_na_bs_min, max: $d_na_bs_max] $d_na_bs_st
  227. Other Bases (%): $d_ob% [recommended: $d_ob_rec]
  228. Hydrogen (%): $d_h% [recommended: $d_h_rec]
  229. Aluminium Al3 (%): $d_al3%
  230. --- MORGANS EXTRACT (ppm) ---
  231. Ca Morgan: $d_ca_m
  232. Mg Morgan: $d_mg_m
  233. K Morgan: $d_k_m
  234. Na Morgan: $d_na_m
  235. --- MEHLICK-3 EXTRACT (ppm) ---
  236. Ca Mehlick3: $d_ca_me
  237. Mg Mehlick3: $d_mg_me
  238. K Mehlick3: $d_k_me
  239. Na Mehlick3: $d_na_me
  240. Al Mehlick3: $d_al_me
  241. --- TRACE ELEMENTS (ppm) ---
  242. Sulfur S (ppm): $d_s
  243. Boron B (ppm): $d_b
  244. Manganese Mn (ppm): $d_mn
  245. Copper Cu (ppm): $d_cu
  246. Zinc Zn (ppm): $d_zn
  247. Iron Fe (ppm): $d_fe
  248. Iron Fe (total): $d_fe_tot
  249. Aluminium Al (ppm): $d_al
  250. Silicon Si (ppm): $d_si
  251. Cobalt Co (ppm): $d_co
  252. Molybdenum Mo (ppm): $d_mo
  253. Selenium Se (ppm): $d_se
  254. --- RATIOS ---
  255. Ca:Mg ratio: $d_ca_mg_ratio [recommended: $d_ca_mg_ratio_rec]
  256. C:N ratio: $d_cn_ratio
  257. TEXT;
  258. // Append quick deficiency/excess summary
  259. $deficiencies = [];
  260. $excesses = [];
  261. $checkElements = [
  262. ['pH (H2O)', $r['ph_h2o'], 6.2, 6.8],
  263. ['Nitrate-N', $r['NO3_N'], 10, 20],
  264. ['Calcium (ppm)', $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
  265. ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
  266. ['Potassium (ppm)', $r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max')],
  267. ['Sodium (ppm)', $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
  268. ['Ca sat (%)', $r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max')],
  269. ['Mg sat (%)', $r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max')],
  270. ['K sat (%)', $r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max')],
  271. ['Na sat (%)', $r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max')],
  272. ];
  273. foreach ($checkElements as [$label, $val, $lo, $hi]) {
  274. if (!is_numeric($val)) continue;
  275. $v = (float)$val;
  276. if (is_numeric($lo) && $v < (float)$lo) $deficiencies[] = $label;
  277. if (is_numeric($hi) && $v > (float)$hi) $excesses[] = $label;
  278. }
  279. $soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n";
  280. $soilData .= "In Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)) . "\n";
  281. $soilData .= "=====================================\n";
  282. // ── RAG: retrieve relevant passages from knowledge_chunks ─────────────────────
  283. $ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
  284. $knowledgeContext = '';
  285. if (!empty($ragChunks)) {
  286. $knowledgeContext = "\n\n===================================================\n"
  287. . "RELEVANT PASSAGES FROM SOIL SCIENCE LITERATURE\n"
  288. . "(William A. Albrecht and other authorities)\n"
  289. . "===================================================\n";
  290. foreach ($ragChunks as $i => $chunk) {
  291. $knowledgeContext .= sprintf(
  292. "\n[%d] \"%s\" — %s (p.%d)\n%s\n",
  293. $i + 1,
  294. $chunk['source'],
  295. $chunk['author'],
  296. $chunk['page'],
  297. $chunk['chunk_text']
  298. );
  299. }
  300. }
  301. // ── Section-specific prompts ──────────────────────────────────────────────────
  302. $system = "You are a certified agronomist specialising in soil fertility, trained in the "
  303. . "Albrecht method of mineral soil balancing. You have deep knowledge of soil chemistry, "
  304. . "plant nutrition, and the relationship between soil mineral balance and crop and livestock health. "
  305. . "Always ground your recommendations in the measured data provided. "
  306. . "For Australian conditions, reference typical soil types and climate where relevant. "
  307. . "Write in a professional but accessible tone suitable for a farmer-facing report. "
  308. . "When the knowledge passages conflict with your training, prefer the passages — "
  309. . "they are from authoritative soil science texts.";
  310. $ctx = $soilData . $knowledgeContext;
  311. $prompts = [
  312. 'overview' =>
  313. "{$system}\n\n{$ctx}\n\n"
  314. . "TASK: Write an executive overview of these soil test results (3–4 paragraphs). "
  315. . "Cover: (1) overall soil health and fertility level, "
  316. . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
  317. . "(3) any positive attributes. "
  318. . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.",
  319. 'ai_interpretation' =>
  320. "{$system}\n\n{$ctx}\n\n"
  321. . "TASK: Write a detailed technical interpretation structured with these headings:\n"
  322. . "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
  323. . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
  324. . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
  325. . "4. MAJOR ELEMENTS (Ca, Mg, K, Na, P — ppm and saturation %)\n"
  326. . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
  327. . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
  328. . "7. OVERALL SOIL BALANCE ASSESSMENT\n"
  329. . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance "
  330. . "and interactions with other elements. Reference the Albrecht literature where relevant.",
  331. 'foliar' =>
  332. "{$system}\n\n{$ctx}\n\n"
  333. . "TASK: Design a foliar nutrition program to address the deficiencies shown. "
  334. . "Format as a numbered list or table: "
  335. . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
  336. . "Prioritise elements marked [DEFICIENT]. "
  337. . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). "
  338. . "Add a note on carrier water pH and adjuvant recommendations.",
  339. 'microbial' =>
  340. "{$system}\n\n{$ctx}\n\n"
  341. . "TASK: Design a biological/microbial soil improvement program. "
  342. . "Structure your response:\n"
  343. . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n"
  344. . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n"
  345. . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
  346. . "4. TIMING & INTEGRATION with the mineral balancing program\n"
  347. . "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
  348. ];
  349. // ── Call Ollama ───────────────────────────────────────────────────────────────
  350. $payload = json_encode([
  351. 'model' => OLLAMA_MODEL,
  352. 'prompt' => $prompts[$section],
  353. 'stream' => false,
  354. 'options' => [
  355. 'temperature' => 0.3,
  356. 'num_predict' => 2048,
  357. 'num_ctx' => 6144,
  358. 'repeat_penalty' => 1.1,
  359. 'keep_alive' => -1, // keep model resident between requests
  360. ],
  361. ]);
  362. $ch = curl_init(OLLAMA_HOST . '/api/generate');
  363. curl_setopt_array($ch, [
  364. CURLOPT_POST => true,
  365. CURLOPT_POSTFIELDS => $payload,
  366. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  367. CURLOPT_RETURNTRANSFER => true,
  368. CURLOPT_TIMEOUT => OLLAMA_TIMEOUT,
  369. CURLOPT_CONNECTTIMEOUT => 5,
  370. ]);
  371. $response = curl_exec($ch);
  372. $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  373. $curlErr = curl_error($ch);
  374. curl_close($ch);
  375. if ($curlErr || $response === false) {
  376. http_response_code(502);
  377. echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]);
  378. exit;
  379. }
  380. if ($httpCode !== 200) {
  381. http_response_code(502);
  382. echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]);
  383. exit;
  384. }
  385. $ollamaData = json_decode($response, true);
  386. $text = trim($ollamaData['response'] ?? '');
  387. if ($text === '') {
  388. http_response_code(502);
  389. echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']);
  390. exit;
  391. }
  392. echo json_encode([
  393. 'success' => true,
  394. 'text' => $text,
  395. 'rag_chunks_used' => count($ragChunks),
  396. ]);
  397. exit;
  398. // ── RAG: retrieve relevant knowledge chunks from MySQL ────────────────────────
  399. function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
  400. {
  401. try {
  402. $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
  403. } catch (PDOException $e) {
  404. return []; // Table doesn't exist yet
  405. }
  406. if ($count === 0) {
  407. return []; // Knowledge base not yet populated — run ingest_knowledge.php
  408. }
  409. // Try vector similarity first
  410. $queryEmbedding = getQueryEmbedding($queryText);
  411. if ($queryEmbedding !== null) {
  412. return vectorSearch($pdo, $queryEmbedding, $topK);
  413. }
  414. // Fallback: MySQL FULLTEXT search
  415. return fulltextSearch($pdo, $section, $topK);
  416. }
  417. function getQueryEmbedding(string $text): ?array
  418. {
  419. $queryText = substr($text, 0, 2000);
  420. // Try new /api/embed (Ollama >= 0.1.26) first
  421. $ch = curl_init(OLLAMA_HOST . '/api/embed');
  422. curl_setopt_array($ch, [
  423. CURLOPT_POST => true,
  424. CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]),
  425. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  426. CURLOPT_RETURNTRANSFER => true,
  427. CURLOPT_TIMEOUT => 15,
  428. CURLOPT_CONNECTTIMEOUT => 3,
  429. ]);
  430. $resp = curl_exec($ch);
  431. $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  432. curl_close($ch);
  433. if ($resp && $code === 200) {
  434. $data = json_decode($resp, true);
  435. $emb = $data['embeddings'][0] ?? null;
  436. if (is_array($emb) && count($emb) > 0) return $emb;
  437. }
  438. // Fallback: legacy /api/embeddings
  439. $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
  440. curl_setopt_array($ch, [
  441. CURLOPT_POST => true,
  442. CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]),
  443. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  444. CURLOPT_RETURNTRANSFER => true,
  445. CURLOPT_TIMEOUT => 15,
  446. CURLOPT_CONNECTTIMEOUT => 3,
  447. ]);
  448. $resp2 = curl_exec($ch);
  449. $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  450. curl_close($ch);
  451. if ($resp2 && $code2 === 200) {
  452. $data2 = json_decode($resp2, true);
  453. $emb2 = $data2['embedding'] ?? null;
  454. if (is_array($emb2) && count($emb2) > 0) return $emb2;
  455. }
  456. return null;
  457. }
  458. function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
  459. {
  460. $stmt = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks');
  461. $scores = [];
  462. while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
  463. $chunkVec = json_decode($row['embedding'], true);
  464. if (!is_array($chunkVec)) continue;
  465. $scores[] = [
  466. 'score' => cosineSimilarity($queryVec, $chunkVec),
  467. 'source' => $row['source'],
  468. 'author' => $row['author'],
  469. 'page' => $row['page'],
  470. 'chunk_text' => $row['chunk_text'],
  471. ];
  472. }
  473. usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
  474. return array_slice($scores, 0, $topK);
  475. }
  476. function fulltextSearch(PDO $pdo, string $section, int $topK): array
  477. {
  478. $keywords = [
  479. 'overview' => 'soil fertility mineral balance calcium magnesium albrecht',
  480. 'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
  481. 'foliar' => 'foliar nutrition trace elements deficiency correction spray',
  482. 'microbial' => 'soil biology microbial organic matter carbon nitrogen humus',
  483. ];
  484. $query = $keywords[$section] ?? 'soil fertility mineral nutrition';
  485. try {
  486. $stmt = $pdo->prepare(
  487. 'SELECT source, author, page, chunk_text,
  488. MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) AS score
  489. FROM knowledge_chunks
  490. WHERE MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE)
  491. ORDER BY score DESC
  492. LIMIT ?'
  493. );
  494. $stmt->execute([$query, $query, $topK]);
  495. return $stmt->fetchAll(PDO::FETCH_ASSOC);
  496. } catch (PDOException $e) {
  497. error_log('RAG fulltext search failed: ' . $e->getMessage());
  498. return [];
  499. }
  500. }
  501. function cosineSimilarity(array $a, array $b): float
  502. {
  503. $dot = $normA = $normB = 0.0;
  504. $len = min(count($a), count($b));
  505. for ($i = 0; $i < $len; $i++) {
  506. $dot += $a[$i] * $b[$i];
  507. $normA += $a[$i] * $a[$i];
  508. $normB += $b[$i] * $b[$i];
  509. }
  510. $denom = sqrt($normA) * sqrt($normB);
  511. return $denom > 0 ? $dot / $denom : 0.0;
  512. }