ollamaGenerate.php 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852
  1. <?php
  2. /**
  3. * controllers/ollamaGenerate.php
  4. *
  5. * AJAX POST handler: generates AI agronomic text using Ollama, grounded
  6. * with relevant passages retrieved from the soil science knowledge base
  7. * (William A. Albrecht et al.) stored in MySQL knowledge_chunks.
  8. *
  9. * Flow:
  10. * 1. Load full soil record + specification ranges from DB
  11. * 2. Build a structured data summary covering ALL measured elements
  12. * 3. Embed that summary via nomic-embed-text → cosine search over knowledge_chunks
  13. * 4. Inject retrieved passages + data into a section-specific prompt
  14. * 5. Send to llama3.1 and return the generated text
  15. *
  16. * POST params:
  17. * csrf_token string
  18. * rid int soil_records.id
  19. * rand string soil_records.rand
  20. * section string overview | ai_interpretation | foliar | microbial
  21. *
  22. * Note: run ingestion from Windows where Ollama is accessible:
  23. * php tools/ingest_knowledge.php --test
  24. * php tools/ingest_knowledge.php --file="book.pdf" --author="William A. Albrecht"
  25. */
  26. if (session_status() === PHP_SESSION_NONE) {
  27. session_start();
  28. }
  29. require_once __DIR__ . '/../config/database.php';
  30. require_once __DIR__ . '/../lib/auth.php';
  31. require_once __DIR__ . '/../lib/csrf.php';
  32. header('Content-Type: application/json');
  33. // ── Config ───────────────────────────────────────────────────────────────────
  34. define('OLLAMA_HOST', 'http://192.168.8.73:11434');
  35. define('OLLAMA_MODEL', 'llama3.1:8b-instruct-q4_K_M');
  36. define('EMBED_MODEL', 'nomic-embed-text');
  37. define('RAG_TOP_K', 6); // book passages injected per request
  38. define('OLLAMA_TIMEOUT', 180); // seconds — LLM can be slow
  39. // ── Auth + CSRF ───────────────────────────────────────────────────────────────
  40. if (!isLoggedIn()) {
  41. http_response_code(401);
  42. echo json_encode(['success' => false, 'error' => 'Not authenticated']);
  43. exit;
  44. }
  45. if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
  46. http_response_code(405);
  47. echo json_encode(['success' => false, 'error' => 'Method not allowed']);
  48. exit;
  49. }
  50. if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) {
  51. http_response_code(403);
  52. echo json_encode(['success' => false, 'error' => 'Invalid CSRF token']);
  53. exit;
  54. }
  55. $recordId = (int)trim($_POST['rid'] ?? '');
  56. $randId = trim($_POST['rand'] ?? '');
  57. $section = trim($_POST['section'] ?? '');
  58. $recordType = trim($_POST['record_type'] ?? 'soil'); // soil | plant
  59. $validSoilSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
  60. $validPlantSections = ['general', 'ai_interpretation', 'recommended', 'foliar'];
  61. $validSections = $recordType === 'plant' ? $validPlantSections : $validSoilSections;
  62. if (!$recordId || $randId === '' || !in_array($section, $validSections, true)
  63. || !in_array($recordType, ['soil', 'plant'], true)) {
  64. http_response_code(400);
  65. echo json_encode(['success' => false, 'error' => 'Invalid parameters']);
  66. exit;
  67. }
  68. // ── Load record + spec ────────────────────────────────────────────────────────
  69. try {
  70. $pdo = getDBConnection();
  71. if ($recordType === 'plant') {
  72. $stmt = $pdo->prepare('SELECT * FROM plant_records WHERE id = ? AND rand = ?');
  73. $stmt->execute([$recordId, $randId]);
  74. $row = $stmt->fetch(PDO::FETCH_ASSOC);
  75. if (!$row) {
  76. http_response_code(404);
  77. echo json_encode(['success' => false, 'error' => 'Record not found']);
  78. exit;
  79. }
  80. $spec = [];
  81. if (!empty($row['crop_type'])) {
  82. $stmtSpec = $pdo->prepare('SELECT * FROM plant_specifications WHERE plant_type = ? LIMIT 1');
  83. $stmtSpec->execute([$row['crop_type']]);
  84. $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: [];
  85. }
  86. // Load up to 3 prior records for the same crop_type + sample_id
  87. $historicalRows = [];
  88. if (!empty($row['crop_type']) && !empty($row['sample_id'])) {
  89. $stmtHist = $pdo->prepare(
  90. 'SELECT id, date_sampled, n, p, k, s, mg, ca, na, fe, mn, zn, cu, b, m, co, se, cl, c
  91. FROM plant_records
  92. WHERE client_records_id = ?
  93. AND crop_type = ?
  94. AND sample_id = ?
  95. AND id != ?
  96. ORDER BY date_sampled DESC
  97. LIMIT 3'
  98. );
  99. $stmtHist->execute([
  100. $row['client_records_id'],
  101. $row['crop_type'],
  102. $row['sample_id'],
  103. $recordId,
  104. ]);
  105. $historicalRows = $stmtHist->fetchAll(PDO::FETCH_ASSOC);
  106. }
  107. } else {
  108. $stmt = $pdo->prepare('SELECT * FROM soil_records WHERE id = ? AND rand = ?');
  109. $stmt->execute([$recordId, $randId]);
  110. $row = $stmt->fetch(PDO::FETCH_ASSOC);
  111. if (!$row) {
  112. http_response_code(404);
  113. echo json_encode(['success' => false, 'error' => 'Record not found']);
  114. exit;
  115. }
  116. $spec = [];
  117. if (!empty($row['soil_type'])) {
  118. $stmtSpec = $pdo->prepare('SELECT * FROM soil_specifications WHERE soil_type = ? LIMIT 1');
  119. $stmtSpec->execute([$row['soil_type']]);
  120. $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: [];
  121. }
  122. }
  123. } catch (PDOException $e) {
  124. error_log('DB error in ollamaGenerate.php: ' . $e->getMessage());
  125. http_response_code(500);
  126. echo json_encode(['success' => false, 'error' => 'Database error']);
  127. exit;
  128. }
  129. // ── Helpers ───────────────────────────────────────────────────────────────────
  130. function fv(mixed $v, int $dp = 2): string
  131. {
  132. if ($v === null || $v === '') return 'N/A';
  133. return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
  134. }
  135. function rangeStatus(mixed $value, mixed $min, mixed $max): string
  136. {
  137. if (!is_numeric($value)) return '';
  138. $v = (float)$value;
  139. $lo = is_numeric($min) ? (float)$min : null;
  140. $hi = is_numeric($max) ? (float)$max : null;
  141. if ($lo !== null && $v < $lo) return '[DEFICIENT]';
  142. if ($hi !== null && $v > $hi) return '[EXCESS]';
  143. if ($lo !== null || $hi !== null) return '[IDEAL]';
  144. return '';
  145. }
  146. /** Resolve a value — check spec row first, then soil record row */
  147. function sv(array $spec, array $row, string $col): mixed
  148. {
  149. if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
  150. if (isset($row[$col]) && $row[$col] !== '' && $row[$col] !== null) return $row[$col];
  151. return null;
  152. }
  153. $r = $row;
  154. $s = $spec;
  155. // ── Pre-compute all display values (heredocs don't support function calls) ────
  156. $d_ph_h2o = fv($r['ph_h2o'], 1); $d_ph_h2o_st = rangeStatus($r['ph_h2o'], 6.2, 6.8);
  157. $d_ph_cacl2 = fv($r['ph_cacl2'], 1);
  158. $d_ec = fv($r['ec'], 2);
  159. $d_colour = $r['colour'] ?? 'N/A';
  160. $d_texture = $r['texture'] ?? 'N/A';
  161. $d_gravel = fv($r['gravel'], 1);
  162. $d_ocarbon = fv($r['ocarbon'], 1);
  163. $d_omatter = fv($r['omatter'], 1);
  164. $d_cec = fv($r['cec'], 2);
  165. $d_tec = fv($r['tec'], 2);
  166. $d_paramag = fv($r['paramag'], 0);
  167. $d_no3 = fv($r['NO3_N'], 0); $d_no3_st = rangeStatus($r['NO3_N'], 10, 20);
  168. $d_nh3 = fv($r['NH3_N'], 0);
  169. $d_cn_ratio = fv($r['c_n_ratio'], 1);
  170. $d_p_colwell = fv($r['p_colwell'], 0);
  171. $d_p_morgan = fv($r['p_morgan'], 0);
  172. $d_p_mehlick = fv($r['p_mehlick'], 0);
  173. $d_p_bray2 = fv($r['p_bray2'], 0);
  174. // Major cations
  175. $d_ca_ppm = fv($r['BS_ca_ppm'], 0);
  176. $d_ca_min = fv(sv($s,$r,'ca_ppm_min'), 0); $d_ca_max = fv(sv($s,$r,'ca_ppm_max'), 0);
  177. $d_ca_st = rangeStatus($r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max'));
  178. $d_mg_ppm = fv($r['BS_mg_ppm'], 0);
  179. $d_mg_min = fv(sv($s,$r,'mg_ppm_min'), 0); $d_mg_max = fv(sv($s,$r,'mg_ppm_max'), 0);
  180. $d_mg_st = rangeStatus($r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max'));
  181. $d_k_ppm = fv($r['BS_k_ppm'], 0);
  182. $d_k_min = fv(sv($s,$r,'k_ppm_min'), 0); $d_k_max = fv(sv($s,$r,'k_ppm_max'), 0);
  183. $d_k_st = rangeStatus($r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max'));
  184. $d_na_ppm = fv($r['BS_na_ppm'], 0);
  185. $d_na_min = fv(sv($s,$r,'na_ppm_min'), 0); $d_na_max = fv(sv($s,$r,'na_ppm_max'), 0);
  186. $d_na_st = rangeStatus($r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max'));
  187. // Base saturations
  188. $d_ca_bs = fv($r['BS_ca2'], 2);
  189. $d_ca_bs_min = fv(sv($s,$r,'cabs_min'), 1); $d_ca_bs_max = fv(sv($s,$r,'cabs_max'), 1);
  190. $d_ca_bs_st = rangeStatus($r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max'));
  191. $d_mg_bs = fv($r['BS_mg2'], 2);
  192. $d_mg_bs_min = fv(sv($s,$r,'mgbs_min'), 1); $d_mg_bs_max = fv(sv($s,$r,'mgbs_max'), 1);
  193. $d_mg_bs_st = rangeStatus($r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max'));
  194. $d_k_bs = fv($r['BS_k'], 2);
  195. $d_k_bs_min = fv(sv($s,$r,'kbs_min'), 1); $d_k_bs_max = fv(sv($s,$r,'kbs_max'), 1);
  196. $d_k_bs_st = rangeStatus($r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max'));
  197. $d_na_bs = fv($r['BS_na'], 2);
  198. $d_na_bs_min = fv(sv($s,$r,'nabs_min'), 1); $d_na_bs_max = fv(sv($s,$r,'nabs_max'), 1);
  199. $d_na_bs_st = rangeStatus($r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max'));
  200. $d_ob = fv($r['BS_ob'], 2); $d_ob_rec = fv(sv($s,$r,'ob_rec'), 1);
  201. $d_h = fv($r['BS_h'], 2); $d_h_rec = fv(sv($s,$r,'h_rec'), 1);
  202. $d_al3 = fv($r['BS_al3'], 2);
  203. // Morgans
  204. $d_ca_m = fv($r['ca_morgan'], 2); $d_mg_m = fv($r['mg_morgan'], 2);
  205. $d_k_m = fv($r['k_morgan'], 2); $d_na_m = fv($r['na_morgan'], 2);
  206. // Mehlick-3
  207. $d_ca_me = fv($r['ca_mehlick3'], 2); $d_mg_me = fv($r['mg_mehlick3'], 2);
  208. $d_k_me = fv($r['k_mehlick3'], 2); $d_na_me = fv($r['na_mehlick3'], 2);
  209. $d_al_me = fv($r['al_mehlick3'], 2);
  210. // Trace elements
  211. $d_s = fv($r['s_morgan'], 2); $d_b = fv($r['b_cacl2'], 2);
  212. $d_mn = fv($r['mn_dtpa'], 2); $d_cu = fv($r['cu_dtpa'], 2);
  213. $d_zn = fv($r['zn_dtpa'], 2); $d_fe = fv($r['fe_dtpa'], 2);
  214. $d_fe_tot = fv($r['fe'], 2); $d_al = fv($r['al'], 2);
  215. $d_si = fv($r['sl_cacl2'], 2); $d_co = fv($r['co_dtpa'], 2);
  216. $d_mo = fv($r['m_dtpa'], 2); $d_se = fv($r['se'], 2);
  217. // Ratios
  218. $_ca_me_v = is_numeric($r['ca_mehlick3']) ? (float)$r['ca_mehlick3'] : 0;
  219. $_mg_me_v = is_numeric($r['mg_mehlick3']) ? (float)$r['mg_mehlick3'] : 0;
  220. $d_ca_mg_ratio = fv($_mg_me_v != 0 ? round($_ca_me_v / $_mg_me_v, 1) : null, 1);
  221. $d_ca_mg_ratio_rec = fv(sv($s,$r,'ca_mg_ratio'), 1);
  222. // Build comprehensive soil data block (ALL elements)
  223. $soilData = <<<TEXT
  224. =====================================
  225. SOIL TEST DATA — COMPLETE ANALYSIS
  226. =====================================
  227. Client: {$r['client_name']}
  228. Location: {$r['site_address']}, {$r['state_postcode']}
  229. Crop: {$r['sample_id']}
  230. Crop Type: {$r['crop_type']}
  231. Soil Type: {$r['soil_type']}
  232. Lab No: {$r['lab_no']}
  233. Date Sampled: {$r['date_sampled']}
  234. --- SOIL PHYSICAL / REACTION ---
  235. pH (H2O): $d_ph_h2o [target: 6.2-6.8] $d_ph_h2o_st
  236. pH (CaCl2): $d_ph_cacl2
  237. EC (mS/cm): $d_ec
  238. Colour: $d_colour
  239. Texture: $d_texture
  240. Gravel (%): $d_gravel
  241. --- ORGANIC MATTER ---
  242. Organic Carbon (%): $d_ocarbon
  243. Organic Matter (%): $d_omatter
  244. --- CATION EXCHANGE ---
  245. CEC (meq/100g): $d_cec
  246. TEC (meq/100g): $d_tec
  247. Paramagnetic: $d_paramag
  248. --- NITROGEN ---
  249. Nitrate-N (NO3-N ppm): $d_no3 [target: 10-20 ppm] $d_no3_st
  250. Ammonium-N (NH3-N ppm): $d_nh3
  251. C:N ratio: $d_cn_ratio
  252. --- PHOSPHORUS ---
  253. P Colwell (ppm): $d_p_colwell
  254. P Morgan (ppm): $d_p_morgan
  255. P Mehlick (ppm): $d_p_mehlick
  256. P Bray2 (ppm): $d_p_bray2
  257. --- MAJOR CATIONS (ppm) ---
  258. Calcium Ca (ppm): $d_ca_ppm [min: $d_ca_min, max: $d_ca_max] $d_ca_st
  259. Magnesium Mg (ppm): $d_mg_ppm [min: $d_mg_min, max: $d_mg_max] $d_mg_st
  260. Potassium K (ppm): $d_k_ppm [min: $d_k_min, max: $d_k_max] $d_k_st
  261. Sodium Na (ppm): $d_na_ppm [min: $d_na_min, max: $d_na_max] $d_na_st
  262. --- BASE SATURATIONS (%) ---
  263. Calcium Ca (%): $d_ca_bs% [min: $d_ca_bs_min, max: $d_ca_bs_max] $d_ca_bs_st
  264. Magnesium Mg (%): $d_mg_bs% [min: $d_mg_bs_min, max: $d_mg_bs_max] $d_mg_bs_st
  265. Potassium K (%): $d_k_bs% [min: $d_k_bs_min, max: $d_k_bs_max] $d_k_bs_st
  266. Sodium Na (%): $d_na_bs% [min: $d_na_bs_min, max: $d_na_bs_max] $d_na_bs_st
  267. Other Bases (%): $d_ob% [recommended: $d_ob_rec]
  268. Hydrogen (%): $d_h% [recommended: $d_h_rec]
  269. Aluminium Al3 (%): $d_al3%
  270. --- MORGANS EXTRACT (ppm) ---
  271. Ca Morgan: $d_ca_m
  272. Mg Morgan: $d_mg_m
  273. K Morgan: $d_k_m
  274. Na Morgan: $d_na_m
  275. --- MEHLICK-3 EXTRACT (ppm) ---
  276. Ca Mehlick3: $d_ca_me
  277. Mg Mehlick3: $d_mg_me
  278. K Mehlick3: $d_k_me
  279. Na Mehlick3: $d_na_me
  280. Al Mehlick3: $d_al_me
  281. --- TRACE ELEMENTS (ppm) ---
  282. Sulfur S (ppm): $d_s
  283. Boron B (ppm): $d_b
  284. Manganese Mn (ppm): $d_mn
  285. Copper Cu (ppm): $d_cu
  286. Zinc Zn (ppm): $d_zn
  287. Iron Fe (ppm): $d_fe
  288. Iron Fe (total): $d_fe_tot
  289. Aluminium Al (ppm): $d_al
  290. Silicon Si (ppm): $d_si
  291. Cobalt Co (ppm): $d_co
  292. Molybdenum Mo (ppm): $d_mo
  293. Selenium Se (ppm): $d_se
  294. --- RATIOS ---
  295. Ca:Mg ratio: $d_ca_mg_ratio [recommended: $d_ca_mg_ratio_rec]
  296. C:N ratio: $d_cn_ratio
  297. TEXT;
  298. // Append quick deficiency/excess summary
  299. $deficiencies = [];
  300. $excesses = [];
  301. $checkElements = [
  302. ['pH (H2O)', $r['ph_h2o'], 6.2, 6.8],
  303. ['Nitrate-N', $r['NO3_N'], 10, 20],
  304. ['Calcium (ppm)', $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
  305. ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
  306. ['Potassium (ppm)', $r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max')],
  307. ['Sodium (ppm)', $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
  308. ['Ca sat (%)', $r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max')],
  309. ['Mg sat (%)', $r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max')],
  310. ['K sat (%)', $r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max')],
  311. ['Na sat (%)', $r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max')],
  312. ];
  313. foreach ($checkElements as [$label, $val, $lo, $hi]) {
  314. if (!is_numeric($val)) continue;
  315. $v = (float)$val;
  316. if (is_numeric($lo) && $v < (float)$lo) $deficiencies[] = $label;
  317. if (is_numeric($hi) && $v > (float)$hi) $excesses[] = $label;
  318. }
  319. $soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n";
  320. $soilData .= "In Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)) . "\n";
  321. $soilData .= "=====================================\n";
  322. // ── System prompt per record type ────────────────────────────────────────────
  323. $systemPrompts = [
  324. 'soil' =>
  325. "You are a certified agronomist and soil scientist specialising in mineral soil balancing, "
  326. . "trained in the Albrecht method of base saturation and cation exchange chemistry. "
  327. . "You have deep knowledge of soil chemistry, soil biology, and the relationship between "
  328. . "soil mineral balance and crop productivity, livestock health, and human nutrition. "
  329. . "You understand Australian soil types — Vertosols, Chromosols, Sodosols, Tenosols, "
  330. . "Dermosols and others — and how climate and rainfall influence nutrient behaviour. "
  331. . "Always ground your recommendations in the measured data. "
  332. . "Write in a professional but accessible tone suitable for a farmer-facing report. "
  333. . "When knowledge passages from the Albrecht literature are provided, "
  334. . "prefer them over your general training — they are the authoritative reference.",
  335. 'plant' =>
  336. "You are a certified agronomist specialising in plant tissue analysis and crop nutrition. "
  337. . "You have deep knowledge of plant physiology, nutrient uptake mechanisms, and the "
  338. . "relationship between tissue nutrient levels and crop yield, quality, and disease resistance. "
  339. . "You are familiar with the CSIRO Plant Analysis Handbook, Hill Laboratories guidelines, "
  340. . "and PIRSA soil and plant analysis standards used in Australia. "
  341. . "You understand how plant mineral imbalances, pH, and antagonisms (e.g. Ca/Mg, Zn/P, "
  342. . "K/Mg) translate into plant deficiency or excess symptoms. "
  343. . "Always base your interpretation on the measured tissue values and specified ranges. "
  344. . "Write in a professional but accessible tone suitable for a farmer-facing report.",
  345. 'water' =>
  346. "You are a certified irrigation agronomist and water quality specialist. "
  347. . "You have deep knowledge of water chemistry, salinity, sodicity, and the effects of "
  348. . "irrigation water quality on soil structure, nutrient availability, and crop health. "
  349. . "You are familiar with Australian irrigation guidelines (ANZECC/ARMCANZ), SAR and "
  350. . "EC thresholds for different soil types and crops, and bicarbonate/carbonate effects "
  351. . "on calcium and magnesium availability. "
  352. . "Always base your interpretation on the measured water analysis values. "
  353. . "Write in a professional but accessible tone suitable for a farmer-facing report.",
  354. 'animal' =>
  355. "You are a certified animal nutritionist and ruminant dietitian with expertise in "
  356. . "livestock dietary mineral balance for Australian conditions. "
  357. . "You have deep knowledge of macro and trace mineral requirements for cattle, sheep, "
  358. . "and other livestock — including the relationship between pasture/feed mineral levels "
  359. . "and animal health, reproduction, and production outcomes. "
  360. . "You understand antagonisms such as Cu/Mo/S, Se/S, Zn/Fe, and the role of "
  361. . "Albrecht-balanced soils in producing nutritionally complete feed. "
  362. . "Always base your interpretation on the measured dietary analysis values. "
  363. . "Write in a professional but accessible tone suitable for a farmer or veterinarian.",
  364. 'compost' =>
  365. "You are a certified agronomist and composting specialist with expertise in organic "
  366. . "matter management, compost maturity assessment, and the use of compost to restore "
  367. . "soil biology and mineral balance. "
  368. . "You have deep knowledge of C:N ratios, microbial succession, humus formation, "
  369. . "and how compost inputs interact with existing soil chemistry. "
  370. . "You are familiar with the Elaine Ingham and Arden Anderson's compost guidelines, "
  371. . "You understand Australian composting standards and the role of biologically active "
  372. . "compost in supporting the Albrecht soil balancing philosophy. "
  373. . "Always base your interpretation on the measured compost analysis values. "
  374. . "Write in a professional but accessible tone suitable for a farmer-facing report.",
  375. ];
  376. $system = $systemPrompts[$recordType] ?? $systemPrompts['soil'];
  377. // ── Build analysis data block + prompts depending on record type ──────────────
  378. $ragChunks = []; // initialised here; populated only for soil path
  379. if ($recordType === 'plant') {
  380. // ── Plant data summary ────────────────────────────────────────────────────
  381. $p = $row;
  382. $ps = $spec;
  383. // Spec column map (matches plant_specifications DB schema)
  384. $pSpecCols = [
  385. 'n' => ['n_min', 'n_max'],
  386. 'p' => ['P_Min', 'P_Max'],
  387. 'k' => ['K_Min', 'K_Max'],
  388. 's' => ['S_Min', 'S_Max'],
  389. 'mg' => ['Mg_Min', 'Mg_Max'],
  390. 'ca' => ['Ca_Min', 'Ca_Max'],
  391. 'na' => ['Na_Min', 'Na_Max'],
  392. 'fe' => ['Fe_Min', 'Fe_Max'],
  393. 'mn' => ['Mn_Min', 'Mn_Max'],
  394. 'zn' => ['Zn_Min', 'Zn_Max'],
  395. 'cu' => ['Cu_Min', 'Cu_Max'],
  396. 'b' => ['B_Min', 'B_Max'],
  397. 'm' => ['M_Min', 'M_Max'],
  398. 'co' => ['Co_min', 'Co_max'],
  399. 'se' => ['se_min', 'se_max'],
  400. 'cl' => ['cl_min', 'cl_max'],
  401. 'c' => ['c_min', 'c_max'],
  402. ];
  403. $plantElements = [
  404. ['n', 'Nitrogen', '%'],
  405. ['p', 'Phosphorus', '%'],
  406. ['k', 'Potassium', '%'],
  407. ['s', 'Sulfur', '%'],
  408. ['mg', 'Magnesium', '%'],
  409. ['ca', 'Calcium', '%'],
  410. ['na', 'Sodium', '%'],
  411. ['fe', 'Iron', 'ppm'],
  412. ['mn', 'Manganese', 'ppm'],
  413. ['zn', 'Zinc', 'ppm'],
  414. ['cu', 'Copper', 'ppm'],
  415. ['b', 'Boron', 'ppm'],
  416. ['m', 'Molybdenum','ppm'],
  417. ['co', 'Cobalt', 'ppm'],
  418. ['se', 'Selenium', 'ppm'],
  419. ['cl', 'Chloride', 'ppm'],
  420. ['c', 'Carbon', '%'],
  421. ];
  422. $plantDeficiencies = [];
  423. $plantExcesses = [];
  424. $elementLines = '';
  425. foreach ($plantElements as [$el, $name, $unit]) {
  426. $val = (float)($p[$el] ?? 0);
  427. [$minCol, $maxCol] = $pSpecCols[$el];
  428. $min = (float)($ps[$minCol] ?? 0);
  429. $max = (float)($ps[$maxCol] ?? 0);
  430. $range = ($min > 0 || $max > 0)
  431. ? number_format($min, 3) . '–' . number_format($max, 3)
  432. : 'N/A';
  433. $found = $val > 0 ? number_format($val, 3) : 'N/A';
  434. $status = '';
  435. if ($val > 0 && ($min > 0 || $max > 0)) {
  436. if ($min > 0 && $val < $min) { $status = '[DEFICIENT]'; $plantDeficiencies[] = $name; }
  437. elseif ($max > 0 && $val > $max) { $status = '[EXCESS]'; $plantExcesses[] = $name; }
  438. else { $status = '[IDEAL]'; }
  439. }
  440. $elementLines .= sprintf("%-20s %s found: %-8s range: %-15s %s\n",
  441. "$name ($unit):", '', $found, $range, $status);
  442. }
  443. $plantData = "=====================================\n"
  444. . "PLANT TISSUE ANALYSIS — COMPLETE\n"
  445. . "=====================================\n"
  446. . "Client: {$p['client_name']}\n"
  447. . "Crop Type: {$p['crop_type']}\n"
  448. . "Sample ID: {$p['sample_id']}\n"
  449. . "Site ID: {$p['site_id']}\n"
  450. . "Lab No: {$p['lab_no']}\n"
  451. . "Date Sampled: {$p['date_sampled']}\n\n"
  452. . "--- ELEMENT RESULTS ---\n"
  453. . $elementLines . "\n"
  454. . "Deficient: " . (empty($plantDeficiencies) ? 'None detected' : implode(', ', $plantDeficiencies)) . "\n"
  455. . "In Excess: " . (empty($plantExcesses) ? 'None detected' : implode(', ', $plantExcesses)) . "\n"
  456. . "=====================================\n";
  457. // ── Historical comparison block ───────────────────────────────────────────
  458. $historicalContext = '';
  459. if (!empty($historicalRows)) {
  460. $histElements = ['n','p','k','s','mg','ca','na','fe','mn','zn','cu','b','m','co','se','cl','c'];
  461. $histNames = ['N','P','K','S','Mg','Ca','Na','Fe','Mn','Zn','Cu','B','Mo','Co','Se','Cl','C'];
  462. $historicalContext = "\n\n=====================================\n"
  463. . "HISTORICAL RECORDS — {$p['crop_type']} / Sample ID: {$p['sample_id']}\n"
  464. . "(most recent first, same site)\n"
  465. . "=====================================\n";
  466. foreach ($historicalRows as $hr) {
  467. $historicalContext .= "\nDate Sampled: " . ($hr['date_sampled'] ?? 'Unknown') . "\n";
  468. $line = '';
  469. foreach ($histElements as $i => $col) {
  470. $val = ($hr[$col] ?? '') !== '' ? number_format((float)$hr[$col], 3) : 'N/A';
  471. $line .= sprintf(" %-4s %s", $histNames[$i] . ':', $val);
  472. }
  473. $historicalContext .= trim($line) . "\n";
  474. }
  475. $historicalContext .= "=====================================\n";
  476. }
  477. $ragChunks = retrieveRelevantChunks($pdo, $plantData, $section, RAG_TOP_K);
  478. $knowledgeContext = '';
  479. if (!empty($ragChunks)) {
  480. $knowledgeContext = "\n\n===================================================\n"
  481. . "RELEVANT PASSAGES FROM SOIL & PLANT SCIENCE LITERATURE\n"
  482. . "===================================================\n";
  483. foreach ($ragChunks as $i => $chunk) {
  484. $knowledgeContext .= sprintf("\n[%d] \"%s\" — %s (p.%d)\n%s\n",
  485. $i + 1, $chunk['source'], $chunk['author'], $chunk['page'], $chunk['chunk_text']);
  486. }
  487. }
  488. $ctx = $plantData . $historicalContext . $knowledgeContext;
  489. $cropType = $p['crop_type'] ?? 'the crop';
  490. $prompts = [
  491. 'general' =>
  492. "{$system}\n\n{$ctx}\n\n"
  493. . "TASK: Write a concise general comment on the tissue analysis results for THIS SPECIFIC CROP: {$cropType}. "
  494. . "Do NOT mention or compare to any other crop type — your entire response must be about {$cropType} only. "
  495. . "Cover: (1) the overall nutritional status of this {$cropType} sample, "
  496. . "(2) the most significant deficiencies or excesses and their likely effect on {$cropType} yield and quality, "
  497. . "(3) any elements in good balance. Do not recommend specific product names.",
  498. 'ai_interpretation' =>
  499. "{$system}\n\n{$ctx}\n\n"
  500. . "TASK: Write a detailed technical interpretation of the tissue analysis for THIS SPECIFIC CROP: {$cropType}. "
  501. . "Do NOT mention or compare to any other crop type — your entire response must be about {$cropType} only. "
  502. . "Use these headings:\n"
  503. . "1. MAJOR ELEMENTS (N, P, K, S, Mg, Ca, Na)\n"
  504. . "2. TRACE ELEMENTS (Fe, Mn, Zn, Cu, B)\n"
  505. . "3. OTHER ELEMENTS (Mo, Co, Se, Cl, C)\n"
  506. . "4. NUTRIENT INTERACTIONS & ANTAGONISMS\n"
  507. . "5. OVERALL NUTRITIONAL ASSESSMENT FOR {$cropType}\n"
  508. . (!empty($historicalRows)
  509. ? "6. TREND ANALYSIS (compare current results to the HISTORICAL RECORDS provided above — "
  510. . "note which elements have improved, declined, or remained stable, and what this trend means for {$cropType} health)\n"
  511. : '')
  512. . "For each element marked [DEFICIENT] or [EXCESS], explain the agronomic significance "
  513. . "specific to {$cropType}, likely causes, and interactions with other nutrients.",
  514. 'recommended' =>
  515. "{$system}\n\n{$ctx}\n\n"
  516. . "TASK: Design a recommended remedial soil and fertiliser program for {$cropType} to correct the deficiencies shown. "
  517. . "Do NOT mention or compare to any other crop type — your entire response must be specific to {$cropType}. "
  518. . "Format as a numbered list or table: "
  519. . "Element | Current Status | Recommended Action | Product Type (generic) | Rate | Timing. "
  520. . "Prioritise elements marked [DEFICIENT]. "
  521. . "Note any nutrient antagonisms that may be limiting uptake in {$cropType}.",
  522. 'foliar' =>
  523. "{$system}\n\n{$ctx}\n\n"
  524. . "TASK: Design a foliar spray program for {$cropType} to rapidly correct the deficiencies shown. "
  525. . "Do NOT mention or compare to any other crop type — your entire response must be specific to {$cropType}. "
  526. . "Format as a numbered list or table: "
  527. . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
  528. . "Prioritise elements marked [DEFICIENT]. "
  529. . "Note carrier water pH requirements and any tank-mix incompatibilities.",
  530. ];
  531. } else {
  532. // ── Soil: RAG + prompts ───────────────────────────────────────────────────
  533. $ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
  534. $knowledgeContext = '';
  535. if (!empty($ragChunks)) {
  536. $knowledgeContext = "\n\n===================================================\n"
  537. . "RELEVANT PASSAGES FROM SOIL SCIENCE LITERATURE\n"
  538. . "(William A. Albrecht and other authorities)\n"
  539. . "===================================================\n";
  540. foreach ($ragChunks as $i => $chunk) {
  541. $knowledgeContext .= sprintf("\n[%d] \"%s\" — %s (p.%d)\n%s\n",
  542. $i + 1, $chunk['source'], $chunk['author'], $chunk['page'], $chunk['chunk_text']);
  543. }
  544. }
  545. $ctx = $soilData . $knowledgeContext;
  546. $prompts = [
  547. 'overview' =>
  548. "{$system}\n\n{$ctx}\n\n"
  549. . "TASK: Write an executive overview of these soil test results (3–4 paragraphs). "
  550. . "Cover: (1) overall soil health and fertility level, "
  551. . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
  552. . "(3) any positive attributes. "
  553. . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.",
  554. 'ai_interpretation' =>
  555. "{$system}\n\n{$ctx}\n\n"
  556. . "TASK: Write a detailed technical interpretation structured with these headings:\n"
  557. . "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
  558. . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
  559. . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
  560. . "4. MAJOR ELEMENTS (Ca, Mg, K, Na, P — ppm and saturation %)\n"
  561. . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
  562. . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
  563. . "7. OVERALL SOIL BALANCE ASSESSMENT\n"
  564. . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance "
  565. . "and interactions with other elements. Reference the Albrecht literature where relevant.",
  566. 'foliar' =>
  567. "{$system}\n\n{$ctx}\n\n"
  568. . "TASK: Design a foliar nutrition program to address the deficiencies shown. "
  569. . "Format as a numbered list or table: "
  570. . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
  571. . "Prioritise elements marked [DEFICIENT]. "
  572. . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). "
  573. . "Add a note on carrier water pH and adjuvant recommendations.",
  574. 'microbial' =>
  575. "{$system}\n\n{$ctx}\n\n"
  576. . "TASK: Design a biological/microbial soil improvement program. "
  577. . "Structure your response:\n"
  578. . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n"
  579. . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n"
  580. . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
  581. . "4. TIMING & INTEGRATION with the mineral balancing program\n"
  582. . "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
  583. ];
  584. }
  585. // ── Call Ollama ───────────────────────────────────────────────────────────────
  586. $payload = json_encode([
  587. 'model' => OLLAMA_MODEL,
  588. 'prompt' => $prompts[$section],
  589. 'stream' => false,
  590. 'options' => [
  591. 'temperature' => 0.3,
  592. 'num_predict' => 2048,
  593. 'num_ctx' => 6144,
  594. 'repeat_penalty' => 1.1,
  595. 'keep_alive' => -1, // keep model resident between requests
  596. ],
  597. ]);
  598. $ch = curl_init(OLLAMA_HOST . '/api/generate');
  599. curl_setopt_array($ch, [
  600. CURLOPT_POST => true,
  601. CURLOPT_POSTFIELDS => $payload,
  602. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  603. CURLOPT_RETURNTRANSFER => true,
  604. CURLOPT_TIMEOUT => OLLAMA_TIMEOUT,
  605. CURLOPT_CONNECTTIMEOUT => 5,
  606. ]);
  607. $response = curl_exec($ch);
  608. $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  609. $curlErr = curl_error($ch);
  610. curl_close($ch);
  611. if ($curlErr || $response === false) {
  612. http_response_code(502);
  613. echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]);
  614. exit;
  615. }
  616. if ($httpCode !== 200) {
  617. http_response_code(502);
  618. echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]);
  619. exit;
  620. }
  621. $ollamaData = json_decode($response, true);
  622. $text = trim($ollamaData['response'] ?? '');
  623. if ($text === '') {
  624. http_response_code(502);
  625. echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']);
  626. exit;
  627. }
  628. echo json_encode([
  629. 'success' => true,
  630. 'text' => $text,
  631. 'rag_chunks_used' => count($ragChunks),
  632. ]);
  633. exit;
  634. // ── RAG: retrieve relevant knowledge chunks from MySQL ────────────────────────
  635. function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
  636. {
  637. try {
  638. $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
  639. } catch (PDOException $e) {
  640. return []; // Table doesn't exist yet
  641. }
  642. if ($count === 0) {
  643. return []; // Knowledge base not yet populated — run ingest_knowledge.php
  644. }
  645. // Try vector similarity first
  646. $queryEmbedding = getQueryEmbedding($queryText);
  647. if ($queryEmbedding !== null) {
  648. return vectorSearch($pdo, $queryEmbedding, $topK);
  649. }
  650. // Fallback: MySQL FULLTEXT search
  651. return fulltextSearch($pdo, $section, $topK);
  652. }
  653. function getQueryEmbedding(string $text): ?array
  654. {
  655. $queryText = substr($text, 0, 2000);
  656. // Try new /api/embed (Ollama >= 0.1.26) first
  657. $ch = curl_init(OLLAMA_HOST . '/api/embed');
  658. curl_setopt_array($ch, [
  659. CURLOPT_POST => true,
  660. CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]),
  661. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  662. CURLOPT_RETURNTRANSFER => true,
  663. CURLOPT_TIMEOUT => 15,
  664. CURLOPT_CONNECTTIMEOUT => 3,
  665. ]);
  666. $resp = curl_exec($ch);
  667. $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  668. curl_close($ch);
  669. if ($resp && $code === 200) {
  670. $data = json_decode($resp, true);
  671. $emb = $data['embeddings'][0] ?? null;
  672. if (is_array($emb) && count($emb) > 0) return $emb;
  673. }
  674. // Fallback: legacy /api/embeddings
  675. $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
  676. curl_setopt_array($ch, [
  677. CURLOPT_POST => true,
  678. CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]),
  679. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  680. CURLOPT_RETURNTRANSFER => true,
  681. CURLOPT_TIMEOUT => 15,
  682. CURLOPT_CONNECTTIMEOUT => 3,
  683. ]);
  684. $resp2 = curl_exec($ch);
  685. $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  686. curl_close($ch);
  687. if ($resp2 && $code2 === 200) {
  688. $data2 = json_decode($resp2, true);
  689. $emb2 = $data2['embedding'] ?? null;
  690. if (is_array($emb2) && count($emb2) > 0) return $emb2;
  691. }
  692. return null;
  693. }
  694. function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
  695. {
  696. $stmt = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks');
  697. $scores = [];
  698. while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
  699. $chunkVec = json_decode($row['embedding'], true);
  700. if (!is_array($chunkVec)) continue;
  701. $scores[] = [
  702. 'score' => cosineSimilarity($queryVec, $chunkVec),
  703. 'source' => $row['source'],
  704. 'author' => $row['author'],
  705. 'page' => $row['page'],
  706. 'chunk_text' => $row['chunk_text'],
  707. ];
  708. }
  709. usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
  710. return array_slice($scores, 0, $topK);
  711. }
  712. function fulltextSearch(PDO $pdo, string $section, int $topK): array
  713. {
  714. $keywords = [
  715. 'overview' => 'soil fertility mineral balance calcium magnesium albrecht',
  716. 'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
  717. 'foliar' => 'foliar nutrition trace elements deficiency correction spray',
  718. 'microbial' => 'soil biology microbial organic matter carbon nitrogen humus',
  719. ];
  720. $query = $keywords[$section] ?? 'soil fertility mineral nutrition';
  721. try {
  722. $stmt = $pdo->prepare(
  723. 'SELECT source, author, page, chunk_text,
  724. MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) AS score
  725. FROM knowledge_chunks
  726. WHERE MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE)
  727. ORDER BY score DESC
  728. LIMIT ?'
  729. );
  730. $stmt->execute([$query, $query, $topK]);
  731. return $stmt->fetchAll(PDO::FETCH_ASSOC);
  732. } catch (PDOException $e) {
  733. error_log('RAG fulltext search failed: ' . $e->getMessage());
  734. return [];
  735. }
  736. }
  737. function cosineSimilarity(array $a, array $b): float
  738. {
  739. $dot = $normA = $normB = 0.0;
  740. $len = min(count($a), count($b));
  741. for ($i = 0; $i < $len; $i++) {
  742. $dot += $a[$i] * $b[$i];
  743. $normA += $a[$i] * $a[$i];
  744. $normB += $b[$i] * $b[$i];
  745. }
  746. $denom = sqrt($normA) * sqrt($normB);
  747. return $denom > 0 ? $dot / $denom : 0.0;
  748. }