ollamaGenerate.php 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. <?php
  2. /**
  3. * controllers/ollamaGenerate.php
  4. *
  5. * AJAX POST handler: generates AI agronomic text using Ollama, grounded
  6. * with relevant passages retrieved from the soil science knowledge base
  7. * (William A. Albrecht et al.) stored in MySQL knowledge_chunks.
  8. *
  9. * Flow:
  10. * 1. Load full soil record + specification ranges from DB
  11. * 2. Build a structured data summary covering ALL measured elements
  12. * 3. Embed that summary via nomic-embed-text → cosine search over knowledge_chunks
  13. * 4. Inject retrieved passages + data into a section-specific prompt
  14. * 5. Send to llama3.1 and return the generated text
  15. *
  16. * POST params:
  17. * csrf_token string
  18. * rid int soil_records.id
  19. * rand string soil_records.rand
  20. * section string overview | ai_interpretation | foliar | microbial
  21. *
  22. * Note: run ingestion from Windows where Ollama is accessible:
  23. * php tools/ingest_knowledge.php --test
  24. * php tools/ingest_knowledge.php --file="book.pdf" --author="William A. Albrecht"
  25. */
  26. if (session_status() === PHP_SESSION_NONE) {
  27. session_start();
  28. }
  29. require_once __DIR__ . '/../config/database.php';
  30. require_once __DIR__ . '/../lib/auth.php';
  31. require_once __DIR__ . '/../lib/csrf.php';
  32. header('Content-Type: application/json');
  33. // ── Config ───────────────────────────────────────────────────────────────────
  34. define('OLLAMA_HOST', 'http://192.168.8.73:11434');
  35. define('OLLAMA_MODEL', 'llama3.1:8b-instruct-q4_K_M');
  36. define('EMBED_MODEL', 'nomic-embed-text');
  37. define('RAG_TOP_K', 6); // book passages injected per request
  38. define('OLLAMA_TIMEOUT', 180); // seconds — LLM can be slow
  39. // ── Auth + CSRF ───────────────────────────────────────────────────────────────
  40. if (!isLoggedIn()) {
  41. http_response_code(401);
  42. echo json_encode(['success' => false, 'error' => 'Not authenticated']);
  43. exit;
  44. }
  45. if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
  46. http_response_code(405);
  47. echo json_encode(['success' => false, 'error' => 'Method not allowed']);
  48. exit;
  49. }
  50. if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) {
  51. http_response_code(403);
  52. echo json_encode(['success' => false, 'error' => 'Invalid CSRF token']);
  53. exit;
  54. }
  55. $recordId = (int)trim($_POST['rid'] ?? '');
  56. $randId = trim($_POST['rand'] ?? '');
  57. $section = trim($_POST['section'] ?? '');
  58. $validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
  59. if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
  60. http_response_code(400);
  61. echo json_encode(['success' => false, 'error' => 'Invalid parameters']);
  62. exit;
  63. }
  64. // ── Load soil record + spec ───────────────────────────────────────────────────
  65. try {
  66. $pdo = getDBConnection();
  67. $stmt = $pdo->prepare('SELECT * FROM soil_records WHERE id = ? AND rand = ?');
  68. $stmt->execute([$recordId, $randId]);
  69. $row = $stmt->fetch(PDO::FETCH_ASSOC);
  70. if (!$row) {
  71. http_response_code(404);
  72. echo json_encode(['success' => false, 'error' => 'Record not found']);
  73. exit;
  74. }
  75. $spec = [];
  76. if (!empty($row['soil_type'])) {
  77. $stmtSpec = $pdo->prepare('SELECT * FROM soil_specifications WHERE soil_type = ? LIMIT 1');
  78. $stmtSpec->execute([$row['soil_type']]);
  79. $spec = $stmtSpec->fetch(PDO::FETCH_ASSOC) ?: [];
  80. }
  81. } catch (PDOException $e) {
  82. error_log('DB error in ollamaGenerate.php: ' . $e->getMessage());
  83. http_response_code(500);
  84. echo json_encode(['success' => false, 'error' => 'Database error']);
  85. exit;
  86. }
  87. // ── Helpers ───────────────────────────────────────────────────────────────────
  88. function fv(mixed $v, int $dp = 2): string
  89. {
  90. if ($v === null || $v === '') return 'N/A';
  91. return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
  92. }
  93. function rangeStatus(mixed $value, mixed $min, mixed $max): string
  94. {
  95. if (!is_numeric($value)) return '';
  96. $v = (float)$value;
  97. $lo = is_numeric($min) ? (float)$min : null;
  98. $hi = is_numeric($max) ? (float)$max : null;
  99. if ($lo !== null && $v < $lo) return '[DEFICIENT]';
  100. if ($hi !== null && $v > $hi) return '[EXCESS]';
  101. if ($lo !== null || $hi !== null) return '[IDEAL]';
  102. return '';
  103. }
  104. /** Resolve a value — check spec row first, then soil record row */
  105. function sv(array $spec, array $row, string $col): mixed
  106. {
  107. if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
  108. if (isset($row[$col]) && $row[$col] !== '' && $row[$col] !== null) return $row[$col];
  109. return null;
  110. }
  111. $r = $row;
  112. $s = $spec;
  113. // ── Build comprehensive soil data block (ALL elements) ────────────────────────
  114. $soilData = <<<TEXT
  115. =====================================
  116. SOIL TEST DATA — COMPLETE ANALYSIS
  117. =====================================
  118. Client: {$r['client_name']}
  119. Location: {$r['site_address']}, {$r['state_postcode']}
  120. Crop: {$r['sample_id']}
  121. Crop Type: {$r['crop_type']}
  122. Soil Type: {$r['soil_type']}
  123. Lab No: {$r['lab_no']}
  124. Date Sampled: {$r['date_sampled']}
  125. --- SOIL PHYSICAL / REACTION ---
  126. pH (H2O): {fv($r['ph_h2o'], 1)} [target: 6.2–6.8] {rangeStatus($r['ph_h2o'], 6.2, 6.8)}
  127. pH (CaCl2): {fv($r['ph_cacl2'], 1)}
  128. EC (mS/cm): {fv($r['ec'], 2)}
  129. Colour: {$r['colour']}
  130. Texture: {$r['texture']}
  131. Gravel (%): {fv($r['gravel'], 1)}
  132. --- ORGANIC MATTER ---
  133. Organic Carbon (%): {fv($r['ocarbon'], 1)}
  134. Organic Matter (%): {fv($r['omatter'], 1)}
  135. --- CATION EXCHANGE ---
  136. CEC (meq/100g): {fv($r['cec'], 2)}
  137. TEC (meq/100g): {fv($r['tec'], 2)}
  138. Paramagnetic: {fv($r['paramag'], 0)}
  139. --- NITROGEN ---
  140. Nitrate-N (NO3-N ppm): {fv($r['NO3_N'], 0)} [target: 10–20 ppm] {rangeStatus($r['NO3_N'], 10, 20)}
  141. Ammonium-N (NH3-N ppm): {fv($r['NH3_N'], 0)}
  142. C:N ratio: {fv($r['c_n_ratio'], 1)}
  143. --- PHOSPHORUS ---
  144. P Colwell (ppm): {fv($r['p_colwell'], 0)}
  145. P Morgan (ppm): {fv($r['p_morgan'], 0)}
  146. P Mehlick (ppm): {fv($r['p_mehlick'], 0)}
  147. P Bray2 (ppm): {fv($r['p_bray2'], 0)}
  148. --- MAJOR CATIONS (ppm) ---
  149. Calcium Ca (ppm): {fv($r['BS_ca_ppm'], 0)} [min: {fv(sv($s,$r,'ca_ppm_min'),0)}, max: {fv(sv($s,$r,'ca_ppm_max'),0)}] {rangeStatus($r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max'))}
  150. Magnesium Mg (ppm): {fv($r['BS_mg_ppm'], 0)} [min: {fv(sv($s,$r,'mg_ppm_min'),0)}, max: {fv(sv($s,$r,'mg_ppm_max'),0)}] {rangeStatus($r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max'))}
  151. Potassium K (ppm): {fv($r['BS_k_ppm'], 0)} [min: {fv(sv($s,$r,'k_ppm_min'), 0)}, max: {fv(sv($s,$r,'k_ppm_max'), 0)}] {rangeStatus($r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max'))}
  152. Sodium Na (ppm): {fv($r['BS_na_ppm'], 0)} [min: {fv(sv($s,$r,'na_ppm_min'),0)}, max: {fv(sv($s,$r,'na_ppm_max'),0)}] {rangeStatus($r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max'))}
  153. --- BASE SATURATIONS (%) ---
  154. Calcium Ca (%): {fv($r['BS_ca2'], 2)}% [min: {fv(sv($s,$r,'cabs_min'),1)}, max: {fv(sv($s,$r,'cabs_max'),1)}] {rangeStatus($r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max'))}
  155. Magnesium Mg (%): {fv($r['BS_mg2'], 2)}% [min: {fv(sv($s,$r,'mgbs_min'),1)}, max: {fv(sv($s,$r,'mgbs_max'),1)}] {rangeStatus($r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max'))}
  156. Potassium K (%): {fv($r['BS_k'], 2)}% [min: {fv(sv($s,$r,'kbs_min'), 1)}, max: {fv(sv($s,$r,'kbs_max'), 1)}] {rangeStatus($r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max'))}
  157. Sodium Na (%): {fv($r['BS_na'], 2)}% [min: {fv(sv($s,$r,'nabs_min'),1)}, max: {fv(sv($s,$r,'nabs_max'),1)}] {rangeStatus($r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max'))}
  158. Other Bases (%): {fv($r['BS_ob'], 2)}% [recommended: {fv(sv($s,$r,'ob_rec'),1)}]
  159. Hydrogen (%): {fv($r['BS_h'], 2)}% [recommended: {fv(sv($s,$r,'h_rec'), 1)}]
  160. Aluminium Al3 (%): {fv($r['BS_al3'], 2)}%
  161. --- MORGANS EXTRACT (ppm) ---
  162. Ca Morgan: {fv($r['ca_morgan'], 2)}
  163. Mg Morgan: {fv($r['mg_morgan'], 2)}
  164. K Morgan: {fv($r['k_morgan'], 2)}
  165. Na Morgan: {fv($r['na_morgan'], 2)}
  166. --- MEHLICK-3 EXTRACT (ppm) ---
  167. Ca Mehlick3: {fv($r['ca_mehlick3'], 2)}
  168. Mg Mehlick3: {fv($r['mg_mehlick3'], 2)}
  169. K Mehlick3: {fv($r['k_mehlick3'], 2)}
  170. Na Mehlick3: {fv($r['na_mehlick3'], 2)}
  171. Al Mehlick3: {fv($r['al_mehlick3'], 2)}
  172. --- TRACE ELEMENTS (ppm) ---
  173. Sulfur S (ppm): {fv($r['s_morgan'], 2)}
  174. Boron B (ppm): {fv($r['b_cacl2'], 2)}
  175. Manganese Mn (ppm): {fv($r['mn_dtpa'], 2)}
  176. Copper Cu (ppm): {fv($r['cu_dtpa'], 2)}
  177. Zinc Zn (ppm): {fv($r['zn_dtpa'], 2)}
  178. Iron Fe (ppm): {fv($r['fe_dtpa'], 2)}
  179. Iron Fe (total): {fv($r['fe'], 2)}
  180. Aluminium Al (ppm): {fv($r['al'], 2)}
  181. Silicon Si (ppm): {fv($r['sl_cacl2'], 2)}
  182. Cobalt Co (ppm): {fv($r['co_dtpa'], 2)}
  183. Molybdenum Mo (ppm): {fv($r['m_dtpa'], 2)}
  184. Selenium Se (ppm): {fv($r['se'], 2)}
  185. --- RATIOS ---
  186. Ca:Mg ratio: {fv(is_numeric($r['ca_mehlick3']) && is_numeric($r['mg_mehlick3']) && (float)$r['mg_mehlick3'] != 0 ? round((float)$r['ca_mehlick3']/(float)$r['mg_mehlick3'],1) : null, 1)} [recommended: {fv(sv($s,$r,'ca_mg_ratio'),1)}]
  187. C:N ratio: {fv($r['c_n_ratio'], 1)}
  188. TEXT;
  189. // Append quick deficiency/excess summary
  190. $deficiencies = [];
  191. $excesses = [];
  192. $checkElements = [
  193. ['pH (H2O)', $r['ph_h2o'], 6.2, 6.8],
  194. ['Nitrate-N', $r['NO3_N'], 10, 20],
  195. ['Calcium (ppm)', $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
  196. ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
  197. ['Potassium (ppm)', $r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max')],
  198. ['Sodium (ppm)', $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
  199. ['Ca sat (%)', $r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max')],
  200. ['Mg sat (%)', $r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max')],
  201. ['K sat (%)', $r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max')],
  202. ['Na sat (%)', $r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max')],
  203. ];
  204. foreach ($checkElements as [$label, $val, $lo, $hi]) {
  205. if (!is_numeric($val)) continue;
  206. $v = (float)$val;
  207. if (is_numeric($lo) && $v < (float)$lo) $deficiencies[] = $label;
  208. if (is_numeric($hi) && $v > (float)$hi) $excesses[] = $label;
  209. }
  210. $soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n";
  211. $soilData .= "In Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)) . "\n";
  212. $soilData .= "=====================================\n";
  213. // ── RAG: retrieve relevant passages from knowledge_chunks ─────────────────────
  214. $ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
  215. $knowledgeContext = '';
  216. if (!empty($ragChunks)) {
  217. $knowledgeContext = "\n\n===================================================\n"
  218. . "RELEVANT PASSAGES FROM SOIL SCIENCE LITERATURE\n"
  219. . "(William A. Albrecht and other authorities)\n"
  220. . "===================================================\n";
  221. foreach ($ragChunks as $i => $chunk) {
  222. $knowledgeContext .= sprintf(
  223. "\n[%d] \"%s\" — %s (p.%d)\n%s\n",
  224. $i + 1,
  225. $chunk['source'],
  226. $chunk['author'],
  227. $chunk['page'],
  228. $chunk['chunk_text']
  229. );
  230. }
  231. }
  232. // ── Section-specific prompts ──────────────────────────────────────────────────
  233. $system = "You are a certified agronomist specialising in soil fertility, trained in the "
  234. . "Albrecht method of mineral soil balancing. You have deep knowledge of soil chemistry, "
  235. . "plant nutrition, and the relationship between soil mineral balance and crop and livestock health. "
  236. . "Always ground your recommendations in the measured data provided. "
  237. . "For Australian conditions, reference typical soil types and climate where relevant. "
  238. . "Write in a professional but accessible tone suitable for a farmer-facing report. "
  239. . "When the knowledge passages conflict with your training, prefer the passages — "
  240. . "they are from authoritative soil science texts.";
  241. $ctx = $soilData . $knowledgeContext;
  242. $prompts = [
  243. 'overview' =>
  244. "{$system}\n\n{$ctx}\n\n"
  245. . "TASK: Write an executive overview of these soil test results (3–4 paragraphs). "
  246. . "Cover: (1) overall soil health and fertility level, "
  247. . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
  248. . "(3) any positive attributes. "
  249. . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.",
  250. 'ai_interpretation' =>
  251. "{$system}\n\n{$ctx}\n\n"
  252. . "TASK: Write a detailed technical interpretation structured with these headings:\n"
  253. . "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
  254. . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
  255. . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
  256. . "4. MAJOR ELEMENTS (Ca, Mg, K, Na, P — ppm and saturation %)\n"
  257. . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
  258. . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
  259. . "7. OVERALL SOIL BALANCE ASSESSMENT\n"
  260. . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance "
  261. . "and interactions with other elements. Reference the Albrecht literature where relevant.",
  262. 'foliar' =>
  263. "{$system}\n\n{$ctx}\n\n"
  264. . "TASK: Design a foliar nutrition program to address the deficiencies shown. "
  265. . "Format as a numbered list or table: "
  266. . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
  267. . "Prioritise elements marked [DEFICIENT]. "
  268. . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). "
  269. . "Add a note on carrier water pH and adjuvant recommendations.",
  270. 'microbial' =>
  271. "{$system}\n\n{$ctx}\n\n"
  272. . "TASK: Design a biological/microbial soil improvement program. "
  273. . "Structure your response:\n"
  274. . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n"
  275. . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n"
  276. . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
  277. . "4. TIMING & INTEGRATION with the mineral balancing program\n"
  278. . "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
  279. ];
  280. // ── Call Ollama ───────────────────────────────────────────────────────────────
  281. $payload = json_encode([
  282. 'model' => OLLAMA_MODEL,
  283. 'prompt' => $prompts[$section],
  284. 'stream' => false,
  285. 'options' => [
  286. 'temperature' => 0.3,
  287. 'num_predict' => 2048,
  288. 'num_ctx' => 6144,
  289. 'repeat_penalty' => 1.1,
  290. 'keep_alive' => -1, // keep model resident between requests
  291. ],
  292. ]);
  293. $ch = curl_init(OLLAMA_HOST . '/api/generate');
  294. curl_setopt_array($ch, [
  295. CURLOPT_POST => true,
  296. CURLOPT_POSTFIELDS => $payload,
  297. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  298. CURLOPT_RETURNTRANSFER => true,
  299. CURLOPT_TIMEOUT => OLLAMA_TIMEOUT,
  300. CURLOPT_CONNECTTIMEOUT => 5,
  301. ]);
  302. $response = curl_exec($ch);
  303. $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  304. $curlErr = curl_error($ch);
  305. curl_close($ch);
  306. if ($curlErr || $response === false) {
  307. http_response_code(502);
  308. echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]);
  309. exit;
  310. }
  311. if ($httpCode !== 200) {
  312. http_response_code(502);
  313. echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]);
  314. exit;
  315. }
  316. $ollamaData = json_decode($response, true);
  317. $text = trim($ollamaData['response'] ?? '');
  318. if ($text === '') {
  319. http_response_code(502);
  320. echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']);
  321. exit;
  322. }
  323. echo json_encode([
  324. 'success' => true,
  325. 'text' => $text,
  326. 'rag_chunks_used' => count($ragChunks),
  327. ]);
  328. exit;
  329. // ── RAG: retrieve relevant knowledge chunks from MySQL ────────────────────────
  330. function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
  331. {
  332. try {
  333. $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
  334. } catch (PDOException $e) {
  335. return []; // Table doesn't exist yet
  336. }
  337. if ($count === 0) {
  338. return []; // Knowledge base not yet populated — run ingest_knowledge.php
  339. }
  340. // Try vector similarity first
  341. $queryEmbedding = getQueryEmbedding($queryText);
  342. if ($queryEmbedding !== null) {
  343. return vectorSearch($pdo, $queryEmbedding, $topK);
  344. }
  345. // Fallback: MySQL FULLTEXT search
  346. return fulltextSearch($pdo, $section, $topK);
  347. }
  348. function getQueryEmbedding(string $text): ?array
  349. {
  350. $queryText = substr($text, 0, 2000);
  351. // Try new /api/embed (Ollama >= 0.1.26) first
  352. $ch = curl_init(OLLAMA_HOST . '/api/embed');
  353. curl_setopt_array($ch, [
  354. CURLOPT_POST => true,
  355. CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]),
  356. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  357. CURLOPT_RETURNTRANSFER => true,
  358. CURLOPT_TIMEOUT => 15,
  359. CURLOPT_CONNECTTIMEOUT => 3,
  360. ]);
  361. $resp = curl_exec($ch);
  362. $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  363. curl_close($ch);
  364. if ($resp && $code === 200) {
  365. $data = json_decode($resp, true);
  366. $emb = $data['embeddings'][0] ?? null;
  367. if (is_array($emb) && count($emb) > 0) return $emb;
  368. }
  369. // Fallback: legacy /api/embeddings
  370. $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
  371. curl_setopt_array($ch, [
  372. CURLOPT_POST => true,
  373. CURLOPT_POSTFIELDS => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]),
  374. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  375. CURLOPT_RETURNTRANSFER => true,
  376. CURLOPT_TIMEOUT => 15,
  377. CURLOPT_CONNECTTIMEOUT => 3,
  378. ]);
  379. $resp2 = curl_exec($ch);
  380. $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  381. curl_close($ch);
  382. if ($resp2 && $code2 === 200) {
  383. $data2 = json_decode($resp2, true);
  384. $emb2 = $data2['embedding'] ?? null;
  385. if (is_array($emb2) && count($emb2) > 0) return $emb2;
  386. }
  387. return null;
  388. }
  389. function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
  390. {
  391. $stmt = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks');
  392. $scores = [];
  393. while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
  394. $chunkVec = json_decode($row['embedding'], true);
  395. if (!is_array($chunkVec)) continue;
  396. $scores[] = [
  397. 'score' => cosineSimilarity($queryVec, $chunkVec),
  398. 'source' => $row['source'],
  399. 'author' => $row['author'],
  400. 'page' => $row['page'],
  401. 'chunk_text' => $row['chunk_text'],
  402. ];
  403. }
  404. usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
  405. return array_slice($scores, 0, $topK);
  406. }
  407. function fulltextSearch(PDO $pdo, string $section, int $topK): array
  408. {
  409. $keywords = [
  410. 'overview' => 'soil fertility mineral balance calcium magnesium albrecht',
  411. 'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
  412. 'foliar' => 'foliar nutrition trace elements deficiency correction spray',
  413. 'microbial' => 'soil biology microbial organic matter carbon nitrogen humus',
  414. ];
  415. $query = $keywords[$section] ?? 'soil fertility mineral nutrition';
  416. try {
  417. $stmt = $pdo->prepare(
  418. 'SELECT source, author, page, chunk_text,
  419. MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE) AS score
  420. FROM knowledge_chunks
  421. WHERE MATCH(chunk_text) AGAINST(? IN NATURAL LANGUAGE MODE)
  422. ORDER BY score DESC
  423. LIMIT ?'
  424. );
  425. $stmt->execute([$query, $query, $topK]);
  426. return $stmt->fetchAll(PDO::FETCH_ASSOC);
  427. } catch (PDOException $e) {
  428. error_log('RAG fulltext search failed: ' . $e->getMessage());
  429. return [];
  430. }
  431. }
  432. function cosineSimilarity(array $a, array $b): float
  433. {
  434. $dot = $normA = $normB = 0.0;
  435. $len = min(count($a), count($b));
  436. for ($i = 0; $i < $len; $i++) {
  437. $dot += $a[$i] * $b[$i];
  438. $normA += $a[$i] * $a[$i];
  439. $normB += $b[$i] * $b[$i];
  440. }
  441. $denom = sqrt($normA) * sqrt($normB);
  442. return $denom > 0 ? $dot / $denom : 0.0;
  443. }