Benjamin Harris 2 месяца назад
Родитель
Коммит
278373c3e9
1 изменённый файл: 86 добавлений и 123 удаления
  1. 86 123
      controllers/ollamaGenerate.php

+ 86 - 123
controllers/ollamaGenerate.php

@@ -4,12 +4,12 @@
  *
  *
  * AJAX POST handler: generates AI agronomic text using Ollama, grounded
  * AJAX POST handler: generates AI agronomic text using Ollama, grounded
  * with relevant passages retrieved from the soil science knowledge base
  * with relevant passages retrieved from the soil science knowledge base
- * (William A. Albrecht et al.) via RAG (Retrieval-Augmented Generation).
+ * (William A. Albrecht et al.) stored in MySQL knowledge_chunks.
  *
  *
  * Flow:
  * Flow:
- *  1. Load full soil record + specification ranges
+ *  1. Load full soil record + specification ranges from DB
  *  2. Build a structured data summary covering ALL measured elements
  *  2. Build a structured data summary covering ALL measured elements
- *  3. Embed that summary via nomic-embed-text → retrieve top-K book passages
+ *  3. Embed that summary via nomic-embed-text → cosine search over knowledge_chunks
  *  4. Inject retrieved passages + data into a section-specific prompt
  *  4. Inject retrieved passages + data into a section-specific prompt
  *  5. Send to llama3.1 and return the generated text
  *  5. Send to llama3.1 and return the generated text
  *
  *
@@ -18,6 +18,10 @@
  *   rid         int     soil_records.id
  *   rid         int     soil_records.id
  *   rand        string  soil_records.rand
  *   rand        string  soil_records.rand
  *   section     string  overview | ai_interpretation | foliar | microbial
  *   section     string  overview | ai_interpretation | foliar | microbial
+ *
+ * Note: run ingestion from Windows where Ollama is accessible:
+ *   php tools/ingest_knowledge.php --test
+ *   php tools/ingest_knowledge.php --file="book.pdf" --author="William A. Albrecht"
  */
  */
 
 
 if (session_status() === PHP_SESSION_NONE) {
 if (session_status() === PHP_SESSION_NONE) {
@@ -31,11 +35,11 @@ require_once __DIR__ . '/../lib/csrf.php';
 header('Content-Type: application/json');
 header('Content-Type: application/json');
 
 
 // ── Config ───────────────────────────────────────────────────────────────────
 // ── Config ───────────────────────────────────────────────────────────────────
-define('OLLAMA_HOST',      'http://192.168.8.73:11434');
-define('OLLAMA_MODEL',     'llama3.1:8b-instruct-q4_K_M');
-define('EMBED_MODEL',      'nomic-embed-text');
-define('RAG_TOP_K',        6);    // number of knowledge chunks to inject per request
-define('OLLAMA_TIMEOUT',   180);  // seconds
+define('OLLAMA_HOST',    'http://192.168.8.73:11434');
+define('OLLAMA_MODEL',   'llama3.1:8b-instruct-q4_K_M');
+define('EMBED_MODEL',    'nomic-embed-text');
+define('RAG_TOP_K',      6);    // book passages injected per request
+define('OLLAMA_TIMEOUT', 180);  // seconds — LLM can be slow
 
 
 // ── Auth + CSRF ───────────────────────────────────────────────────────────────
 // ── Auth + CSRF ───────────────────────────────────────────────────────────────
 if (!isLoggedIn()) {
 if (!isLoggedIn()) {
@@ -56,9 +60,9 @@ if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) {
     exit;
     exit;
 }
 }
 
 
-$recordId = (int)trim($_POST['rid']   ?? '');
-$randId   = trim($_POST['rand']       ?? '');
-$section  = trim($_POST['section']    ?? '');
+$recordId = (int)trim($_POST['rid']  ?? '');
+$randId   = trim($_POST['rand']      ?? '');
+$section  = trim($_POST['section']   ?? '');
 
 
 $validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
 $validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
 if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
 if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
@@ -95,14 +99,14 @@ try {
     exit;
     exit;
 }
 }
 
 
-// ── Helper: safe float format ────────────────────────────────────────────────
+// ── Helpers ───────────────────────────────────────────────────────────────────
+
 function fv(mixed $v, int $dp = 2): string
 function fv(mixed $v, int $dp = 2): string
 {
 {
     if ($v === null || $v === '') return 'N/A';
     if ($v === null || $v === '') return 'N/A';
     return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
     return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
 }
 }
 
 
-// ── Helper: status vs spec range ─────────────────────────────────────────────
 function rangeStatus(mixed $value, mixed $min, mixed $max): string
 function rangeStatus(mixed $value, mixed $min, mixed $max): string
 {
 {
     if (!is_numeric($value)) return '';
     if (!is_numeric($value)) return '';
@@ -115,7 +119,7 @@ function rangeStatus(mixed $value, mixed $min, mixed $max): string
     return '';
     return '';
 }
 }
 
 
-// ── Helper: resolve spec value from spec row then record row ─────────────────
+/** Resolve a value — check spec row first, then soil record row */
 function sv(array $spec, array $row, string $col): mixed
 function sv(array $spec, array $row, string $col): mixed
 {
 {
     if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
     if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
@@ -126,8 +130,7 @@ function sv(array $spec, array $row, string $col): mixed
 $r = $row;
 $r = $row;
 $s = $spec;
 $s = $spec;
 
 
-// ── Build comprehensive soil data block ───────────────────────────────────────
-// Includes ALL measured elements with status against spec targets
+// ── Build comprehensive soil data block (ALL elements) ────────────────────────
 $soilData = <<<TEXT
 $soilData = <<<TEXT
 =====================================
 =====================================
 SOIL TEST DATA — COMPLETE ANALYSIS
 SOIL TEST DATA — COMPLETE ANALYSIS
@@ -138,7 +141,6 @@ Crop:         {$r['sample_id']}
 Crop Type:    {$r['crop_type']}
 Crop Type:    {$r['crop_type']}
 Soil Type:    {$r['soil_type']}
 Soil Type:    {$r['soil_type']}
 Lab No:       {$r['lab_no']}
 Lab No:       {$r['lab_no']}
-Batch:        {$r['batch_no']}
 Date Sampled: {$r['date_sampled']}
 Date Sampled: {$r['date_sampled']}
 
 
 --- SOIL PHYSICAL / REACTION ---
 --- SOIL PHYSICAL / REACTION ---
@@ -161,7 +163,7 @@ Paramagnetic:            {fv($r['paramag'], 0)}
 --- NITROGEN ---
 --- NITROGEN ---
 Nitrate-N (NO3-N ppm):   {fv($r['NO3_N'],   0)}   [target: 10–20 ppm]  {rangeStatus($r['NO3_N'], 10, 20)}
 Nitrate-N (NO3-N ppm):   {fv($r['NO3_N'],   0)}   [target: 10–20 ppm]  {rangeStatus($r['NO3_N'], 10, 20)}
 Ammonium-N (NH3-N ppm):  {fv($r['NH3_N'],   0)}
 Ammonium-N (NH3-N ppm):  {fv($r['NH3_N'],   0)}
-Total N (est. from C:N): C:N ratio {fv($r['c_n_ratio'], 1)}
+C:N ratio:               {fv($r['c_n_ratio'], 1)}
 
 
 --- PHOSPHORUS ---
 --- PHOSPHORUS ---
 P Colwell (ppm):         {fv($r['p_colwell'], 0)}
 P Colwell (ppm):         {fv($r['p_colwell'], 0)}
@@ -208,31 +210,30 @@ Iron      Fe (total):    {fv($r['fe'],       2)}
 Aluminium Al (ppm):      {fv($r['al'],       2)}
 Aluminium Al (ppm):      {fv($r['al'],       2)}
 Silicon   Si (ppm):      {fv($r['sl_cacl2'], 2)}
 Silicon   Si (ppm):      {fv($r['sl_cacl2'], 2)}
 Cobalt    Co (ppm):      {fv($r['co_dtpa'],  2)}
 Cobalt    Co (ppm):      {fv($r['co_dtpa'],  2)}
-Molybdenum M (ppm):      {fv($r['m_dtpa'],   2)}
+Molybdenum Mo (ppm):     {fv($r['m_dtpa'],   2)}
 Selenium  Se (ppm):      {fv($r['se'],       2)}
 Selenium  Se (ppm):      {fv($r['se'],       2)}
 
 
 --- RATIOS ---
 --- RATIOS ---
 Ca:Mg ratio:             {fv(is_numeric($r['ca_mehlick3']) && is_numeric($r['mg_mehlick3']) && (float)$r['mg_mehlick3'] != 0 ? round((float)$r['ca_mehlick3']/(float)$r['mg_mehlick3'],1) : null, 1)}   [recommended: {fv(sv($s,$r,'ca_mg_ratio'),1)}]
 Ca:Mg ratio:             {fv(is_numeric($r['ca_mehlick3']) && is_numeric($r['mg_mehlick3']) && (float)$r['mg_mehlick3'] != 0 ? round((float)$r['ca_mehlick3']/(float)$r['mg_mehlick3'],1) : null, 1)}   [recommended: {fv(sv($s,$r,'ca_mg_ratio'),1)}]
 C:N  ratio:              {fv($r['c_n_ratio'], 1)}
 C:N  ratio:              {fv($r['c_n_ratio'], 1)}
 
 
---- DEFICIENT ELEMENTS SUMMARY ---
 TEXT;
 TEXT;
 
 
-// Append a quick plain-English deficiency list to help the LLM focus
+// Append quick deficiency/excess summary
 $deficiencies = [];
 $deficiencies = [];
 $excesses     = [];
 $excesses     = [];
 
 
 $checkElements = [
 $checkElements = [
-    ['pH (H2O)',          $r['ph_h2o'],    6.2,  6.8],
-    ['Nitrate-N',         $r['NO3_N'],     10,   20],
-    ['Calcium (ppm)',     $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
-    ['Magnesium (ppm)',   $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
-    ['Potassium (ppm)',   $r['BS_k_ppm'],  sv($s,$r,'k_ppm_min'),  sv($s,$r,'k_ppm_max')],
-    ['Sodium (ppm)',      $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
-    ['Ca sat (%)',        $r['BS_ca2'],    sv($s,$r,'cabs_min'),   sv($s,$r,'cabs_max')],
-    ['Mg sat (%)',        $r['BS_mg2'],    sv($s,$r,'mgbs_min'),   sv($s,$r,'mgbs_max')],
-    ['K sat (%)',         $r['BS_k'],      sv($s,$r,'kbs_min'),    sv($s,$r,'kbs_max')],
-    ['Na sat (%)',        $r['BS_na'],     sv($s,$r,'nabs_min'),   sv($s,$r,'nabs_max')],
+    ['pH (H2O)',        $r['ph_h2o'],    6.2,  6.8],
+    ['Nitrate-N',       $r['NO3_N'],     10,   20],
+    ['Calcium (ppm)',   $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
+    ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
+    ['Potassium (ppm)', $r['BS_k_ppm'],  sv($s,$r,'k_ppm_min'),  sv($s,$r,'k_ppm_max')],
+    ['Sodium (ppm)',    $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
+    ['Ca sat (%)',      $r['BS_ca2'],    sv($s,$r,'cabs_min'),   sv($s,$r,'cabs_max')],
+    ['Mg sat (%)',      $r['BS_mg2'],    sv($s,$r,'mgbs_min'),   sv($s,$r,'mgbs_max')],
+    ['K sat (%)',       $r['BS_k'],      sv($s,$r,'kbs_min'),    sv($s,$r,'kbs_max')],
+    ['Na sat (%)',      $r['BS_na'],     sv($s,$r,'nabs_min'),   sv($s,$r,'nabs_max')],
 ];
 ];
 
 
 foreach ($checkElements as [$label, $val, $lo, $hi]) {
 foreach ($checkElements as [$label, $val, $lo, $hi]) {
@@ -242,13 +243,13 @@ foreach ($checkElements as [$label, $val, $lo, $hi]) {
     if (is_numeric($hi) && $v > (float)$hi) $excesses[]     = $label;
     if (is_numeric($hi) && $v > (float)$hi) $excesses[]     = $label;
 }
 }
 
 
-$soilData .= "\nDeficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies));
-$soilData .= "\nIn Excess: " . (empty($excesses)     ? 'None detected' : implode(', ', $excesses));
-$soilData .= "\n=====================================\n";
+$soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n";
+$soilData .= "In Excess: " . (empty($excesses)     ? 'None detected' : implode(', ', $excesses))     . "\n";
+$soilData .= "=====================================\n";
 
 
-// ── RAG: embed the soil data query, retrieve relevant book passages ───────────
+// ── RAG: retrieve relevant passages from knowledge_chunks ─────────────────────
+$ragChunks       = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
 $knowledgeContext = '';
 $knowledgeContext = '';
-$ragChunks        = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
 
 
 if (!empty($ragChunks)) {
 if (!empty($ragChunks)) {
     $knowledgeContext = "\n\n===================================================\n"
     $knowledgeContext = "\n\n===================================================\n"
@@ -267,34 +268,30 @@ if (!empty($ragChunks)) {
     }
     }
 }
 }
 
 
-// ── Section-specific system prompts ──────────────────────────────────────────
-$systemInstruction = "You are a certified agronomist specialising in soil fertility, "
-    . "trained in the Albrecht method of soil balancing. "
-    . "You have deep knowledge of soil chemistry, plant nutrition, and the relationship "
-    . "between soil mineral balance and crop/livestock health. "
-    . "Always ground your recommendations in the measured data. "
+// ── Section-specific prompts ──────────────────────────────────────────────────
+$system = "You are a certified agronomist specialising in soil fertility, trained in the "
+    . "Albrecht method of mineral soil balancing. You have deep knowledge of soil chemistry, "
+    . "plant nutrition, and the relationship between soil mineral balance and crop and livestock health. "
+    . "Always ground your recommendations in the measured data provided. "
     . "For Australian conditions, reference typical soil types and climate where relevant. "
     . "For Australian conditions, reference typical soil types and climate where relevant. "
     . "Write in a professional but accessible tone suitable for a farmer-facing report. "
     . "Write in a professional but accessible tone suitable for a farmer-facing report. "
-    . "When the knowledge passages conflict with your training, prefer the passages — they "
-    . "are from authoritative soil science texts.";
+    . "When the knowledge passages conflict with your training, prefer the passages — "
+    . "they are from authoritative soil science texts.";
 
 
-$baseContext = $soilData . $knowledgeContext;
+$ctx = $soilData . $knowledgeContext;
 
 
 $prompts = [
 $prompts = [
-
     'overview' =>
     'overview' =>
-        $systemInstruction . "\n\n" . $baseContext
-        . "\n\nTASK: Write an executive overview of these soil test results (3–4 paragraphs). "
+        "{$system}\n\n{$ctx}\n\n"
+        . "TASK: Write an executive overview of these soil test results (3–4 paragraphs). "
         . "Cover: (1) overall soil health and fertility level, "
         . "Cover: (1) overall soil health and fertility level, "
         . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
         . "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
-        . "(3) any positive attributes of this soil. "
-        . "Use the Albrecht philosophy as a framework where applicable. "
-        . "Do not list specific product names in this section.",
+        . "(3) any positive attributes. "
+        . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.",
 
 
     'ai_interpretation' =>
     'ai_interpretation' =>
-        $systemInstruction . "\n\n" . $baseContext
-        . "\n\nTASK: Write a detailed technical interpretation of ALL elements in this soil test. "
-        . "Structure your response with these sections:\n"
+        "{$system}\n\n{$ctx}\n\n"
+        . "TASK: Write a detailed technical interpretation structured with these headings:\n"
         . "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
         . "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
         . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
         . "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
         . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
         . "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
@@ -302,29 +299,26 @@ $prompts = [
         . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
         . "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
         . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
         . "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
         . "7. OVERALL SOIL BALANCE ASSESSMENT\n"
         . "7. OVERALL SOIL BALANCE ASSESSMENT\n"
-        . "For each element marked [DEFICIENT] or [EXCESS], explain the agronomic significance "
+        . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance "
         . "and interactions with other elements. Reference the Albrecht literature where relevant.",
         . "and interactions with other elements. Reference the Albrecht literature where relevant.",
 
 
     'foliar' =>
     'foliar' =>
-        $systemInstruction . "\n\n" . $baseContext
-        . "\n\nTASK: Design a foliar nutrition program to address the deficiencies shown. "
-        . "Format the program as a table or numbered list with: "
-        . "Growth Stage | Product Type | Active Element | Rate (L or kg/ha) | Timing/Frequency. "
+        "{$system}\n\n{$ctx}\n\n"
+        . "TASK: Design a foliar nutrition program to address the deficiencies shown. "
+        . "Format as a numbered list or table: "
+        . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
         . "Prioritise elements marked [DEFICIENT]. "
         . "Prioritise elements marked [DEFICIENT]. "
-        . "Note any antagonisms (e.g. Ca/Mg competition, Zn/P interaction, K/Mg lockout). "
-        . "Keep product recommendations generic (e.g. 'chelated zinc', 'calcium nitrate') "
-        . "rather than brand names. "
+        . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). "
         . "Add a note on carrier water pH and adjuvant recommendations.",
         . "Add a note on carrier water pH and adjuvant recommendations.",
 
 
     'microbial' =>
     'microbial' =>
-        $systemInstruction . "\n\n" . $baseContext
-        . "\n\nTASK: Design a biological/microbial soil improvement program. "
-        . "Consider the organic matter level, C:N ratio, pH, and base saturation balance shown. "
+        "{$system}\n\n{$ctx}\n\n"
+        . "TASK: Design a biological/microbial soil improvement program. "
         . "Structure your response:\n"
         . "Structure your response:\n"
-        . "1. CURRENT BIOLOGY ASSESSMENT (based on OM, C:N, pH)\n"
-        . "2. RECOMMENDED INOCULANTS (e.g. mycorrhizae, rhizobia, EM, compost tea)\n"
+        . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n"
+        . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n"
         . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
         . "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
-        . "4. TIMING & INTEGRATION with the soil balancing program\n"
+        . "4. TIMING & INTEGRATION with the mineral balancing program\n"
         . "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
         . "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
 ];
 ];
 
 
@@ -334,8 +328,11 @@ $payload = json_encode([
     'prompt' => $prompts[$section],
     'prompt' => $prompts[$section],
     'stream' => false,
     'stream' => false,
     'options' => [
     'options' => [
-        'temperature' => 0.3,   // lower = more factual / less creative
-        'num_predict' => 2048,
+        'temperature'    => 0.3,
+        'num_predict'    => 2048,
+        'num_ctx'        => 6144,
+        'repeat_penalty' => 1.1,
+        'keep_alive'     => -1,   // keep model resident between requests
     ],
     ],
 ]);
 ]);
 
 
@@ -376,53 +373,41 @@ if ($text === '') {
 }
 }
 
 
 echo json_encode([
 echo json_encode([
-    'success' => true,
-    'text'    => $text,
-    'rag_chunks_used' => count($ragChunks),
+    'success'          => true,
+    'text'             => $text,
+    'rag_chunks_used'  => count($ragChunks),
 ]);
 ]);
 exit;
 exit;
 
 
-// ── RAG retrieval ────────────────────────────────────────────────────────────
+// ── RAG: retrieve relevant knowledge chunks from MySQL ────────────────────────
 
 
-/**
- * Embed a query string, then retrieve the top-K most similar knowledge chunks.
- * Falls back to MySQL FULLTEXT search if no embeddings are in the table or
- * if the embedding API is unavailable.
- *
- * @param PDO    $pdo
- * @param string $queryText  The soil data summary used as the retrieval query
- * @param string $section    Current section (used to build keyword fallback)
- * @param int    $topK
- * @return array  Array of row arrays (source, author, page, chunk_text)
- */
 function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
 function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
 {
 {
-    // Check if we have any chunks at all
-    $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
+    try {
+        $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
+    } catch (PDOException $e) {
+        return [];  // Table doesn't exist yet
+    }
+
     if ($count === 0) {
     if ($count === 0) {
-        return [];  // Knowledge base not yet populated
+        return [];  // Knowledge base not yet populated — run ingest_knowledge.php
     }
     }
 
 
-    // ── Try vector similarity search first ──────────────────────────────────
+    // Try vector similarity first
     $queryEmbedding = getQueryEmbedding($queryText);
     $queryEmbedding = getQueryEmbedding($queryText);
-
     if ($queryEmbedding !== null) {
     if ($queryEmbedding !== null) {
         return vectorSearch($pdo, $queryEmbedding, $topK);
         return vectorSearch($pdo, $queryEmbedding, $topK);
     }
     }
 
 
-    // ── Fallback: MySQL FULLTEXT search ─────────────────────────────────────
+    // Fallback: MySQL FULLTEXT search
     return fulltextSearch($pdo, $section, $topK);
     return fulltextSearch($pdo, $section, $topK);
 }
 }
 
 
-/**
- * Embed text via Ollama. Tries new /api/embed first, falls back to legacy
- * /api/embeddings. Returns float[] or null on failure.
- */
 function getQueryEmbedding(string $text): ?array
 function getQueryEmbedding(string $text): ?array
 {
 {
     $queryText = substr($text, 0, 2000);
     $queryText = substr($text, 0, 2000);
 
 
-    // ── New API (/api/embed, Ollama >= 0.1.26) ───────────────────────────────
+    // Try new /api/embed (Ollama >= 0.1.26) first
     $ch = curl_init(OLLAMA_HOST . '/api/embed');
     $ch = curl_init(OLLAMA_HOST . '/api/embed');
     curl_setopt_array($ch, [
     curl_setopt_array($ch, [
         CURLOPT_POST           => true,
         CURLOPT_POST           => true,
@@ -442,7 +427,7 @@ function getQueryEmbedding(string $text): ?array
         if (is_array($emb) && count($emb) > 0) return $emb;
         if (is_array($emb) && count($emb) > 0) return $emb;
     }
     }
 
 
-    // ── Legacy API (/api/embeddings) ─────────────────────────────────────────
+    // Fallback: legacy /api/embeddings
     $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
     $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
     curl_setopt_array($ch, [
     curl_setopt_array($ch, [
         CURLOPT_POST           => true,
         CURLOPT_POST           => true,
@@ -465,25 +450,16 @@ function getQueryEmbedding(string $text): ?array
     return null;
     return null;
 }
 }
 
 
-/**
- * Load all chunk embeddings from DB, compute cosine similarity, return top-K.
- * For corpora up to ~10k chunks this is fast enough in PHP.
- */
 function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
 function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
 {
 {
-    $stmt = $pdo->query(
-        'SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks'
-    );
-
+    $stmt   = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks');
     $scores = [];
     $scores = [];
 
 
     while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
     while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
         $chunkVec = json_decode($row['embedding'], true);
         $chunkVec = json_decode($row['embedding'], true);
         if (!is_array($chunkVec)) continue;
         if (!is_array($chunkVec)) continue;
-
-        $sim = cosineSimilarity($queryVec, $chunkVec);
         $scores[] = [
         $scores[] = [
-            'score'      => $sim,
+            'score'      => cosineSimilarity($queryVec, $chunkVec),
             'source'     => $row['source'],
             'source'     => $row['source'],
             'author'     => $row['author'],
             'author'     => $row['author'],
             'page'       => $row['page'],
             'page'       => $row['page'],
@@ -491,24 +467,18 @@ function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
         ];
         ];
     }
     }
 
 
-    // Sort descending by score, return top-K
     usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
     usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
     return array_slice($scores, 0, $topK);
     return array_slice($scores, 0, $topK);
 }
 }
 
 
-/**
- * MySQL FULLTEXT fallback when embeddings aren't available.
- */
 function fulltextSearch(PDO $pdo, string $section, int $topK): array
 function fulltextSearch(PDO $pdo, string $section, int $topK): array
 {
 {
-    // Section-specific keyword hints for the search
     $keywords = [
     $keywords = [
-        'overview'          => 'soil fertility mineral balance calcium magnesium',
+        'overview'          => 'soil fertility mineral balance calcium magnesium albrecht',
         'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
         'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
         'foliar'            => 'foliar nutrition trace elements deficiency correction spray',
         'foliar'            => 'foliar nutrition trace elements deficiency correction spray',
         'microbial'         => 'soil biology microbial organic matter carbon nitrogen humus',
         'microbial'         => 'soil biology microbial organic matter carbon nitrogen humus',
     ];
     ];
-
     $query = $keywords[$section] ?? 'soil fertility mineral nutrition';
     $query = $keywords[$section] ?? 'soil fertility mineral nutrition';
 
 
     try {
     try {
@@ -528,22 +498,15 @@ function fulltextSearch(PDO $pdo, string $section, int $topK): array
     }
     }
 }
 }
 
 
-/**
- * Cosine similarity between two equal-length float vectors.
- */
 function cosineSimilarity(array $a, array $b): float
 function cosineSimilarity(array $a, array $b): float
 {
 {
-    $dot = 0.0;
-    $normA = 0.0;
-    $normB = 0.0;
+    $dot = $normA = $normB = 0.0;
     $len = min(count($a), count($b));
     $len = min(count($a), count($b));
-
     for ($i = 0; $i < $len; $i++) {
     for ($i = 0; $i < $len; $i++) {
         $dot   += $a[$i] * $b[$i];
         $dot   += $a[$i] * $b[$i];
         $normA += $a[$i] * $a[$i];
         $normA += $a[$i] * $a[$i];
         $normB += $b[$i] * $b[$i];
         $normB += $b[$i] * $b[$i];
     }
     }
-
     $denom = sqrt($normA) * sqrt($normB);
     $denom = sqrt($normA) * sqrt($normB);
     return $denom > 0 ? $dot / $denom : 0.0;
     return $denom > 0 ? $dot / $denom : 0.0;
 }
 }