|
@@ -4,12 +4,12 @@
|
|
|
*
|
|
*
|
|
|
* AJAX POST handler: generates AI agronomic text using Ollama, grounded
|
|
* AJAX POST handler: generates AI agronomic text using Ollama, grounded
|
|
|
* with relevant passages retrieved from the soil science knowledge base
|
|
* with relevant passages retrieved from the soil science knowledge base
|
|
|
- * (William A. Albrecht et al.) via RAG (Retrieval-Augmented Generation).
|
|
|
|
|
|
|
+ * (William A. Albrecht et al.) stored in MySQL knowledge_chunks.
|
|
|
*
|
|
*
|
|
|
* Flow:
|
|
* Flow:
|
|
|
- * 1. Load full soil record + specification ranges
|
|
|
|
|
|
|
+ * 1. Load full soil record + specification ranges from DB
|
|
|
* 2. Build a structured data summary covering ALL measured elements
|
|
* 2. Build a structured data summary covering ALL measured elements
|
|
|
- * 3. Embed that summary via nomic-embed-text → retrieve top-K book passages
|
|
|
|
|
|
|
+ * 3. Embed that summary via nomic-embed-text → cosine search over knowledge_chunks
|
|
|
* 4. Inject retrieved passages + data into a section-specific prompt
|
|
* 4. Inject retrieved passages + data into a section-specific prompt
|
|
|
* 5. Send to llama3.1 and return the generated text
|
|
* 5. Send to llama3.1 and return the generated text
|
|
|
*
|
|
*
|
|
@@ -18,6 +18,10 @@
|
|
|
* rid int soil_records.id
|
|
* rid int soil_records.id
|
|
|
* rand string soil_records.rand
|
|
* rand string soil_records.rand
|
|
|
* section string overview | ai_interpretation | foliar | microbial
|
|
* section string overview | ai_interpretation | foliar | microbial
|
|
|
|
|
+ *
|
|
|
|
|
+ * Note: run ingestion from Windows where Ollama is accessible:
|
|
|
|
|
+ * php tools/ingest_knowledge.php --test
|
|
|
|
|
+ * php tools/ingest_knowledge.php --file="book.pdf" --author="William A. Albrecht"
|
|
|
*/
|
|
*/
|
|
|
|
|
|
|
|
if (session_status() === PHP_SESSION_NONE) {
|
|
if (session_status() === PHP_SESSION_NONE) {
|
|
@@ -31,11 +35,11 @@ require_once __DIR__ . '/../lib/csrf.php';
|
|
|
header('Content-Type: application/json');
|
|
header('Content-Type: application/json');
|
|
|
|
|
|
|
|
// ── Config ───────────────────────────────────────────────────────────────────
|
|
// ── Config ───────────────────────────────────────────────────────────────────
|
|
|
-define('OLLAMA_HOST', 'http://192.168.8.73:11434');
|
|
|
|
|
-define('OLLAMA_MODEL', 'llama3.1:8b-instruct-q4_K_M');
|
|
|
|
|
-define('EMBED_MODEL', 'nomic-embed-text');
|
|
|
|
|
-define('RAG_TOP_K', 6); // number of knowledge chunks to inject per request
|
|
|
|
|
-define('OLLAMA_TIMEOUT', 180); // seconds
|
|
|
|
|
|
|
+define('OLLAMA_HOST', 'http://192.168.8.73:11434');
|
|
|
|
|
+define('OLLAMA_MODEL', 'llama3.1:8b-instruct-q4_K_M');
|
|
|
|
|
+define('EMBED_MODEL', 'nomic-embed-text');
|
|
|
|
|
+define('RAG_TOP_K', 6); // book passages injected per request
|
|
|
|
|
+define('OLLAMA_TIMEOUT', 180); // seconds — LLM can be slow
|
|
|
|
|
|
|
|
// ── Auth + CSRF ───────────────────────────────────────────────────────────────
|
|
// ── Auth + CSRF ───────────────────────────────────────────────────────────────
|
|
|
if (!isLoggedIn()) {
|
|
if (!isLoggedIn()) {
|
|
@@ -56,9 +60,9 @@ if (!verifyCsrfToken($_POST['csrf_token'] ?? '')) {
|
|
|
exit;
|
|
exit;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-$recordId = (int)trim($_POST['rid'] ?? '');
|
|
|
|
|
-$randId = trim($_POST['rand'] ?? '');
|
|
|
|
|
-$section = trim($_POST['section'] ?? '');
|
|
|
|
|
|
|
+$recordId = (int)trim($_POST['rid'] ?? '');
|
|
|
|
|
+$randId = trim($_POST['rand'] ?? '');
|
|
|
|
|
+$section = trim($_POST['section'] ?? '');
|
|
|
|
|
|
|
|
$validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
|
|
$validSections = ['overview', 'ai_interpretation', 'foliar', 'microbial'];
|
|
|
if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
|
|
if (!$recordId || $randId === '' || !in_array($section, $validSections, true)) {
|
|
@@ -95,14 +99,14 @@ try {
|
|
|
exit;
|
|
exit;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// ── Helper: safe float format ────────────────────────────────────────────────
|
|
|
|
|
|
|
+// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
|
+
|
|
|
function fv(mixed $v, int $dp = 2): string
|
|
function fv(mixed $v, int $dp = 2): string
|
|
|
{
|
|
{
|
|
|
if ($v === null || $v === '') return 'N/A';
|
|
if ($v === null || $v === '') return 'N/A';
|
|
|
return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
|
|
return is_numeric($v) ? number_format((float)$v, $dp) : (string)$v;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// ── Helper: status vs spec range ─────────────────────────────────────────────
|
|
|
|
|
function rangeStatus(mixed $value, mixed $min, mixed $max): string
|
|
function rangeStatus(mixed $value, mixed $min, mixed $max): string
|
|
|
{
|
|
{
|
|
|
if (!is_numeric($value)) return '';
|
|
if (!is_numeric($value)) return '';
|
|
@@ -115,7 +119,7 @@ function rangeStatus(mixed $value, mixed $min, mixed $max): string
|
|
|
return '';
|
|
return '';
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// ── Helper: resolve spec value from spec row then record row ─────────────────
|
|
|
|
|
|
|
+/** Resolve a value — check spec row first, then soil record row */
|
|
|
function sv(array $spec, array $row, string $col): mixed
|
|
function sv(array $spec, array $row, string $col): mixed
|
|
|
{
|
|
{
|
|
|
if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
|
|
if (isset($spec[$col]) && $spec[$col] !== '' && $spec[$col] !== null) return $spec[$col];
|
|
@@ -126,8 +130,7 @@ function sv(array $spec, array $row, string $col): mixed
|
|
|
$r = $row;
|
|
$r = $row;
|
|
|
$s = $spec;
|
|
$s = $spec;
|
|
|
|
|
|
|
|
-// ── Build comprehensive soil data block ───────────────────────────────────────
|
|
|
|
|
-// Includes ALL measured elements with status against spec targets
|
|
|
|
|
|
|
+// ── Build comprehensive soil data block (ALL elements) ────────────────────────
|
|
|
$soilData = <<<TEXT
|
|
$soilData = <<<TEXT
|
|
|
=====================================
|
|
=====================================
|
|
|
SOIL TEST DATA — COMPLETE ANALYSIS
|
|
SOIL TEST DATA — COMPLETE ANALYSIS
|
|
@@ -138,7 +141,6 @@ Crop: {$r['sample_id']}
|
|
|
Crop Type: {$r['crop_type']}
|
|
Crop Type: {$r['crop_type']}
|
|
|
Soil Type: {$r['soil_type']}
|
|
Soil Type: {$r['soil_type']}
|
|
|
Lab No: {$r['lab_no']}
|
|
Lab No: {$r['lab_no']}
|
|
|
-Batch: {$r['batch_no']}
|
|
|
|
|
Date Sampled: {$r['date_sampled']}
|
|
Date Sampled: {$r['date_sampled']}
|
|
|
|
|
|
|
|
--- SOIL PHYSICAL / REACTION ---
|
|
--- SOIL PHYSICAL / REACTION ---
|
|
@@ -161,7 +163,7 @@ Paramagnetic: {fv($r['paramag'], 0)}
|
|
|
--- NITROGEN ---
|
|
--- NITROGEN ---
|
|
|
Nitrate-N (NO3-N ppm): {fv($r['NO3_N'], 0)} [target: 10–20 ppm] {rangeStatus($r['NO3_N'], 10, 20)}
|
|
Nitrate-N (NO3-N ppm): {fv($r['NO3_N'], 0)} [target: 10–20 ppm] {rangeStatus($r['NO3_N'], 10, 20)}
|
|
|
Ammonium-N (NH3-N ppm): {fv($r['NH3_N'], 0)}
|
|
Ammonium-N (NH3-N ppm): {fv($r['NH3_N'], 0)}
|
|
|
-Total N (est. from C:N): C:N ratio {fv($r['c_n_ratio'], 1)}
|
|
|
|
|
|
|
+C:N ratio: {fv($r['c_n_ratio'], 1)}
|
|
|
|
|
|
|
|
--- PHOSPHORUS ---
|
|
--- PHOSPHORUS ---
|
|
|
P Colwell (ppm): {fv($r['p_colwell'], 0)}
|
|
P Colwell (ppm): {fv($r['p_colwell'], 0)}
|
|
@@ -208,31 +210,30 @@ Iron Fe (total): {fv($r['fe'], 2)}
|
|
|
Aluminium Al (ppm): {fv($r['al'], 2)}
|
|
Aluminium Al (ppm): {fv($r['al'], 2)}
|
|
|
Silicon Si (ppm): {fv($r['sl_cacl2'], 2)}
|
|
Silicon Si (ppm): {fv($r['sl_cacl2'], 2)}
|
|
|
Cobalt Co (ppm): {fv($r['co_dtpa'], 2)}
|
|
Cobalt Co (ppm): {fv($r['co_dtpa'], 2)}
|
|
|
-Molybdenum M (ppm): {fv($r['m_dtpa'], 2)}
|
|
|
|
|
|
|
+Molybdenum Mo (ppm): {fv($r['m_dtpa'], 2)}
|
|
|
Selenium Se (ppm): {fv($r['se'], 2)}
|
|
Selenium Se (ppm): {fv($r['se'], 2)}
|
|
|
|
|
|
|
|
--- RATIOS ---
|
|
--- RATIOS ---
|
|
|
Ca:Mg ratio: {fv(is_numeric($r['ca_mehlick3']) && is_numeric($r['mg_mehlick3']) && (float)$r['mg_mehlick3'] != 0 ? round((float)$r['ca_mehlick3']/(float)$r['mg_mehlick3'],1) : null, 1)} [recommended: {fv(sv($s,$r,'ca_mg_ratio'),1)}]
|
|
Ca:Mg ratio: {fv(is_numeric($r['ca_mehlick3']) && is_numeric($r['mg_mehlick3']) && (float)$r['mg_mehlick3'] != 0 ? round((float)$r['ca_mehlick3']/(float)$r['mg_mehlick3'],1) : null, 1)} [recommended: {fv(sv($s,$r,'ca_mg_ratio'),1)}]
|
|
|
C:N ratio: {fv($r['c_n_ratio'], 1)}
|
|
C:N ratio: {fv($r['c_n_ratio'], 1)}
|
|
|
|
|
|
|
|
---- DEFICIENT ELEMENTS SUMMARY ---
|
|
|
|
|
TEXT;
|
|
TEXT;
|
|
|
|
|
|
|
|
-// Append a quick plain-English deficiency list to help the LLM focus
|
|
|
|
|
|
|
+// Append quick deficiency/excess summary
|
|
|
$deficiencies = [];
|
|
$deficiencies = [];
|
|
|
$excesses = [];
|
|
$excesses = [];
|
|
|
|
|
|
|
|
$checkElements = [
|
|
$checkElements = [
|
|
|
- ['pH (H2O)', $r['ph_h2o'], 6.2, 6.8],
|
|
|
|
|
- ['Nitrate-N', $r['NO3_N'], 10, 20],
|
|
|
|
|
- ['Calcium (ppm)', $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
|
|
|
|
|
- ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
|
|
|
|
|
- ['Potassium (ppm)', $r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max')],
|
|
|
|
|
- ['Sodium (ppm)', $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
|
|
|
|
|
- ['Ca sat (%)', $r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max')],
|
|
|
|
|
- ['Mg sat (%)', $r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max')],
|
|
|
|
|
- ['K sat (%)', $r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max')],
|
|
|
|
|
- ['Na sat (%)', $r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max')],
|
|
|
|
|
|
|
+ ['pH (H2O)', $r['ph_h2o'], 6.2, 6.8],
|
|
|
|
|
+ ['Nitrate-N', $r['NO3_N'], 10, 20],
|
|
|
|
|
+ ['Calcium (ppm)', $r['BS_ca_ppm'], sv($s,$r,'ca_ppm_min'), sv($s,$r,'ca_ppm_max')],
|
|
|
|
|
+ ['Magnesium (ppm)', $r['BS_mg_ppm'], sv($s,$r,'mg_ppm_min'), sv($s,$r,'mg_ppm_max')],
|
|
|
|
|
+ ['Potassium (ppm)', $r['BS_k_ppm'], sv($s,$r,'k_ppm_min'), sv($s,$r,'k_ppm_max')],
|
|
|
|
|
+ ['Sodium (ppm)', $r['BS_na_ppm'], sv($s,$r,'na_ppm_min'), sv($s,$r,'na_ppm_max')],
|
|
|
|
|
+ ['Ca sat (%)', $r['BS_ca2'], sv($s,$r,'cabs_min'), sv($s,$r,'cabs_max')],
|
|
|
|
|
+ ['Mg sat (%)', $r['BS_mg2'], sv($s,$r,'mgbs_min'), sv($s,$r,'mgbs_max')],
|
|
|
|
|
+ ['K sat (%)', $r['BS_k'], sv($s,$r,'kbs_min'), sv($s,$r,'kbs_max')],
|
|
|
|
|
+ ['Na sat (%)', $r['BS_na'], sv($s,$r,'nabs_min'), sv($s,$r,'nabs_max')],
|
|
|
];
|
|
];
|
|
|
|
|
|
|
|
foreach ($checkElements as [$label, $val, $lo, $hi]) {
|
|
foreach ($checkElements as [$label, $val, $lo, $hi]) {
|
|
@@ -242,13 +243,13 @@ foreach ($checkElements as [$label, $val, $lo, $hi]) {
|
|
|
if (is_numeric($hi) && $v > (float)$hi) $excesses[] = $label;
|
|
if (is_numeric($hi) && $v > (float)$hi) $excesses[] = $label;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-$soilData .= "\nDeficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies));
|
|
|
|
|
-$soilData .= "\nIn Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses));
|
|
|
|
|
-$soilData .= "\n=====================================\n";
|
|
|
|
|
|
|
+$soilData .= "Deficient: " . (empty($deficiencies) ? 'None detected' : implode(', ', $deficiencies)) . "\n";
|
|
|
|
|
+$soilData .= "In Excess: " . (empty($excesses) ? 'None detected' : implode(', ', $excesses)) . "\n";
|
|
|
|
|
+$soilData .= "=====================================\n";
|
|
|
|
|
|
|
|
-// ── RAG: embed the soil data query, retrieve relevant book passages ───────────
|
|
|
|
|
|
|
+// ── RAG: retrieve relevant passages from knowledge_chunks ─────────────────────
|
|
|
|
|
+$ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
|
|
|
$knowledgeContext = '';
|
|
$knowledgeContext = '';
|
|
|
-$ragChunks = retrieveRelevantChunks($pdo, $soilData, $section, RAG_TOP_K);
|
|
|
|
|
|
|
|
|
|
if (!empty($ragChunks)) {
|
|
if (!empty($ragChunks)) {
|
|
|
$knowledgeContext = "\n\n===================================================\n"
|
|
$knowledgeContext = "\n\n===================================================\n"
|
|
@@ -267,34 +268,30 @@ if (!empty($ragChunks)) {
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-// ── Section-specific system prompts ──────────────────────────────────────────
|
|
|
|
|
-$systemInstruction = "You are a certified agronomist specialising in soil fertility, "
|
|
|
|
|
- . "trained in the Albrecht method of soil balancing. "
|
|
|
|
|
- . "You have deep knowledge of soil chemistry, plant nutrition, and the relationship "
|
|
|
|
|
- . "between soil mineral balance and crop/livestock health. "
|
|
|
|
|
- . "Always ground your recommendations in the measured data. "
|
|
|
|
|
|
|
+// ── Section-specific prompts ──────────────────────────────────────────────────
|
|
|
|
|
+$system = "You are a certified agronomist specialising in soil fertility, trained in the "
|
|
|
|
|
+ . "Albrecht method of mineral soil balancing. You have deep knowledge of soil chemistry, "
|
|
|
|
|
+ . "plant nutrition, and the relationship between soil mineral balance and crop and livestock health. "
|
|
|
|
|
+ . "Always ground your recommendations in the measured data provided. "
|
|
|
. "For Australian conditions, reference typical soil types and climate where relevant. "
|
|
. "For Australian conditions, reference typical soil types and climate where relevant. "
|
|
|
. "Write in a professional but accessible tone suitable for a farmer-facing report. "
|
|
. "Write in a professional but accessible tone suitable for a farmer-facing report. "
|
|
|
- . "When the knowledge passages conflict with your training, prefer the passages — they "
|
|
|
|
|
- . "are from authoritative soil science texts.";
|
|
|
|
|
|
|
+ . "When the knowledge passages conflict with your training, prefer the passages — "
|
|
|
|
|
+ . "they are from authoritative soil science texts.";
|
|
|
|
|
|
|
|
-$baseContext = $soilData . $knowledgeContext;
|
|
|
|
|
|
|
+$ctx = $soilData . $knowledgeContext;
|
|
|
|
|
|
|
|
$prompts = [
|
|
$prompts = [
|
|
|
-
|
|
|
|
|
'overview' =>
|
|
'overview' =>
|
|
|
- $systemInstruction . "\n\n" . $baseContext
|
|
|
|
|
- . "\n\nTASK: Write an executive overview of these soil test results (3–4 paragraphs). "
|
|
|
|
|
|
|
+ "{$system}\n\n{$ctx}\n\n"
|
|
|
|
|
+ . "TASK: Write an executive overview of these soil test results (3–4 paragraphs). "
|
|
|
. "Cover: (1) overall soil health and fertility level, "
|
|
. "Cover: (1) overall soil health and fertility level, "
|
|
|
. "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
|
|
. "(2) the most significant deficiencies or imbalances and their likely effect on crop performance, "
|
|
|
- . "(3) any positive attributes of this soil. "
|
|
|
|
|
- . "Use the Albrecht philosophy as a framework where applicable. "
|
|
|
|
|
- . "Do not list specific product names in this section.",
|
|
|
|
|
|
|
+ . "(3) any positive attributes. "
|
|
|
|
|
+ . "Use the Albrecht philosophy as a framework. Do not recommend specific product names.",
|
|
|
|
|
|
|
|
'ai_interpretation' =>
|
|
'ai_interpretation' =>
|
|
|
- $systemInstruction . "\n\n" . $baseContext
|
|
|
|
|
- . "\n\nTASK: Write a detailed technical interpretation of ALL elements in this soil test. "
|
|
|
|
|
- . "Structure your response with these sections:\n"
|
|
|
|
|
|
|
+ "{$system}\n\n{$ctx}\n\n"
|
|
|
|
|
+ . "TASK: Write a detailed technical interpretation structured with these headings:\n"
|
|
|
. "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
|
|
. "1. SOIL REACTION (pH, EC, Paramagnetic)\n"
|
|
|
. "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
|
|
. "2. ORGANIC MATTER & BIOLOGY (C, N, C:N ratio)\n"
|
|
|
. "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
|
|
. "3. CATION EXCHANGE CAPACITY & BASE SATURATIONS\n"
|
|
@@ -302,29 +299,26 @@ $prompts = [
|
|
|
. "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
|
|
. "5. TRACE ELEMENTS (S, B, Mn, Cu, Zn, Fe, Al, Si, Co, Mo, Se)\n"
|
|
|
. "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
|
|
. "6. ELEMENTAL RATIOS & INTERACTIONS (Ca:Mg, C:N, K:Mg antagonisms)\n"
|
|
|
. "7. OVERALL SOIL BALANCE ASSESSMENT\n"
|
|
. "7. OVERALL SOIL BALANCE ASSESSMENT\n"
|
|
|
- . "For each element marked [DEFICIENT] or [EXCESS], explain the agronomic significance "
|
|
|
|
|
|
|
+ . "For each element marked [DEFICIENT] or [EXCESS], explain agronomic significance "
|
|
|
. "and interactions with other elements. Reference the Albrecht literature where relevant.",
|
|
. "and interactions with other elements. Reference the Albrecht literature where relevant.",
|
|
|
|
|
|
|
|
'foliar' =>
|
|
'foliar' =>
|
|
|
- $systemInstruction . "\n\n" . $baseContext
|
|
|
|
|
- . "\n\nTASK: Design a foliar nutrition program to address the deficiencies shown. "
|
|
|
|
|
- . "Format the program as a table or numbered list with: "
|
|
|
|
|
- . "Growth Stage | Product Type | Active Element | Rate (L or kg/ha) | Timing/Frequency. "
|
|
|
|
|
|
|
+ "{$system}\n\n{$ctx}\n\n"
|
|
|
|
|
+ . "TASK: Design a foliar nutrition program to address the deficiencies shown. "
|
|
|
|
|
+ . "Format as a numbered list or table: "
|
|
|
|
|
+ . "Growth Stage | Product Type (generic) | Active Element | Rate (L or kg/ha) | Timing. "
|
|
|
. "Prioritise elements marked [DEFICIENT]. "
|
|
. "Prioritise elements marked [DEFICIENT]. "
|
|
|
- . "Note any antagonisms (e.g. Ca/Mg competition, Zn/P interaction, K/Mg lockout). "
|
|
|
|
|
- . "Keep product recommendations generic (e.g. 'chelated zinc', 'calcium nitrate') "
|
|
|
|
|
- . "rather than brand names. "
|
|
|
|
|
|
|
+ . "Note antagonisms (e.g. Ca/Mg competition, Zn/P, K/Mg lockout). "
|
|
|
. "Add a note on carrier water pH and adjuvant recommendations.",
|
|
. "Add a note on carrier water pH and adjuvant recommendations.",
|
|
|
|
|
|
|
|
'microbial' =>
|
|
'microbial' =>
|
|
|
- $systemInstruction . "\n\n" . $baseContext
|
|
|
|
|
- . "\n\nTASK: Design a biological/microbial soil improvement program. "
|
|
|
|
|
- . "Consider the organic matter level, C:N ratio, pH, and base saturation balance shown. "
|
|
|
|
|
|
|
+ "{$system}\n\n{$ctx}\n\n"
|
|
|
|
|
+ . "TASK: Design a biological/microbial soil improvement program. "
|
|
|
. "Structure your response:\n"
|
|
. "Structure your response:\n"
|
|
|
- . "1. CURRENT BIOLOGY ASSESSMENT (based on OM, C:N, pH)\n"
|
|
|
|
|
- . "2. RECOMMENDED INOCULANTS (e.g. mycorrhizae, rhizobia, EM, compost tea)\n"
|
|
|
|
|
|
|
+ . "1. CURRENT BIOLOGY ASSESSMENT (based on OM%, C:N ratio, pH)\n"
|
|
|
|
|
+ . "2. RECOMMENDED INOCULANTS (mycorrhizae, rhizobia, EM, compost tea etc.)\n"
|
|
|
. "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
|
|
. "3. CARBON FEEDING STRATEGY (humates, fish hydrolysate, molasses, cover crops)\n"
|
|
|
- . "4. TIMING & INTEGRATION with the soil balancing program\n"
|
|
|
|
|
|
|
+ . "4. TIMING & INTEGRATION with the mineral balancing program\n"
|
|
|
. "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
|
|
. "Reference Albrecht's work on the relationship between mineral balance and soil biology.",
|
|
|
];
|
|
];
|
|
|
|
|
|
|
@@ -334,8 +328,11 @@ $payload = json_encode([
|
|
|
'prompt' => $prompts[$section],
|
|
'prompt' => $prompts[$section],
|
|
|
'stream' => false,
|
|
'stream' => false,
|
|
|
'options' => [
|
|
'options' => [
|
|
|
- 'temperature' => 0.3, // lower = more factual / less creative
|
|
|
|
|
- 'num_predict' => 2048,
|
|
|
|
|
|
|
+ 'temperature' => 0.3,
|
|
|
|
|
+ 'num_predict' => 2048,
|
|
|
|
|
+ 'num_ctx' => 6144,
|
|
|
|
|
+ 'repeat_penalty' => 1.1,
|
|
|
|
|
+ 'keep_alive' => -1, // keep model resident between requests
|
|
|
],
|
|
],
|
|
|
]);
|
|
]);
|
|
|
|
|
|
|
@@ -376,53 +373,41 @@ if ($text === '') {
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
echo json_encode([
|
|
echo json_encode([
|
|
|
- 'success' => true,
|
|
|
|
|
- 'text' => $text,
|
|
|
|
|
- 'rag_chunks_used' => count($ragChunks),
|
|
|
|
|
|
|
+ 'success' => true,
|
|
|
|
|
+ 'text' => $text,
|
|
|
|
|
+ 'rag_chunks_used' => count($ragChunks),
|
|
|
]);
|
|
]);
|
|
|
exit;
|
|
exit;
|
|
|
|
|
|
|
|
-// ── RAG retrieval ────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
+// ── RAG: retrieve relevant knowledge chunks from MySQL ────────────────────────
|
|
|
|
|
|
|
|
-/**
|
|
|
|
|
- * Embed a query string, then retrieve the top-K most similar knowledge chunks.
|
|
|
|
|
- * Falls back to MySQL FULLTEXT search if no embeddings are in the table or
|
|
|
|
|
- * if the embedding API is unavailable.
|
|
|
|
|
- *
|
|
|
|
|
- * @param PDO $pdo
|
|
|
|
|
- * @param string $queryText The soil data summary used as the retrieval query
|
|
|
|
|
- * @param string $section Current section (used to build keyword fallback)
|
|
|
|
|
- * @param int $topK
|
|
|
|
|
- * @return array Array of row arrays (source, author, page, chunk_text)
|
|
|
|
|
- */
|
|
|
|
|
function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
|
|
function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, int $topK): array
|
|
|
{
|
|
{
|
|
|
- // Check if we have any chunks at all
|
|
|
|
|
- $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
|
|
|
|
|
|
|
+ try {
|
|
|
|
|
+ $count = (int)$pdo->query('SELECT COUNT(*) FROM knowledge_chunks')->fetchColumn();
|
|
|
|
|
+ } catch (PDOException $e) {
|
|
|
|
|
+ return []; // Table doesn't exist yet
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
if ($count === 0) {
|
|
if ($count === 0) {
|
|
|
- return []; // Knowledge base not yet populated
|
|
|
|
|
|
|
+ return []; // Knowledge base not yet populated — run ingest_knowledge.php
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // ── Try vector similarity search first ──────────────────────────────────
|
|
|
|
|
|
|
+ // Try vector similarity first
|
|
|
$queryEmbedding = getQueryEmbedding($queryText);
|
|
$queryEmbedding = getQueryEmbedding($queryText);
|
|
|
-
|
|
|
|
|
if ($queryEmbedding !== null) {
|
|
if ($queryEmbedding !== null) {
|
|
|
return vectorSearch($pdo, $queryEmbedding, $topK);
|
|
return vectorSearch($pdo, $queryEmbedding, $topK);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // ── Fallback: MySQL FULLTEXT search ─────────────────────────────────────
|
|
|
|
|
|
|
+ // Fallback: MySQL FULLTEXT search
|
|
|
return fulltextSearch($pdo, $section, $topK);
|
|
return fulltextSearch($pdo, $section, $topK);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-/**
|
|
|
|
|
- * Embed text via Ollama. Tries new /api/embed first, falls back to legacy
|
|
|
|
|
- * /api/embeddings. Returns float[] or null on failure.
|
|
|
|
|
- */
|
|
|
|
|
function getQueryEmbedding(string $text): ?array
|
|
function getQueryEmbedding(string $text): ?array
|
|
|
{
|
|
{
|
|
|
$queryText = substr($text, 0, 2000);
|
|
$queryText = substr($text, 0, 2000);
|
|
|
|
|
|
|
|
- // ── New API (/api/embed, Ollama >= 0.1.26) ───────────────────────────────
|
|
|
|
|
|
|
+ // Try new /api/embed (Ollama >= 0.1.26) first
|
|
|
$ch = curl_init(OLLAMA_HOST . '/api/embed');
|
|
$ch = curl_init(OLLAMA_HOST . '/api/embed');
|
|
|
curl_setopt_array($ch, [
|
|
curl_setopt_array($ch, [
|
|
|
CURLOPT_POST => true,
|
|
CURLOPT_POST => true,
|
|
@@ -442,7 +427,7 @@ function getQueryEmbedding(string $text): ?array
|
|
|
if (is_array($emb) && count($emb) > 0) return $emb;
|
|
if (is_array($emb) && count($emb) > 0) return $emb;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // ── Legacy API (/api/embeddings) ─────────────────────────────────────────
|
|
|
|
|
|
|
+ // Fallback: legacy /api/embeddings
|
|
|
$ch = curl_init(OLLAMA_HOST . '/api/embeddings');
|
|
$ch = curl_init(OLLAMA_HOST . '/api/embeddings');
|
|
|
curl_setopt_array($ch, [
|
|
curl_setopt_array($ch, [
|
|
|
CURLOPT_POST => true,
|
|
CURLOPT_POST => true,
|
|
@@ -465,25 +450,16 @@ function getQueryEmbedding(string $text): ?array
|
|
|
return null;
|
|
return null;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-/**
|
|
|
|
|
- * Load all chunk embeddings from DB, compute cosine similarity, return top-K.
|
|
|
|
|
- * For corpora up to ~10k chunks this is fast enough in PHP.
|
|
|
|
|
- */
|
|
|
|
|
function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
|
|
function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
|
|
|
{
|
|
{
|
|
|
- $stmt = $pdo->query(
|
|
|
|
|
- 'SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks'
|
|
|
|
|
- );
|
|
|
|
|
-
|
|
|
|
|
|
|
+ $stmt = $pdo->query('SELECT id, source, author, page, chunk_text, embedding FROM knowledge_chunks');
|
|
|
$scores = [];
|
|
$scores = [];
|
|
|
|
|
|
|
|
while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
|
|
while ($row = $stmt->fetch(PDO::FETCH_ASSOC)) {
|
|
|
$chunkVec = json_decode($row['embedding'], true);
|
|
$chunkVec = json_decode($row['embedding'], true);
|
|
|
if (!is_array($chunkVec)) continue;
|
|
if (!is_array($chunkVec)) continue;
|
|
|
-
|
|
|
|
|
- $sim = cosineSimilarity($queryVec, $chunkVec);
|
|
|
|
|
$scores[] = [
|
|
$scores[] = [
|
|
|
- 'score' => $sim,
|
|
|
|
|
|
|
+ 'score' => cosineSimilarity($queryVec, $chunkVec),
|
|
|
'source' => $row['source'],
|
|
'source' => $row['source'],
|
|
|
'author' => $row['author'],
|
|
'author' => $row['author'],
|
|
|
'page' => $row['page'],
|
|
'page' => $row['page'],
|
|
@@ -491,24 +467,18 @@ function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
|
|
|
];
|
|
];
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- // Sort descending by score, return top-K
|
|
|
|
|
usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
|
|
usort($scores, fn($a, $b) => $b['score'] <=> $a['score']);
|
|
|
return array_slice($scores, 0, $topK);
|
|
return array_slice($scores, 0, $topK);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-/**
|
|
|
|
|
- * MySQL FULLTEXT fallback when embeddings aren't available.
|
|
|
|
|
- */
|
|
|
|
|
function fulltextSearch(PDO $pdo, string $section, int $topK): array
|
|
function fulltextSearch(PDO $pdo, string $section, int $topK): array
|
|
|
{
|
|
{
|
|
|
- // Section-specific keyword hints for the search
|
|
|
|
|
$keywords = [
|
|
$keywords = [
|
|
|
- 'overview' => 'soil fertility mineral balance calcium magnesium',
|
|
|
|
|
|
|
+ 'overview' => 'soil fertility mineral balance calcium magnesium albrecht',
|
|
|
'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
|
|
'ai_interpretation' => 'base saturation calcium magnesium potassium pH organic matter',
|
|
|
'foliar' => 'foliar nutrition trace elements deficiency correction spray',
|
|
'foliar' => 'foliar nutrition trace elements deficiency correction spray',
|
|
|
'microbial' => 'soil biology microbial organic matter carbon nitrogen humus',
|
|
'microbial' => 'soil biology microbial organic matter carbon nitrogen humus',
|
|
|
];
|
|
];
|
|
|
-
|
|
|
|
|
$query = $keywords[$section] ?? 'soil fertility mineral nutrition';
|
|
$query = $keywords[$section] ?? 'soil fertility mineral nutrition';
|
|
|
|
|
|
|
|
try {
|
|
try {
|
|
@@ -528,22 +498,15 @@ function fulltextSearch(PDO $pdo, string $section, int $topK): array
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-/**
|
|
|
|
|
- * Cosine similarity between two equal-length float vectors.
|
|
|
|
|
- */
|
|
|
|
|
function cosineSimilarity(array $a, array $b): float
|
|
function cosineSimilarity(array $a, array $b): float
|
|
|
{
|
|
{
|
|
|
- $dot = 0.0;
|
|
|
|
|
- $normA = 0.0;
|
|
|
|
|
- $normB = 0.0;
|
|
|
|
|
|
|
+ $dot = $normA = $normB = 0.0;
|
|
|
$len = min(count($a), count($b));
|
|
$len = min(count($a), count($b));
|
|
|
-
|
|
|
|
|
for ($i = 0; $i < $len; $i++) {
|
|
for ($i = 0; $i < $len; $i++) {
|
|
|
$dot += $a[$i] * $b[$i];
|
|
$dot += $a[$i] * $b[$i];
|
|
|
$normA += $a[$i] * $a[$i];
|
|
$normA += $a[$i] * $a[$i];
|
|
|
$normB += $b[$i] * $b[$i];
|
|
$normB += $b[$i] * $b[$i];
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
$denom = sqrt($normA) * sqrt($normB);
|
|
$denom = sqrt($normA) * sqrt($normB);
|
|
|
return $denom > 0 ? $dot / $denom : 0.0;
|
|
return $denom > 0 ? $dot / $denom : 0.0;
|
|
|
}
|
|
}
|