|
|
@@ -2,28 +2,30 @@
|
|
|
/**
|
|
|
* controllers/soilImportController.php
|
|
|
*
|
|
|
- * Handles XLS/XLSX/CSV upload from soil labs, parses the file with
|
|
|
- * PhpSpreadsheet, then uses the local Ollama LLM to map lab-specific
|
|
|
- * column headers to the soil_records database fields.
|
|
|
+ * Handles XLS/XLSX/CSV upload from soil labs.
|
|
|
*
|
|
|
* POST /controllers/soilImportController.php
|
|
|
- * Accepts multipart/form-data with:
|
|
|
- * file — the uploaded spreadsheet
|
|
|
- * action — "parse" → return list of samples found in the file
|
|
|
- * "import" → return mapped field values for one sample
|
|
|
- * sample_idx — (import only) 0-based index of the sample to import
|
|
|
+ * file — multipart upload
|
|
|
+ * lab — lab identifier: "csbp" | "generic" | "auto" (default; auto-detects CSBP, otherwise generic)
|
|
|
+ * action — "parse" → detect lab, return sample list
|
|
|
+ * "import_one" → map + return fields for one sample (form pre-fill)
|
|
|
+ * "import_bulk" → save all (or selected) samples directly to DB
|
|
|
+ * sample_idx — (import_one) 0-based index
|
|
|
+ * samples — (import_bulk) JSON array of sample objects with overrides applied by user
|
|
|
+ * client_id — (import_bulk) client_records.id to link records to
|
|
|
*/
|
|
|
|
|
|
require_once __DIR__ . '/../config/database.php';
|
|
|
require_once __DIR__ . '/../config/ai.php';
|
|
|
require_once __DIR__ . '/../lib/auth.php';
|
|
|
+require_once __DIR__ . '/../lib/csrf.php';
|
|
|
+require_once __DIR__ . '/labParsers/csbp.php';
|
|
|
|
|
|
if (session_status() === PHP_SESSION_NONE) {
|
|
|
session_start();
|
|
|
}
|
|
|
|
|
|
requireLogin();
|
|
|
-
|
|
|
header('Content-Type: application/json');
|
|
|
|
|
|
// ─── helpers ─────────────────────────────────────────────────────────────────
|
|
|
@@ -48,7 +50,7 @@ if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
|
|
|
}
|
|
|
|
|
|
$action = $_POST['action'] ?? 'parse';
|
|
|
-if (!in_array($action, ['parse', 'import'], true)) {
|
|
|
+if (!in_array($action, ['parse', 'import_one', 'import_bulk'], true)) {
|
|
|
jsonError('Invalid action');
|
|
|
}
|
|
|
|
|
|
@@ -67,24 +69,15 @@ if (!in_array($ext, ['xls', 'xlsx', 'csv', 'ods'], true)) {
|
|
|
|
|
|
// ─── PhpSpreadsheet ───────────────────────────────────────────────────────────
|
|
|
|
|
|
-$autoloadPaths = [
|
|
|
- __DIR__ . '/../vendor/autoload.php',
|
|
|
- __DIR__ . '/../../vendor/autoload.php',
|
|
|
-];
|
|
|
-$autoloaded = false;
|
|
|
-foreach ($autoloadPaths as $path) {
|
|
|
- if (file_exists($path)) {
|
|
|
- require_once $path;
|
|
|
- $autoloaded = true;
|
|
|
- break;
|
|
|
- }
|
|
|
+foreach ([__DIR__ . '/../vendor/autoload.php', __DIR__ . '/../../vendor/autoload.php'] as $p) {
|
|
|
+ if (file_exists($p)) { require_once $p; break; }
|
|
|
}
|
|
|
-if (!$autoloaded) {
|
|
|
- jsonError('PhpSpreadsheet not installed. Run: composer require phpoffice/phpspreadsheet', 500);
|
|
|
+
|
|
|
+if (!class_exists('\PhpOffice\PhpSpreadsheet\IOFactory')) {
|
|
|
+ jsonError('PhpSpreadsheet not installed. Run: composer install', 500);
|
|
|
}
|
|
|
|
|
|
use PhpOffice\PhpSpreadsheet\IOFactory;
|
|
|
-use PhpOffice\PhpSpreadsheet\Spreadsheet;
|
|
|
|
|
|
try {
|
|
|
$spreadsheet = IOFactory::load($uploadedFile['tmp_name']);
|
|
|
@@ -92,16 +85,13 @@ try {
|
|
|
jsonError('Could not read file: ' . $e->getMessage());
|
|
|
}
|
|
|
|
|
|
-$sheet = $spreadsheet->getActiveSheet();
|
|
|
-
|
|
|
-// Convert sheet to a 2-D array (1-indexed rows and cols → 0-indexed)
|
|
|
+$sheet = $spreadsheet->getActiveSheet();
|
|
|
$rawData = [];
|
|
|
foreach ($sheet->getRowIterator() as $row) {
|
|
|
$cells = [];
|
|
|
foreach ($row->getCellIterator() as $cell) {
|
|
|
$cells[] = trim((string) $cell->getFormattedValue());
|
|
|
}
|
|
|
- // Strip trailing empty cells
|
|
|
while ($cells && end($cells) === '') {
|
|
|
array_pop($cells);
|
|
|
}
|
|
|
@@ -114,39 +104,188 @@ if (empty($rawData)) {
|
|
|
jsonError('The spreadsheet appears to be empty.');
|
|
|
}
|
|
|
|
|
|
-// ─── format detection ─────────────────────────────────────────────────────────
|
|
|
+// ─── Lab detection ────────────────────────────────────────────────────────────
|
|
|
//
|
|
|
-// Two layouts found in CSBP lab files:
|
|
|
-//
|
|
|
-// TRANSPOSED (lab card) — Column 0 = row labels ("EC 1:5", "Total P %", …)
|
|
|
-// Columns 1-N = one sample each.
|
|
|
-// Example: SOIL CONTROL XNS06189.xls
|
|
|
-//
|
|
|
-// ROW-BASED (report) — One row = column headers; subsequent rows = samples.
|
|
|
-// May have 1-3 title/subtitle rows above the headers.
|
|
|
-// Example: S-C Soil Tests 2006.xls, YOS06 42-48.xlsx
|
|
|
-//
|
|
|
-// Detection strategy:
|
|
|
-// 1. Score column-0 values for soil-chemistry label patterns (units, element
|
|
|
-// names, "1:5", "ppm", etc.). ≥2 matches → transposed.
|
|
|
-// 2. Otherwise scan the first 10 rows for a "header row" — the row that best
|
|
|
-// matches known CSBP column-code keywords. Everything above it is a title.
|
|
|
+// Client can tell us the lab via POST['lab'], or we auto-detect.
|
|
|
|
|
|
-/**
|
|
|
- * Returns ['transposed' => bool, 'headerRow' => int]
|
|
|
- */
|
|
|
-function detectFormat(array $rawData): array
|
|
|
// Normalise the requested lab identifier. A non-string value (e.g. lab[]=x)
// cannot be passed to trim(), so it is treated the same as "auto".
$requestedLab = $_POST['lab'] ?? 'auto';
$lab = is_string($requestedLab) ? strtolower(trim($requestedLab)) : 'auto';

if ($lab === 'auto' || $lab === '') {
    // No explicit choice: csbpDetect() decides, everything else is "generic".
    $lab = csbpDetect($rawData) ? 'csbp' : 'generic';
} elseif ($lab !== 'csbp') {
    // Unknown identifiers have always been parsed by the generic parser. Collapse
    // them to "generic" so the `$lab === 'generic'` check in import_one and the
    // `lab` value reported by the parse action agree with the parser actually used.
    $lab = 'generic';
}

// ─── Parse into samples ───────────────────────────────────────────────────────

$samples = $lab === 'csbp' ? csbpParse($rawData) : genericParse($rawData);

if (empty($samples)) {
    jsonError('No samples found in the file. Check the correct lab is selected.');
}
|
|
|
+
|
|
|
+// ─── action: parse ────────────────────────────────────────────────────────────
|
|
|
+
|
|
|
if ($action === 'parse') {
    // Returns the first non-empty value found under any of $keys, else $default.
    // Normalised keys are listed first; the raw header names that follow are the
    // ones the generic extractors produce (samples keyed by the lab's own headers).
    $firstFilled = static function ($sample, array $keys, $default) {
        foreach ($keys as $key) {
            if (isset($sample[$key]) && $sample[$key] !== '') {
                return $sample[$key];
            }
        }
        return $default;
    };

    $list = [];
    foreach ($samples as $idx => $s) {
        $list[] = [
            'idx'       => $idx,
            'lab_no'    => $firstFilled(
                $s,
                ['lab_no', 'LAB_NUMBER', 'Lab ID (Soil)', 'LAB_ID'],
                'Sample ' . ($idx + 1)
            ),
            // paddock
            'sample_id' => $firstFilled($s, ['sample_id', 'PADDOCK', 'Field Name (Sample ID)'], ''),
            'client'    => $firstFilled($s, ['client_name', 'CLIENT NAME', 'Consultant', 'CUSTNO'], ''),
            'crop'      => $firstFilled($s, ['crop_type', 'CROP', 'Material (manure, sawdust, etc.)'], ''),
        ];
    }

    jsonOk(['samples' => $list, 'count' => count($samples), 'lab' => $lab]);
}
|
|
|
+
|
|
|
+// ─── action: import_one ───────────────────────────────────────────────────────
|
|
|
+// Returns mapped field values for a single sample to pre-fill the form.
|
|
|
+
|
|
|
if ($action === 'import_one') {
    // Strict integer validation: a plain (int) cast turns "abc" into 0 and would
    // silently return the first sample. A missing index still defaults to 0.
    $idx = filter_var($_POST['sample_idx'] ?? 0, FILTER_VALIDATE_INT);
    if ($idx === false || $idx < 0 || $idx >= count($samples)) {
        jsonError('Invalid sample index.');
    }

    $fields  = $samples[$idx];
    $method  = 'csbp';
    $warning = null;

    // Anything that was not parsed by the CSBP parser went through the generic
    // parser, so its keys are raw lab headers and need AI mapping.
    if ($lab !== 'csbp') {
        $mapped = ollamaMap($fields);

        if ($mapped === $fields) {
            // ollamaMap() returns its input unchanged when the model is unreachable
            // or its answer cannot be parsed; do not report that as an AI mapping.
            $method  = 'raw';
            $warning = 'AI mapping unavailable; returning unmapped lab columns.';
        } else {
            $method = 'ai';
        }

        $fields = $mapped;
    }

    // Drop unmapped (null) and blank entries so the form only receives real values.
    $fields = array_filter($fields, fn($v) => $v !== null && $v !== '');

    $result = ['fields' => $fields, 'method' => $method];
    if ($warning !== null) {
        $result['warning'] = $warning;
    }

    jsonOk($result);
}
|
|
|
+
|
|
|
+// ─── action: import_bulk ─────────────────────────────────────────────────────
|
|
|
+// Saves all samples (with user-confirmed paddock IDs) directly to soil_records.
|
|
|
+
|
|
|
if ($action === 'import_bulk') {
    $clientId = (int) ($_POST['client_id'] ?? 0);
    if (!$clientId) {
        jsonError('Please select a client before bulk importing.');
    }

    $userId = (int) getCurrentUserId();

    // `samples` is user-supplied JSON; anything that is not a non-empty array
    // falls back to every sample parsed from the uploaded file.
    $rawSamples = $_POST['samples'] ?? '[]';
    $confirmed  = is_string($rawSamples) ? json_decode($rawSamples, true) : null;
    if (!is_array($confirmed) || empty($confirmed)) {
        $confirmed = $samples;
    }

    // Text column: pass scalars through, never bind arrays/objects.
    $text = static function (array $sample, string $key) {
        $value = $sample[$key] ?? null;
        return is_scalar($value) ? $value : null;
    };

    // Numeric column: only genuinely numeric input becomes a float. A plain
    // (float) cast would store "<0.05" as 0.0 and "1,234" as 1.0; such values
    // are stored as NULL instead of a fabricated reading.
    $number = static function (array $sample, string $key): ?float {
        $value = $sample[$key] ?? null;
        if (is_string($value)) {
            $value = trim($value);
        }
        return is_numeric($value) ? (float) $value : null;
    };

    $pdo      = getDBConnection();
    $inserted = 0;
    $skipped  = [];

    // Both statements are loop-invariant, so prepare them once.
    $dupStmt = $pdo->prepare("
        SELECT id FROM soil_records
        WHERE lab_no = ? AND CAST(client_records_id AS UNSIGNED) = ?
        LIMIT 1
    ");

    $insertStmt = $pdo->prepare("
        INSERT INTO soil_records (
            client_records_id, modx_user_id, date, client_name,
            analysis_type, lab_no, batch_no, sample_id, site_id,
            crop_type, soil_type, date_sampled,
            texture, gravel, colour,
            ph_cacl2, ph_h2o, ec, ocarbon, omatter, paramag,
            NO3_N, NH3_N,
            p_mehlick, p_bray2, p_morgan,
            k_morgan, ca_morgan, mg_morgan, na_morgan, s_morgan,
            b_cacl2, mn_dtpa, zn_dtpa, fe_dtpa, cu_dtpa, al, se,
            tec, cec, ca_mehlick3, mg_mehlick3, k_mehlick3, na_mehlick3, al_mehlick3,
            rand, status
        ) VALUES (
            ?, ?, NOW(), ?,
            ?, ?, ?, ?, ?,
            ?, ?, ?,
            ?, ?, ?,
            ?, ?, ?, ?, ?, ?,
            ?, ?,
            ?, ?, ?,
            ?, ?, ?, ?, ?,
            ?, ?, ?, ?, ?, ?, ?,
            ?, ?, ?, ?, ?, ?, ?,
            ?, '0'
        )
    ");

    try {
        // All-or-nothing: a database failure half-way through must not leave a
        // partially imported batch behind.
        $pdo->beginTransaction();

        foreach ($confirmed as $s) {
            if (!is_array($s)) {
                $skipped[] = 'Row is not a sample object';
                continue;
            }

            $labNo    = $text($s, 'lab_no');
            $sampleId = $text($s, 'sample_id');

            // Require at minimum a lab number or sample ID
            if (empty($labNo) && empty($sampleId)) {
                $skipped[] = 'Row missing lab number and paddock';
                continue;
            }

            // Avoid duplicates: skip if this lab_no already exists for this client.
            // Rows inserted earlier in this same batch are visible to this check.
            if (!empty($labNo)) {
                $dupStmt->execute([$labNo, $clientId]);
                $isDuplicate = $dupStmt->fetch() !== false;
                $dupStmt->closeCursor();

                if ($isDuplicate) {
                    $skipped[] = $labNo . ' (duplicate)';
                    continue;
                }
            }

            $rand = (string) (mt_rand(1000, 9999) / 1000); // matches existing rand pattern

            $saved = $insertStmt->execute([
                $clientId, $userId, $text($s, 'client_name'),
                $text($s, 'analysis_type'), $labNo, $text($s, 'batch_no'), $sampleId, $text($s, 'site_id'),
                $text($s, 'crop_type'), $text($s, 'soil_type'), $text($s, 'date_sampled'),
                $text($s, 'texture'), $number($s, 'gravel'), $text($s, 'colour'),
                $number($s, 'ph_cacl2'), $number($s, 'ph_h2o'), $number($s, 'ec'),
                $number($s, 'ocarbon'), $number($s, 'omatter'), $number($s, 'paramag'),
                $number($s, 'NO3_N'), $number($s, 'NH3_N'),
                $number($s, 'p_mehlick'), $number($s, 'p_bray2'), $number($s, 'p_morgan'),
                $number($s, 'k_morgan'), $number($s, 'ca_morgan'), $number($s, 'mg_morgan'),
                $number($s, 'na_morgan'), $number($s, 's_morgan'),
                $number($s, 'b_cacl2'), $number($s, 'mn_dtpa'), $number($s, 'zn_dtpa'), $number($s, 'fe_dtpa'),
                $number($s, 'cu_dtpa'), $number($s, 'al'), $number($s, 'se'),
                $number($s, 'tec'), $number($s, 'cec'), $number($s, 'ca_mehlick3'), $number($s, 'mg_mehlick3'),
                $number($s, 'k_mehlick3'), $number($s, 'na_mehlick3'), $number($s, 'al_mehlick3'),
                $rand,
            ]);

            // When PDO is not in exception mode execute() reports failure by
            // returning false; do not count such a row as imported.
            if (!$saved) {
                $skipped[] = ($labNo ?: $sampleId) . ' (database error)';
                continue;
            }

            $inserted++;
        }

        $pdo->commit();
    } catch (\PDOException $e) {
        if ($pdo->inTransaction()) {
            $pdo->rollBack();
        }
        error_log('soil bulk import failed: ' . $e->getMessage());
        // NOTE(review): assumes jsonError() terminates the request, as the rest of
        // this script already relies on — confirm.
        jsonError('Bulk import failed; no samples were saved.', 500);
    }

    jsonOk([
        'inserted' => $inserted,
        'skipped'  => $skipped,
        'message'  => "{$inserted} sample" . ($inserted !== 1 ? 's' : '') . " imported successfully."
                    . (count($skipped) ? ' Skipped: ' . implode(', ', $skipped) : ''),
    ]);
}
|
|
|
+
|
|
|
+// ─── Generic parser (non-CSBP files) ─────────────────────────────────────────
|
|
|
+
|
|
|
/**
 * Parse a spreadsheet from an unrecognised lab into per-sample arrays.
 *
 * Asks detectGenericFormat() which layout the sheet uses, then hands the rows
 * to the matching extractor.
 *
 * @param array $rawData Sheet contents as a list of rows, each a list of cell values.
 * @return array One entry per sample, as produced by the chosen extractor.
 */
function genericParse(array $rawData): array
{
    $layout = detectGenericFormat($rawData);

    if ($layout['transposed']) {
        return extractTransposed($rawData);
    }

    return extractRowBased($rawData, $layout['headerRow']);
}
|
|
|
+
|
|
|
+function detectGenericFormat(array $rawData): array
|
|
|
{
|
|
|
- // Phrases that appear in column-0 of a transposed lab card
|
|
|
$transposedSignals = [
|
|
|
'1:5', 'total p', 'total k', 'total ca', 'total mg', 'total na',
|
|
|
'total s', 'total n', 'total b', 'total zn', 'total mn', 'total fe',
|
|
|
- 'total cu', 'total cl', 'organic matter', 'organic carbon',
|
|
|
- 'lab id', 'lab performing', 'field name', 'nitrate ppm',
|
|
|
- 'ph 1:5', 'moisture %', 'consultant',
|
|
|
+ 'total cu', 'organic matter', 'organic carbon', 'lab id', 'nitrate ppm',
|
|
|
+ 'ph 1:5', 'moisture %', 'consultant', 'field name',
|
|
|
];
|
|
|
-
|
|
|
- // Phrases that appear in the header row of a row-based file
|
|
|
$rowBasedSignals = [
|
|
|
'lab_number', 'custno', 'paddock', 'ph_cacl2', 'ph_h2o',
|
|
|
'dtpa_cu', 'dtpa_zn', 'dtpa_mn', 'dtpa_fe', 'conducty',
|
|
|
@@ -156,309 +295,135 @@ function detectFormat(array $rawData): array
|
|
|
'sat_k', 'sat_na', 'crop', 'sp%',
|
|
|
];
|
|
|
|
|
|
- // Step 1: score column-0 values for transposed signals
|
|
|
$transposedScore = 0;
|
|
|
foreach (array_slice($rawData, 0, 20) as $row) {
|
|
|
$cell = strtolower($row[0] ?? '');
|
|
|
- if ($cell === '') {
|
|
|
- continue;
|
|
|
- }
|
|
|
- foreach ($transposedSignals as $signal) {
|
|
|
- if (str_contains($cell, $signal)) {
|
|
|
- $transposedScore++;
|
|
|
- break;
|
|
|
- }
|
|
|
+ foreach ($transposedSignals as $sig) {
|
|
|
+ if (str_contains($cell, $sig)) { $transposedScore++; break; }
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
if ($transposedScore >= 2) {
|
|
|
return ['transposed' => true, 'headerRow' => 0];
|
|
|
}
|
|
|
|
|
|
- // Step 2: find the best header row in a row-based file
|
|
|
- $bestRow = 0;
|
|
|
- $bestScore = 0;
|
|
|
+ $bestRow = 0; $bestScore = 0;
|
|
|
for ($i = 0; $i < min(10, count($rawData)); $i++) {
|
|
|
$score = 0;
|
|
|
foreach ($rawData[$i] as $cell) {
|
|
|
$cell = strtolower(trim($cell));
|
|
|
- if ($cell === '') {
|
|
|
- continue;
|
|
|
+ foreach ($rowBasedSignals as $sig) {
|
|
|
+ if (str_contains($cell, $sig)) { $score++; break; }
|
|
|
}
|
|
|
- foreach ($rowBasedSignals as $signal) {
|
|
|
- if (str_contains($cell, $signal)) {
|
|
|
- $score++;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if ($score > $bestScore) {
|
|
|
- $bestScore = $score;
|
|
|
- $bestRow = $i;
|
|
|
}
|
|
|
+ if ($score > $bestScore) { $bestScore = $score; $bestRow = $i; }
|
|
|
}
|
|
|
-
|
|
|
return ['transposed' => false, 'headerRow' => $bestRow];
|
|
|
}
|
|
|
|
|
|
-// ─── extract samples ──────────────────────────────────────────────────────────
|
|
|
-
|
|
|
-function extractSamplesRowBased(array $rawData, int $headerRow): array
|
|
|
/**
 * Extract samples from a row-based sheet: one header row, one sample per row below it.
 *
 * Each sample is keyed by the header text; columns with an empty header are
 * ignored and cells missing from a short row become ''. Rows with fewer than
 * three populated cells are discarded.
 *
 * @param array $rawData   Sheet contents as a list of rows, each a list of cell strings.
 * @param int   $headerRow Index of the header row within $rawData.
 * @return array List of samples, each an array of header => cell value.
 */
function extractRowBased(array $rawData, int $headerRow): array
{
    // An out-of-range header index yields no samples instead of a warning.
    $headers  = $rawData[$headerRow] ?? [];
    $rowCount = count($rawData);

    // "Populated" means not blank. array_filter() without a callback would also
    // discard "0", which is a legitimate reading.
    $isFilled = static fn ($value): bool => $value !== null && $value !== '';

    $samples = [];
    for ($r = $headerRow + 1; $r < $rowCount; $r++) {
        $row    = $rawData[$r] ?? [];
        $sample = [];

        foreach ($headers as $c => $header) {
            if ($header === '') {
                continue;
            }
            $sample[$header] = $row[$c] ?? '';
        }

        if (count(array_filter($sample, $isFilled)) >= 3) {
            $samples[] = $sample;
        }
    }

    return $samples;
}
|
|
|
|
|
|
-function extractSamplesTransposed(array $rawData): array
|
|
|
/**
 * Extract samples from a transposed sheet: column 0 holds the labels and every
 * further column is one sample.
 *
 * Only label/value pairs where both sides are non-blank are kept, and a column
 * needs at least three such pairs to count as a sample.
 *
 * @param array $rawData Sheet contents as a list of rows, each a list of cell strings.
 * @return array List of samples, each an array of label => value.
 */
function extractTransposed(array $rawData): array
{
    // max() throws on an empty array, so bail out first.
    if ($rawData === []) {
        return [];
    }

    $maxCols = max(array_map('count', $rawData));
    $samples = [];

    for ($col = 1; $col < $maxCols; $col++) {
        $sample = [];

        foreach ($rawData as $row) {
            // Read the label from the row itself. array_column() skips rows that
            // have no first cell and re-indexes, which would shift every later
            // label onto the wrong row if $rawData contains such rows.
            $label = trim((string) ($row[0] ?? ''));
            $value = trim((string) ($row[$col] ?? ''));

            if ($label !== '' && $value !== '') {
                $sample[$label] = $value;
            }
        }

        // Every stored value is non-blank, so a plain count is the populated-cell
        // count; it also keeps "0" readings that array_filter() would drop.
        if (count($sample) >= 3) {
            $samples[] = $sample;
        }
    }

    return $samples;
}
|
|
|
|
|
|
-$fmt = detectFormat($rawData);
|
|
|
-$samples = $fmt['transposed']
|
|
|
- ? extractSamplesTransposed($rawData)
|
|
|
- : extractSamplesRowBased($rawData, $fmt['headerRow']);
|
|
|
-
|
|
|
-if (empty($samples)) {
|
|
|
- jsonError('No samples found in the file.');
|
|
|
-}
|
|
|
-
|
|
|
-// ─── action: parse ────────────────────────────────────────────────────────────
|
|
|
-// Return a lightweight list of samples so the UI can let the user pick one.
|
|
|
+// ─── Ollama field mapping (generic lab fallback) ──────────────────────────────
|
|
|
|
|
|
-if ($action === 'parse') {
|
|
|
- $list = [];
|
|
|
- foreach ($samples as $idx => $sample) {
|
|
|
- // Try to find a meaningful display label
|
|
|
- $labId = $sample['LAB_NUMBER'] ?? $sample['Lab ID (Soil)'] ?? $sample['LAB_ID'] ?? "Sample " . ($idx + 1);
|
|
|
- $client = $sample['CLIENT NAME'] ?? $sample['Consultant'] ?? $sample['CUSTNO'] ?? '';
|
|
|
- $crop = $sample['CROP'] ?? $sample['Material (manure, sawdust, etc.)'] ?? '';
|
|
|
- $pad = $sample['PADDOCK'] ?? $sample['Field Name (Sample ID)'] ?? '';
|
|
|
-
|
|
|
- $list[] = [
|
|
|
- 'idx' => $idx,
|
|
|
- 'lab_id' => $labId,
|
|
|
- 'client' => $client,
|
|
|
- 'crop' => $crop,
|
|
|
- 'site' => $pad,
|
|
|
- ];
|
|
|
- }
|
|
|
- jsonOk([
|
|
|
- 'samples' => $list,
|
|
|
- 'count' => count($samples),
|
|
|
- 'format' => $fmt['transposed'] ? 'transposed' : 'row-based',
|
|
|
- 'header_row' => $fmt['headerRow'] ?? 0,
|
|
|
- ]);
|
|
|
-}
|
|
|
-
|
|
|
-// ─── action: import ───────────────────────────────────────────────────────────
|
|
|
-
|
|
|
-$sampleIdx = (int) ($_POST['sample_idx'] ?? 0);
|
|
|
-if ($sampleIdx < 0 || $sampleIdx >= count($samples)) {
|
|
|
- jsonError('Invalid sample index.');
|
|
|
-}
|
|
|
-
|
|
|
-$sampleData = $samples[$sampleIdx];
|
|
|
-
|
|
|
-// ─── Ollama field mapping ─────────────────────────────────────────────────────
|
|
|
-
|
|
|
-$labJson = json_encode($sampleData, JSON_UNESCAPED_UNICODE);
|
|
|
-
|
|
|
-$prompt = <<<EOT
|
|
|
-You are a soil laboratory data mapper. Your only job is to output a JSON object.
|
|
|
-
|
|
|
-Map the LAB DATA below to these TARGET FIELDS. Output ONLY the JSON object — no explanation, no markdown, no code fences.
|
|
|
-
|
|
|
-TARGET FIELDS:
|
|
|
-lab_no=Lab reference number/Lab ID
|
|
|
-sample_id=Sample identifier/paddock name/field name
|
|
|
-site_id=Site identifier/block/customer number
|
|
|
-date_sampled=Date sampled as YYYY-MM-DD
|
|
|
-texture=Soil texture description
|
|
|
-gravel=Gravel % (number only)
|
|
|
/**
 * Ask the Ollama model to map one sample's lab-specific columns onto the
 * soil_records field names listed in the prompt.
 *
 * This is best-effort: whenever the request cannot be built, the HTTP call
 * fails, or the answer is not a JSON object, the input is returned unchanged.
 *
 * @param array $sampleData One sample as lab header => value.
 * @return array The decoded JSON object from the model, or $sampleData on any failure.
 */
function ollamaMap(array $sampleData): array
{
    // Spreadsheet text may contain invalid UTF-8. Without the substitute flag
    // json_encode() returns false, which would interpolate into the prompt as an
    // empty string and leave the model with no data to map.
    $labJson = json_encode($sampleData, JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE);
    if ($labJson === false) {
        return $sampleData;
    }

    $prompt = <<<EOT
You are a soil laboratory data mapper. Output ONLY a JSON object — no explanation, no markdown.

Map the LAB DATA to these TARGET FIELDS:
lab_no=Lab reference number
sample_id=Sample/paddock identifier
site_id=Site/block identifier
date_sampled=Date as YYYY-MM-DD
texture=Soil texture
gravel=Gravel % (number)
colour=Soil colour
ocarbon=Organic carbon % (number)
omatter=Organic matter % (number)
ph_cacl2=pH CaCl2 (number)
ph_h2o=pH water (number)
ec=EC dS/m (number)
NO3_N=Nitrate-N mg/kg (number)
NH3_N=Ammonium-N mg/kg (number)
p_morgan=Phosphorus mg/kg (number)
k_morgan=Potassium mg/kg (number)
ca_morgan=Calcium mg/kg (number)
mg_morgan=Magnesium mg/kg (number)
na_morgan=Sodium mg/kg (number)
s_morgan=Sulphur mg/kg (number)
b_cacl2=Boron mg/kg (number)
mn_dtpa=Manganese mg/kg (number)
zn_dtpa=Zinc mg/kg (number)
fe_dtpa=Iron mg/kg (number)
cu_dtpa=Copper mg/kg (number)
al=Aluminium mg/kg (number)
cec=CEC meq/100g (number)

LAB DATA: {$labJson}

Rules: only use values in the data. Strip units. Use null for unmapped. Output JSON only.
EOT;

    $payload = json_encode([
        'model'   => OLLAMA_MODEL,
        'prompt'  => $prompt,
        'stream'  => false,
        'options' => ['temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 512],
    ]);
    if ($payload === false) {
        return $sampleData;
    }

    $ch = curl_init(OLLAMA_HOST . '/api/generate');
    if ($ch === false) {
        // curl_init() can fail; passing false to curl_setopt_array() would throw.
        return $sampleData;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);
    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $curlErr  = curl_error($ch);
    curl_close($ch);

    if (!is_string($response) || $curlErr || $httpCode !== 200) {
        return $sampleData;
    }

    // The generated text is read from the `response` key of the JSON reply.
    $data    = json_decode($response, true);
    $rawText = is_array($data) && is_string($data['response'] ?? null)
        ? trim($data['response'])
        : '';

    // Strip markdown code fences, then keep only the outermost {...} span in
    // case the model added commentary around the object.
    $rawText = (string) preg_replace('/^```(?:json)?\s*/i', '', $rawText);
    $rawText = (string) preg_replace('/\s*```$/m', '', $rawText);
    if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) {
        $rawText = $m[0];
    }

    $mapped = json_decode($rawText, true);

    return is_array($mapped) ? $mapped : $sampleData;
}
|