false, 'error' => $message]); exit; } function jsonOk(array $data): never { echo json_encode(['success' => true, ...$data]); exit; } // ─── request validation ─────────────────────────────────────────────────────── if ($_SERVER['REQUEST_METHOD'] !== 'POST') { jsonError('Method not allowed', 405); } $action = $_POST['action'] ?? 'parse'; if (!in_array($action, ['parse', 'import'], true)) { jsonError('Invalid action'); } // ─── file handling ──────────────────────────────────────────────────────────── if (empty($_FILES['file']) || $_FILES['file']['error'] !== UPLOAD_ERR_OK) { jsonError('No file uploaded or upload error: ' . ($_FILES['file']['error'] ?? 'missing')); } $uploadedFile = $_FILES['file']; $ext = strtolower(pathinfo($uploadedFile['name'], PATHINFO_EXTENSION)); if (!in_array($ext, ['xls', 'xlsx', 'csv', 'ods'], true)) { jsonError('Unsupported file type. Please upload XLS, XLSX, CSV, or ODS.'); } // ─── PhpSpreadsheet ─────────────────────────────────────────────────────────── $autoloadPaths = [ __DIR__ . '/../vendor/autoload.php', __DIR__ . '/../../vendor/autoload.php', ]; $autoloaded = false; foreach ($autoloadPaths as $path) { if (file_exists($path)) { require_once $path; $autoloaded = true; break; } } if (!$autoloaded) { jsonError('PhpSpreadsheet not installed. Run: composer require phpoffice/phpspreadsheet', 500); } use PhpOffice\PhpSpreadsheet\IOFactory; use PhpOffice\PhpSpreadsheet\Spreadsheet; try { $spreadsheet = IOFactory::load($uploadedFile['tmp_name']); } catch (\Exception $e) { jsonError('Could not read file: ' . $e->getMessage()); } $sheet = $spreadsheet->getActiveSheet(); // Convert sheet to a 2-D array (1-indexed rows and cols → 0-indexed) $rawData = []; foreach ($sheet->getRowIterator() as $row) { $cells = []; foreach ($row->getCellIterator() as $cell) { $cells[] = trim((string) $cell->getFormattedValue()); } // Strip trailing empty cells while ($cells && end($cells) === '') { array_pop($cells); } if ($cells) { $rawData[] = $cells; } } if (empty($rawData)) { jsonError('The spreadsheet appears to be empty.'); } // ─── format detection ───────────────────────────────────────────────────────── // // Two layouts found in CSBP lab files: // // TRANSPOSED (lab card) — Column 0 = row labels ("EC 1:5", "Total P %", …) // Columns 1-N = one sample each. // Example: SOIL CONTROL XNS06189.xls // // ROW-BASED (report) — One row = column headers; subsequent rows = samples. // May have 1-3 title/subtitle rows above the headers. // Example: S-C Soil Tests 2006.xls, YOS06 42-48.xlsx // // Detection strategy: // 1. Score column-0 values for soil-chemistry label patterns (units, element // names, "1:5", "ppm", etc.). ≥2 matches → transposed. // 2. Otherwise scan the first 10 rows for a "header row" — the row that best // matches known CSBP column-code keywords. Everything above it is a title. /** * Returns ['transposed' => bool, 'headerRow' => int] */ function detectFormat(array $rawData): array { // Phrases that appear in column-0 of a transposed lab card $transposedSignals = [ '1:5', 'total p', 'total k', 'total ca', 'total mg', 'total na', 'total s', 'total n', 'total b', 'total zn', 'total mn', 'total fe', 'total cu', 'total cl', 'organic matter', 'organic carbon', 'lab id', 'lab performing', 'field name', 'nitrate ppm', 'ph 1:5', 'moisture %', 'consultant', ]; // Phrases that appear in the header row of a row-based file $rowBasedSignals = [ 'lab_number', 'custno', 'paddock', 'ph_cacl2', 'ph_h2o', 'dtpa_cu', 'dtpa_zn', 'dtpa_mn', 'dtpa_fe', 'conducty', 'orgcarbon', 'nitrate', 'nammonium', 'texture', 'gravel', 'exc_ca', 'exc_mg', 'exc_na', 'exc_k', 'alum_cacl2', 'boron_hot', 'client name', 'lab number', 'sat_ca', 'sat_mg', 'sat_k', 'sat_na', 'crop', 'sp%', ]; // Step 1: score column-0 values for transposed signals $transposedScore = 0; foreach (array_slice($rawData, 0, 20) as $row) { $cell = strtolower($row[0] ?? ''); if ($cell === '') { continue; } foreach ($transposedSignals as $signal) { if (str_contains($cell, $signal)) { $transposedScore++; break; } } } if ($transposedScore >= 2) { return ['transposed' => true, 'headerRow' => 0]; } // Step 2: find the best header row in a row-based file $bestRow = 0; $bestScore = 0; for ($i = 0; $i < min(10, count($rawData)); $i++) { $score = 0; foreach ($rawData[$i] as $cell) { $cell = strtolower(trim($cell)); if ($cell === '') { continue; } foreach ($rowBasedSignals as $signal) { if (str_contains($cell, $signal)) { $score++; break; } } } if ($score > $bestScore) { $bestScore = $score; $bestRow = $i; } } return ['transposed' => false, 'headerRow' => $bestRow]; } // ─── extract samples ────────────────────────────────────────────────────────── function extractSamplesRowBased(array $rawData, int $headerRow): array { $headers = $rawData[$headerRow]; $samples = []; for ($r = $headerRow + 1; $r < count($rawData); $r++) { $row = $rawData[$r]; $sample = []; foreach ($headers as $c => $header) { if ($header === '') { continue; } $sample[$header] = $row[$c] ?? ''; } if (array_filter($sample)) { $samples[] = $sample; } } return $samples; } function extractSamplesTransposed(array $rawData): array { // Column 0 = labels; columns 1-N = samples. // Count sample columns from the widest row. $labels = array_column($rawData, 0); $maxCols = max(array_map('count', $rawData)); $samples = []; for ($col = 1; $col < $maxCols; $col++) { $sample = []; foreach ($rawData as $rowIdx => $row) { $label = trim($labels[$rowIdx] ?? ''); $value = trim($row[$col] ?? ''); if ($label !== '' && $value !== '') { $sample[$label] = $value; } } // Only keep columns that have at least a few populated cells if (count(array_filter($sample)) >= 3) { $samples[] = $sample; } } return $samples; } $fmt = detectFormat($rawData); $samples = $fmt['transposed'] ? extractSamplesTransposed($rawData) : extractSamplesRowBased($rawData, $fmt['headerRow']); if (empty($samples)) { jsonError('No samples found in the file.'); } // ─── action: parse ──────────────────────────────────────────────────────────── // Return a lightweight list of samples so the UI can let the user pick one. if ($action === 'parse') { $list = []; foreach ($samples as $idx => $sample) { // Try to find a meaningful display label $labId = $sample['LAB_NUMBER'] ?? $sample['Lab ID (Soil)'] ?? $sample['LAB_ID'] ?? "Sample " . ($idx + 1); $client = $sample['CLIENT NAME'] ?? $sample['Consultant'] ?? $sample['CUSTNO'] ?? ''; $crop = $sample['CROP'] ?? $sample['Material (manure, sawdust, etc.)'] ?? ''; $pad = $sample['PADDOCK'] ?? $sample['Field Name (Sample ID)'] ?? ''; $list[] = [ 'idx' => $idx, 'lab_id' => $labId, 'client' => $client, 'crop' => $crop, 'site' => $pad, ]; } jsonOk([ 'samples' => $list, 'count' => count($samples), 'format' => $fmt['transposed'] ? 'transposed' : 'row-based', 'header_row' => $fmt['headerRow'] ?? 0, ]); } // ─── action: import ─────────────────────────────────────────────────────────── $sampleIdx = (int) ($_POST['sample_idx'] ?? 0); if ($sampleIdx < 0 || $sampleIdx >= count($samples)) { jsonError('Invalid sample index.'); } $sampleData = $samples[$sampleIdx]; // ─── Ollama field mapping ───────────────────────────────────────────────────── $labJson = json_encode($sampleData, JSON_UNESCAPED_UNICODE); $prompt = << OLLAMA_MODEL, 'prompt' => $prompt, 'stream' => false, 'options' => [ 'temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 1024, ], ]); $ch = curl_init(OLLAMA_HOST . '/api/generate'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => $payload, CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => OLLAMA_TIMEOUT, CURLOPT_CONNECTTIMEOUT => 5, ]); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); $curlErr = curl_error($ch); curl_close($ch); if ($curlErr || $httpCode !== 200) { $mapped = staticFieldMap($sampleData); $warning = $curlErr ?: "Ollama HTTP {$httpCode}"; jsonOk(['fields' => $mapped, 'method' => 'static', 'warning' => 'AI unavailable: ' . $warning]); } $ollamaData = json_decode($response, true); $rawText = trim($ollamaData['response'] ?? ''); if ($rawText === '') { $mapped = staticFieldMap($sampleData); jsonOk(['fields' => $mapped, 'method' => 'static', 'warning' => 'Ollama returned empty response']); } // Strip any markdown code fences the model might wrap around the JSON $rawText = preg_replace('/^```(?:json)?\s*/i', '', $rawText); $rawText = preg_replace('/\s*```$/m', '', $rawText); // Extract the first JSON object if the model added commentary if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) { $rawText = $m[0]; } $mapped = json_decode($rawText, true); if (!is_array($mapped)) { $mapped = staticFieldMap($sampleData); jsonOk(['fields' => $mapped, 'method' => 'static', 'warning' => 'AI returned unparseable JSON']); } // Remove null/empty values $mapped = array_filter($mapped, fn($v) => $v !== null && $v !== ''); jsonOk(['fields' => $mapped, 'method' => 'ai']); // ─── static fallback mapper ─────────────────────────────────────────────────── // Simple keyword-based mapping used when the AI is unavailable. function staticFieldMap(array $data): array { $map = [ // lab_no 'lab_no' => ['LAB_NUMBER', 'Lab ID (Soil)', 'LAB_ID'], // sample / site 'sample_id' => ['PADDOCK', 'Field Name (Sample ID)', 'PADDOCK_NAME'], 'site_id' => ['CUSTNO', 'Lab performing testing'], // physical 'texture' => ['TEXTURE'], 'gravel' => ['GRAVEL'], 'colour' => ['COLOUR', 'COLOR'], // chemical 'ocarbon' => ['ORGCARBON', 'Organic Carbon %', 'Total Organic Carbon %'], 'omatter' => ['Total Organic Matter (L.O.I) %', 'Organic Matter %'], 'ph_cacl2' => ['PH_CACL2', 'PH 1:5 (CaCl2)', 'pH CaCl2', 'ph_cacl2'], 'ph_h2o' => ['PH_H2O', 'pH 1:5 (H2O)', 'pH Water'], 'ec' => ['CONDUCTY', 'EC 1:5', 'EC'], // nutrients 'NO3_N' => ['NITRATE', 'Nitrate ppm', 'Nitrate-N', 'NO3_N'], 'NH3_N' => ['NAMMONIUM', 'Ammonium', 'NH4_N'], 'p_morgan' => ['PHOS', 'Total P %', 'Phosphorus'], 'k_morgan' => ['POTASSIUM', 'Total K %', 'Potassium'], 'ca_morgan' => ['EXC_CA', 'Total Ca %', 'Calcium'], 'mg_morgan' => ['EXC_MG', 'Total Mg %', 'Magnesium'], 'na_morgan' => ['EXC_NA', 'Total Na %', 'Sodium'], 's_morgan' => ['SULPHUR', 'Total S %', 'Sulphur'], // micronutrients 'b_cacl2' => ['BORON_HOT', 'Total B ppm', 'Boron'], 'mn_dtpa' => ['DTPA_MN', 'EDTA_MN', 'Total Mn ppm', 'Manganese'], 'zn_dtpa' => ['DTPA_ZN', 'EDTA_ZN', 'Total Zn ppm', 'Zinc'], 'fe_dtpa' => ['DTPA_FE', 'EDTA_FE', 'Total Fe ppm', 'Iron', 'IRON'], 'cu_dtpa' => ['DTPA_CU', 'EDTA_CU', 'Total Cu ppm', 'Copper'], 'al' => ['ALUM_CACL2', 'EXC_AL', 'Aluminium'], // base saturation 'cec' => ['CEC', 'COND', 'SAT_COND'], 'ca_mehlick3' => ['SAT_Ca', 'SAT_CA'], 'mg_mehlick3' => ['SAT_Mg', 'SAT_MG'], 'k_mehlick3' => ['SAT_K'], 'na_mehlick3' => ['SAT_Na', 'SAT_NA'], ]; $result = []; foreach ($map as $dbField => $labKeys) { foreach ($labKeys as $labKey) { // Case-insensitive search foreach ($data as $k => $v) { if (strcasecmp($k, $labKey) === 0 && $v !== '') { $result[$dbField] = $v; break 2; } } } } return $result; }