false, 'error' => $message]); exit; } function jsonOk(array $data): never { echo json_encode(['success' => true, ...$data]); exit; } // ─── request validation ─────────────────────────────────────────────────────── if ($_SERVER['REQUEST_METHOD'] !== 'POST') { jsonError('Method not allowed', 405); } $action = $_POST['action'] ?? 'parse'; if (!in_array($action, ['parse', 'import'], true)) { jsonError('Invalid action'); } // ─── file handling ──────────────────────────────────────────────────────────── if (empty($_FILES['file']) || $_FILES['file']['error'] !== UPLOAD_ERR_OK) { jsonError('No file uploaded or upload error: ' . ($_FILES['file']['error'] ?? 'missing')); } $uploadedFile = $_FILES['file']; $ext = strtolower(pathinfo($uploadedFile['name'], PATHINFO_EXTENSION)); if (!in_array($ext, ['xls', 'xlsx', 'csv', 'ods'], true)) { jsonError('Unsupported file type. Please upload XLS, XLSX, CSV, or ODS.'); } // ─── PhpSpreadsheet ─────────────────────────────────────────────────────────── $autoloadPaths = [ __DIR__ . '/../vendor/autoload.php', __DIR__ . '/../../vendor/autoload.php', ]; $autoloaded = false; foreach ($autoloadPaths as $path) { if (file_exists($path)) { require_once $path; $autoloaded = true; break; } } if (!$autoloaded) { jsonError('PhpSpreadsheet not installed. Run: composer require phpoffice/phpspreadsheet', 500); } use PhpOffice\PhpSpreadsheet\IOFactory; use PhpOffice\PhpSpreadsheet\Spreadsheet; try { $spreadsheet = IOFactory::load($uploadedFile['tmp_name']); } catch (\Exception $e) { jsonError('Could not read file: ' . $e->getMessage()); } $sheet = $spreadsheet->getActiveSheet(); // Convert sheet to a 2-D array (1-indexed rows and cols → 0-indexed) $rawData = []; foreach ($sheet->getRowIterator() as $row) { $cells = []; foreach ($row->getCellIterator() as $cell) { $cells[] = trim((string) $cell->getFormattedValue()); } // Strip trailing empty cells while ($cells && end($cells) === '') { array_pop($cells); } if ($cells) { $rawData[] = $cells; } } if (empty($rawData)) { jsonError('The spreadsheet appears to be empty.'); } // ─── format detection ───────────────────────────────────────────────────────── // // Two layouts found in CSBP lab files: // ROW-BASED — Row 0 = column headers (LAB_NUMBER, TEXTURE, PH_CACL2 …) // Rows 1-N = one sample per row. // TRANSPOSED — Column 0 = row labels (EC 1:5, Total P % …) // Columns 1-N = one sample per column. // // Heuristic: if the first cell of row 0 looks like a short code/identifier // (< 20 chars, no spaces, all-caps or underscored) → ROW-BASED. // Otherwise → TRANSPOSED. function isRowBased(array $rawData): bool { $firstCell = $rawData[0][0] ?? ''; // Short, code-like headers signal a row-based layout return strlen($firstCell) < 25 && !str_contains($firstCell, ' ') && $firstCell !== ''; } $transposed = !isRowBased($rawData); // ─── extract samples ────────────────────────────────────────────────────────── // // Returns an array of samples, each sample being an assoc array of // label → value. function extractSamplesRowBased(array $rawData): array { $headers = $rawData[0]; $samples = []; for ($r = 1; $r < count($rawData); $r++) { $row = $rawData[$r]; $sample = []; foreach ($headers as $c => $header) { if ($header === '') { continue; } $sample[$header] = $row[$c] ?? ''; } if (array_filter($sample)) { $samples[] = $sample; } } return $samples; } function extractSamplesTransposed(array $rawData): array { // Column 0 = labels; columns 1-N = samples $labels = array_column($rawData, 0); $numSamples = max(array_map('count', $rawData)) - 1; $samples = []; for ($col = 1; $col <= $numSamples; $col++) { $sample = []; foreach ($rawData as $rowIdx => $row) { $label = $labels[$rowIdx] ?? ''; $value = $row[$col] ?? ''; if ($label !== '' && $value !== '') { $sample[$label] = $value; } } if (array_filter($sample)) { $samples[] = $sample; } } return $samples; } $samples = $transposed ? extractSamplesTransposed($rawData) : extractSamplesRowBased($rawData); if (empty($samples)) { jsonError('No samples found in the file.'); } // ─── action: parse ──────────────────────────────────────────────────────────── // Return a lightweight list of samples so the UI can let the user pick one. if ($action === 'parse') { $list = []; foreach ($samples as $idx => $sample) { // Try to find a meaningful display label $labId = $sample['LAB_NUMBER'] ?? $sample['Lab ID (Soil)'] ?? $sample['LAB_ID'] ?? "Sample " . ($idx + 1); $client = $sample['CLIENT NAME'] ?? $sample['Consultant'] ?? $sample['CUSTNO'] ?? ''; $crop = $sample['CROP'] ?? $sample['Material (manure, sawdust, etc.)'] ?? ''; $pad = $sample['PADDOCK'] ?? $sample['Field Name (Sample ID)'] ?? ''; $list[] = [ 'idx' => $idx, 'lab_id' => $labId, 'client' => $client, 'crop' => $crop, 'site' => $pad, ]; } jsonOk(['samples' => $list, 'count' => count($samples)]); } // ─── action: import ─────────────────────────────────────────────────────────── $sampleIdx = (int) ($_POST['sample_idx'] ?? 0); if ($sampleIdx < 0 || $sampleIdx >= count($samples)) { jsonError('Invalid sample index.'); } $sampleData = $samples[$sampleIdx]; // ─── Ollama field mapping ───────────────────────────────────────────────────── $labJson = json_encode($sampleData, JSON_UNESCAPED_UNICODE); $prompt = << OLLAMA_MODEL, 'prompt' => $prompt, 'stream' => false, 'options' => [ 'temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 1024, ], ]); $ch = curl_init(OLLAMA_HOST . '/api/generate'); curl_setopt_array($ch, [ CURLOPT_POST => true, CURLOPT_POSTFIELDS => $payload, CURLOPT_HTTPHEADER => ['Content-Type: application/json'], CURLOPT_RETURNTRANSFER => true, CURLOPT_TIMEOUT => OLLAMA_TIMEOUT, CURLOPT_CONNECTTIMEOUT => 5, ]); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); $curlErr = curl_error($ch); curl_close($ch); if ($curlErr || $httpCode !== 200) { $mapped = staticFieldMap($sampleData); $warning = $curlErr ?: "Ollama HTTP {$httpCode}"; jsonOk(['fields' => $mapped, 'method' => 'static', 'warning' => 'AI unavailable: ' . $warning]); } $ollamaData = json_decode($response, true); $rawText = trim($ollamaData['response'] ?? ''); if ($rawText === '') { $mapped = staticFieldMap($sampleData); jsonOk(['fields' => $mapped, 'method' => 'static', 'warning' => 'Ollama returned empty response']); } // Strip any markdown code fences the model might wrap around the JSON $rawText = preg_replace('/^```(?:json)?\s*/i', '', $rawText); $rawText = preg_replace('/\s*```$/m', '', $rawText); // Extract the first JSON object if the model added commentary if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) { $rawText = $m[0]; } $mapped = json_decode($rawText, true); if (!is_array($mapped)) { $mapped = staticFieldMap($sampleData); jsonOk(['fields' => $mapped, 'method' => 'static', 'warning' => 'AI returned unparseable JSON']); } // Remove null/empty values $mapped = array_filter($mapped, fn($v) => $v !== null && $v !== ''); jsonOk(['fields' => $mapped, 'method' => 'ai']); // ─── static fallback mapper ─────────────────────────────────────────────────── // Simple keyword-based mapping used when the AI is unavailable. function staticFieldMap(array $data): array { $map = [ // lab_no 'lab_no' => ['LAB_NUMBER', 'Lab ID (Soil)', 'LAB_ID'], // sample / site 'sample_id' => ['PADDOCK', 'Field Name (Sample ID)', 'PADDOCK_NAME'], 'site_id' => ['CUSTNO', 'Lab performing testing'], // physical 'texture' => ['TEXTURE'], 'gravel' => ['GRAVEL'], 'colour' => ['COLOUR', 'COLOR'], // chemical 'ocarbon' => ['ORGCARBON', 'Organic Carbon %', 'Total Organic Carbon %'], 'omatter' => ['Total Organic Matter (L.O.I) %', 'Organic Matter %'], 'ph_cacl2' => ['PH_CACL2', 'PH 1:5 (CaCl2)', 'pH CaCl2', 'ph_cacl2'], 'ph_h2o' => ['PH_H2O', 'pH 1:5 (H2O)', 'pH Water'], 'ec' => ['CONDUCTY', 'EC 1:5', 'EC'], // nutrients 'NO3_N' => ['NITRATE', 'Nitrate ppm', 'Nitrate-N', 'NO3_N'], 'NH3_N' => ['NAMMONIUM', 'Ammonium', 'NH4_N'], 'p_morgan' => ['PHOS', 'Total P %', 'Phosphorus'], 'k_morgan' => ['POTASSIUM', 'Total K %', 'Potassium'], 'ca_morgan' => ['EXC_CA', 'Total Ca %', 'Calcium'], 'mg_morgan' => ['EXC_MG', 'Total Mg %', 'Magnesium'], 'na_morgan' => ['EXC_NA', 'Total Na %', 'Sodium'], 's_morgan' => ['SULPHUR', 'Total S %', 'Sulphur'], // micronutrients 'b_cacl2' => ['BORON_HOT', 'Total B ppm', 'Boron'], 'mn_dtpa' => ['DTPA_MN', 'EDTA_MN', 'Total Mn ppm', 'Manganese'], 'zn_dtpa' => ['DTPA_ZN', 'EDTA_ZN', 'Total Zn ppm', 'Zinc'], 'fe_dtpa' => ['DTPA_FE', 'EDTA_FE', 'Total Fe ppm', 'Iron', 'IRON'], 'cu_dtpa' => ['DTPA_CU', 'EDTA_CU', 'Total Cu ppm', 'Copper'], 'al' => ['ALUM_CACL2', 'EXC_AL', 'Aluminium'], // base saturation 'cec' => ['CEC', 'COND', 'SAT_COND'], 'ca_mehlick3' => ['SAT_Ca', 'SAT_CA'], 'mg_mehlick3' => ['SAT_Mg', 'SAT_MG'], 'k_mehlick3' => ['SAT_K'], 'na_mehlick3' => ['SAT_Na', 'SAT_NA'], ]; $result = []; foreach ($map as $dbField => $labKeys) { foreach ($labKeys as $labKey) { // Case-insensitive search foreach ($data as $k => $v) { if (strcasecmp($k, $labKey) === 0 && $v !== '') { $result[$dbField] = $v; break 2; } } } } return $result; }