| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429 |
- <?php
- /**
- * controllers/soilImportController.php
- *
- * Handles XLS/XLSX/CSV upload from soil labs.
- *
- * POST /controllers/soilImportController.php
- * file — multipart upload
- * lab — lab identifier: "csbp" | "generic"
- * action — "parse" → detect lab, return sample list
- * "import_one" → map + return fields for one sample (form pre-fill)
- * "import_bulk" → save all (or selected) samples directly to DB
- * sample_idx — (import_one) 0-based index
- * samples — (import_bulk) JSON array of sample objects with overrides applied by user
- * client_id — (import_bulk) client_records.id to link records to
- */
- require_once __DIR__ . '/../config/database.php';
- require_once __DIR__ . '/../config/ai.php';
- require_once __DIR__ . '/../lib/auth.php';
- require_once __DIR__ . '/../lib/csrf.php';
- require_once __DIR__ . '/labParsers/csbp.php';
- if (session_status() === PHP_SESSION_NONE) {
- session_start();
- }
- requireLogin();
- header('Content-Type: application/json');
- // ─── helpers ─────────────────────────────────────────────────────────────────
- function jsonError(string $message, int $code = 400): never
- {
- http_response_code($code);
- echo json_encode(['success' => false, 'error' => $message]);
- exit;
- }
- function jsonOk(array $data): never
- {
- echo json_encode(['success' => true, ...$data]);
- exit;
- }
- // ─── request validation ───────────────────────────────────────────────────────
- if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
- jsonError('Method not allowed', 405);
- }
- $action = $_POST['action'] ?? 'parse';
- if (!in_array($action, ['parse', 'import_one', 'import_bulk'], true)) {
- jsonError('Invalid action');
- }
- // ─── file handling ────────────────────────────────────────────────────────────
- if (empty($_FILES['file']) || $_FILES['file']['error'] !== UPLOAD_ERR_OK) {
- jsonError('No file uploaded or upload error: ' . ($_FILES['file']['error'] ?? 'missing'));
- }
- $uploadedFile = $_FILES['file'];
- $ext = strtolower(pathinfo($uploadedFile['name'], PATHINFO_EXTENSION));
- if (!in_array($ext, ['xls', 'xlsx', 'csv', 'ods'], true)) {
- jsonError('Unsupported file type. Please upload XLS, XLSX, CSV, or ODS.');
- }
- // ─── PhpSpreadsheet ───────────────────────────────────────────────────────────
- foreach ([__DIR__ . '/../vendor/autoload.php', __DIR__ . '/../../vendor/autoload.php'] as $p) {
- if (file_exists($p)) { require_once $p; break; }
- }
- if (!class_exists('\PhpOffice\PhpSpreadsheet\IOFactory')) {
- jsonError('PhpSpreadsheet not installed. Run: composer install', 500);
- }
- use PhpOffice\PhpSpreadsheet\IOFactory;
- try {
- $spreadsheet = IOFactory::load($uploadedFile['tmp_name']);
- } catch (\Exception $e) {
- jsonError('Could not read file: ' . $e->getMessage());
- }
- $sheet = $spreadsheet->getActiveSheet();
- $rawData = [];
- foreach ($sheet->getRowIterator() as $row) {
- $cells = [];
- foreach ($row->getCellIterator() as $cell) {
- $cells[] = trim((string) $cell->getFormattedValue());
- }
- while ($cells && end($cells) === '') {
- array_pop($cells);
- }
- if ($cells) {
- $rawData[] = $cells;
- }
- }
- if (empty($rawData)) {
- jsonError('The spreadsheet appears to be empty.');
- }
- // ─── Lab detection ────────────────────────────────────────────────────────────
- //
- // Client can tell us the lab via POST['lab'], or we auto-detect.
- $lab = strtolower(trim($_POST['lab'] ?? 'auto'));
- if ($lab === 'auto' || $lab === '') {
- if (csbpDetect($rawData)) {
- $lab = 'csbp';
- } else {
- $lab = 'generic';
- }
- }
- // ─── Parse into samples ───────────────────────────────────────────────────────
- if ($lab === 'csbp') {
- $samples = csbpParse($rawData);
- } else {
- $samples = genericParse($rawData);
- }
- if (empty($samples)) {
- jsonError('No samples found in the file. Check the correct lab is selected.');
- }
- // ─── action: parse ────────────────────────────────────────────────────────────
- if ($action === 'parse') {
- $list = [];
- foreach ($samples as $idx => $s) {
- $list[] = [
- 'idx' => $idx,
- 'lab_no' => $s['lab_no'] ?? 'Sample ' . ($idx + 1),
- 'sample_id' => $s['sample_id'] ?? '', // paddock
- 'client' => $s['client_name'] ?? '',
- 'crop' => $s['crop_type'] ?? '',
- ];
- }
- jsonOk(['samples' => $list, 'count' => count($samples), 'lab' => $lab]);
- }
- // ─── action: import_one ───────────────────────────────────────────────────────
- // Returns mapped field values for a single sample to pre-fill the form.
- if ($action === 'import_one') {
- $idx = (int) ($_POST['sample_idx'] ?? 0);
- if ($idx < 0 || $idx >= count($samples)) {
- jsonError('Invalid sample index.');
- }
- $fields = $samples[$idx];
- // If generic lab, try AI mapping on top
- if ($lab === 'generic') {
- $fields = ollamaMap($fields);
- }
- $fields = array_filter($fields, fn($v) => $v !== null && $v !== '');
- jsonOk(['fields' => $fields, 'method' => $lab === 'csbp' ? 'csbp' : 'ai']);
- }
- // ─── action: import_bulk ─────────────────────────────────────────────────────
- // Saves all samples (with user-confirmed paddock IDs) directly to soil_records.
- if ($action === 'import_bulk') {
- $clientId = (int) ($_POST['client_id'] ?? 0);
- $userId = (int) getCurrentUserId();
- $confirmedJson = $_POST['samples'] ?? '[]';
- $confirmed = json_decode($confirmedJson, true);
- if (!is_array($confirmed) || empty($confirmed)) {
- // Fall back to all parsed samples
- $confirmed = $samples;
- }
- if (!$clientId) {
- jsonError('Please select a client before bulk importing.');
- }
- $pdo = getDBConnection();
- $inserted = 0;
- $skipped = [];
- foreach ($confirmed as $s) {
- // Require at minimum a lab number or sample ID
- if (empty($s['lab_no']) && empty($s['sample_id'])) {
- $skipped[] = 'Row missing lab number and paddock';
- continue;
- }
- // Avoid duplicates: skip if this lab_no already exists for this client
- if (!empty($s['lab_no'])) {
- $dup = $pdo->prepare("
- SELECT id FROM soil_records
- WHERE lab_no = ? AND CAST(client_records_id AS UNSIGNED) = ?
- LIMIT 1
- ");
- $dup->execute([$s['lab_no'], $clientId]);
- if ($dup->fetch()) {
- $skipped[] = ($s['lab_no'] ?? '') . ' (duplicate)';
- continue;
- }
- }
- $rand = (string) (mt_rand(1000, 9999) / 1000); // matches existing rand pattern
- $stmt = $pdo->prepare("
- INSERT INTO soil_records (
- client_records_id, modx_user_id, date, client_name,
- analysis_type, lab_no, batch_no, sample_id, site_id,
- crop_type, soil_type, date_sampled,
- texture, gravel, colour,
- ph_cacl2, ph_h2o, ec, ocarbon, omatter, paramag,
- NO3_N, NH3_N,
- p_mehlick, p_bray2, p_morgan,
- k_morgan, ca_morgan, mg_morgan, na_morgan, s_morgan,
- b_cacl2, mn_dtpa, zn_dtpa, fe_dtpa, cu_dtpa, al, se,
- tec, cec, ca_mehlick3, mg_mehlick3, k_mehlick3, na_mehlick3, al_mehlick3,
- rand, status
- ) VALUES (
- ?, ?, NOW(), ?,
- ?, ?, ?, ?, ?,
- ?, ?, ?,
- ?, ?, ?,
- ?, ?, ?, ?, ?, ?,
- ?, ?,
- ?, ?, ?,
- ?, ?, ?, ?, ?,
- ?, ?, ?, ?, ?, ?, ?,
- ?, ?, ?, ?, ?, ?, ?,
- ?, '0'
- )
- ");
- $n = fn(string $key) => isset($s[$key]) && $s[$key] !== '' ? (float) $s[$key] : null;
- $t = fn(string $key) => $s[$key] ?? null;
- $stmt->execute([
- $clientId, $userId, $t('client_name'),
- $t('analysis_type'), $t('lab_no'), $t('batch_no'), $t('sample_id'), $t('site_id'),
- $t('crop_type'), $t('soil_type'), $t('date_sampled'),
- $t('texture'), $n('gravel'), $t('colour'),
- $n('ph_cacl2'), $n('ph_h2o'), $n('ec'), $n('ocarbon'), $n('omatter'), $n('paramag'),
- $n('NO3_N'), $n('NH3_N'),
- $n('p_mehlick'), $n('p_bray2'), $n('p_morgan'),
- $n('k_morgan'), $n('ca_morgan'), $n('mg_morgan'), $n('na_morgan'), $n('s_morgan'),
- $n('b_cacl2'), $n('mn_dtpa'), $n('zn_dtpa'), $n('fe_dtpa'), $n('cu_dtpa'), $n('al'), $n('se'),
- $n('tec'), $n('cec'), $n('ca_mehlick3'), $n('mg_mehlick3'), $n('k_mehlick3'), $n('na_mehlick3'), $n('al_mehlick3'),
- $rand,
- ]);
- $inserted++;
- }
- jsonOk([
- 'inserted' => $inserted,
- 'skipped' => $skipped,
- 'message' => "{$inserted} sample" . ($inserted !== 1 ? 's' : '') . " imported successfully."
- . (count($skipped) ? ' Skipped: ' . implode(', ', $skipped) : ''),
- ]);
- }
- // ─── Generic parser (non-CSBP files) ─────────────────────────────────────────
- function genericParse(array $rawData): array
- {
- // Reuse the original detection + extraction logic for unknown labs
- $fmt = detectGenericFormat($rawData);
- return $fmt['transposed']
- ? extractTransposed($rawData)
- : extractRowBased($rawData, $fmt['headerRow']);
- }
- function detectGenericFormat(array $rawData): array
- {
- $transposedSignals = [
- '1:5', 'total p', 'total k', 'total ca', 'total mg', 'total na',
- 'total s', 'total n', 'total b', 'total zn', 'total mn', 'total fe',
- 'total cu', 'organic matter', 'organic carbon', 'lab id', 'nitrate ppm',
- 'ph 1:5', 'moisture %', 'consultant', 'field name',
- ];
- $rowBasedSignals = [
- 'lab_number', 'custno', 'paddock', 'ph_cacl2', 'ph_h2o',
- 'dtpa_cu', 'dtpa_zn', 'dtpa_mn', 'dtpa_fe', 'conducty',
- 'orgcarbon', 'nitrate', 'nammonium', 'texture', 'gravel',
- 'exc_ca', 'exc_mg', 'exc_na', 'exc_k', 'alum_cacl2',
- 'boron_hot', 'client name', 'lab number', 'sat_ca', 'sat_mg',
- 'sat_k', 'sat_na', 'crop', 'sp%',
- ];
- $transposedScore = 0;
- foreach (array_slice($rawData, 0, 20) as $row) {
- $cell = strtolower($row[0] ?? '');
- foreach ($transposedSignals as $sig) {
- if (str_contains($cell, $sig)) { $transposedScore++; break; }
- }
- }
- if ($transposedScore >= 2) {
- return ['transposed' => true, 'headerRow' => 0];
- }
- $bestRow = 0; $bestScore = 0;
- for ($i = 0; $i < min(10, count($rawData)); $i++) {
- $score = 0;
- foreach ($rawData[$i] as $cell) {
- $cell = strtolower(trim($cell));
- foreach ($rowBasedSignals as $sig) {
- if (str_contains($cell, $sig)) { $score++; break; }
- }
- }
- if ($score > $bestScore) { $bestScore = $score; $bestRow = $i; }
- }
- return ['transposed' => false, 'headerRow' => $bestRow];
- }
- function extractRowBased(array $rawData, int $headerRow): array
- {
- $headers = $rawData[$headerRow];
- $samples = [];
- for ($r = $headerRow + 1; $r < count($rawData); $r++) {
- $row = $rawData[$r]; $sample = [];
- foreach ($headers as $c => $header) {
- if ($header === '') continue;
- $sample[$header] = $row[$c] ?? '';
- }
- if (count(array_filter($sample)) >= 3) $samples[] = $sample;
- }
- return $samples;
- }
- function extractTransposed(array $rawData): array
- {
- $labels = array_column($rawData, 0);
- $maxCols = max(array_map('count', $rawData));
- $samples = [];
- for ($col = 1; $col < $maxCols; $col++) {
- $sample = [];
- foreach ($rawData as $ri => $row) {
- $label = trim($labels[$ri] ?? '');
- $value = trim($row[$col] ?? '');
- if ($label !== '' && $value !== '') $sample[$label] = $value;
- }
- if (count(array_filter($sample)) >= 3) $samples[] = $sample;
- }
- return $samples;
- }
- // ─── Ollama field mapping (generic lab fallback) ──────────────────────────────
- function ollamaMap(array $sampleData): array
- {
- $labJson = json_encode($sampleData, JSON_UNESCAPED_UNICODE);
- $prompt = <<<EOT
- You are a soil laboratory data mapper. Output ONLY a JSON object — no explanation, no markdown.
- Map the LAB DATA to these TARGET FIELDS:
- lab_no=Lab reference number
- sample_id=Sample/paddock identifier
- site_id=Site/block identifier
- date_sampled=Date as YYYY-MM-DD
- texture=Soil texture
- gravel=Gravel % (number)
- colour=Soil colour
- ocarbon=Organic carbon % (number)
- omatter=Organic matter % (number)
- ph_cacl2=pH CaCl2 (number)
- ph_h2o=pH water (number)
- ec=EC dS/m (number)
- NO3_N=Nitrate-N mg/kg (number)
- NH3_N=Ammonium-N mg/kg (number)
- p_morgan=Phosphorus mg/kg (number)
- k_morgan=Potassium mg/kg (number)
- ca_morgan=Calcium mg/kg (number)
- mg_morgan=Magnesium mg/kg (number)
- na_morgan=Sodium mg/kg (number)
- s_morgan=Sulphur mg/kg (number)
- b_cacl2=Boron mg/kg (number)
- mn_dtpa=Manganese mg/kg (number)
- zn_dtpa=Zinc mg/kg (number)
- fe_dtpa=Iron mg/kg (number)
- cu_dtpa=Copper mg/kg (number)
- al=Aluminium mg/kg (number)
- cec=CEC meq/100g (number)
- LAB DATA: {$labJson}
- Rules: only use values in the data. Strip units. Use null for unmapped. Output JSON only.
- EOT;
- $payload = json_encode([
- 'model' => OLLAMA_MODEL,
- 'prompt' => $prompt,
- 'stream' => false,
- 'options' => ['temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 512],
- ]);
- $ch = curl_init(OLLAMA_HOST . '/api/generate');
- curl_setopt_array($ch, [
- CURLOPT_POST => true,
- CURLOPT_POSTFIELDS => $payload,
- CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
- CURLOPT_RETURNTRANSFER => true,
- CURLOPT_TIMEOUT => OLLAMA_TIMEOUT,
- CURLOPT_CONNECTTIMEOUT => 5,
- ]);
- $response = curl_exec($ch);
- $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
- $curlErr = curl_error($ch);
- curl_close($ch);
- if ($curlErr || $httpCode !== 200) return $sampleData;
- $data = json_decode($response, true);
- $rawText = trim($data['response'] ?? '');
- $rawText = preg_replace('/^```(?:json)?\s*/i', '', $rawText);
- $rawText = preg_replace('/\s*```$/m', '', $rawText);
- if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) $rawText = $m[0];
- $mapped = json_decode($rawText, true);
- return is_array($mapped) ? $mapped : $sampleData;
- }
|