soilImportController.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. <?php
  2. /**
  3. * controllers/soilImportController.php
  4. *
  5. * Handles XLS/XLSX/CSV upload from soil labs.
  6. *
  7. * POST /controllers/soilImportController.php
  8. * file — multipart upload
  9. * lab — lab identifier: "csbp" | "generic"
  10. * action — "parse" → detect lab, return sample list
  11. * "import_one" → map + return fields for one sample (form pre-fill)
  12. * "import_bulk" → save all (or selected) samples directly to DB
  13. * sample_idx — (import_one) 0-based index
  14. * samples — (import_bulk) JSON array of sample objects with overrides applied by user
  15. * client_id — (import_bulk) client_records.id to link records to
  16. */
  17. require_once __DIR__ . '/../config/database.php';
  18. require_once __DIR__ . '/../config/ai.php';
  19. require_once __DIR__ . '/../lib/auth.php';
  20. require_once __DIR__ . '/../lib/csrf.php';
  21. require_once __DIR__ . '/labParsers/csbp.php';
  22. if (session_status() === PHP_SESSION_NONE) {
  23. session_start();
  24. }
  25. requireLogin();
  26. header('Content-Type: application/json');
  27. // ─── helpers ─────────────────────────────────────────────────────────────────
  28. function jsonError(string $message, int $code = 400): never
  29. {
  30. http_response_code($code);
  31. echo json_encode(['success' => false, 'error' => $message]);
  32. exit;
  33. }
  34. function jsonOk(array $data): never
  35. {
  36. echo json_encode(['success' => true, ...$data]);
  37. exit;
  38. }
  39. // ─── request validation ───────────────────────────────────────────────────────
  40. if ($_SERVER['REQUEST_METHOD'] !== 'POST') {
  41. jsonError('Method not allowed', 405);
  42. }
  43. $action = $_POST['action'] ?? 'parse';
  44. if (!in_array($action, ['parse', 'import_one', 'import_bulk'], true)) {
  45. jsonError('Invalid action');
  46. }
  47. // ─── file handling ────────────────────────────────────────────────────────────
  48. if (empty($_FILES['file']) || $_FILES['file']['error'] !== UPLOAD_ERR_OK) {
  49. jsonError('No file uploaded or upload error: ' . ($_FILES['file']['error'] ?? 'missing'));
  50. }
  51. $uploadedFile = $_FILES['file'];
  52. $ext = strtolower(pathinfo($uploadedFile['name'], PATHINFO_EXTENSION));
  53. if (!in_array($ext, ['xls', 'xlsx', 'csv', 'ods'], true)) {
  54. jsonError('Unsupported file type. Please upload XLS, XLSX, CSV, or ODS.');
  55. }
  56. // ─── PhpSpreadsheet ───────────────────────────────────────────────────────────
  57. foreach ([__DIR__ . '/../vendor/autoload.php', __DIR__ . '/../../vendor/autoload.php'] as $p) {
  58. if (file_exists($p)) { require_once $p; break; }
  59. }
  60. if (!class_exists('\PhpOffice\PhpSpreadsheet\IOFactory')) {
  61. jsonError('PhpSpreadsheet not installed. Run: composer install', 500);
  62. }
  63. use PhpOffice\PhpSpreadsheet\IOFactory;
  64. try {
  65. $spreadsheet = IOFactory::load($uploadedFile['tmp_name']);
  66. } catch (\Exception $e) {
  67. jsonError('Could not read file: ' . $e->getMessage());
  68. }
  69. $sheet = $spreadsheet->getActiveSheet();
  70. $rawData = [];
  71. foreach ($sheet->getRowIterator() as $row) {
  72. $cells = [];
  73. foreach ($row->getCellIterator() as $cell) {
  74. $cells[] = trim((string) $cell->getFormattedValue());
  75. }
  76. while ($cells && end($cells) === '') {
  77. array_pop($cells);
  78. }
  79. if ($cells) {
  80. $rawData[] = $cells;
  81. }
  82. }
  83. if (empty($rawData)) {
  84. jsonError('The spreadsheet appears to be empty.');
  85. }
  86. // ─── Lab detection ────────────────────────────────────────────────────────────
  87. //
  88. // Client can tell us the lab via POST['lab'], or we auto-detect.
  89. $lab = strtolower(trim($_POST['lab'] ?? 'auto'));
  90. if ($lab === 'auto' || $lab === '') {
  91. if (csbpDetect($rawData)) {
  92. $lab = 'csbp';
  93. } else {
  94. $lab = 'generic';
  95. }
  96. }
  97. // ─── Parse into samples ───────────────────────────────────────────────────────
  98. if ($lab === 'csbp') {
  99. $samples = csbpParse($rawData);
  100. } else {
  101. $samples = genericParse($rawData);
  102. }
  103. if (empty($samples)) {
  104. jsonError('No samples found in the file. Check the correct lab is selected.');
  105. }
  106. // ─── action: parse ────────────────────────────────────────────────────────────
  107. if ($action === 'parse') {
  108. $list = [];
  109. foreach ($samples as $idx => $s) {
  110. $list[] = [
  111. 'idx' => $idx,
  112. 'lab_no' => $s['lab_no'] ?? 'Sample ' . ($idx + 1),
  113. 'sample_id' => $s['sample_id'] ?? '', // paddock
  114. 'client' => $s['client_name'] ?? '',
  115. 'crop' => $s['crop_type'] ?? '',
  116. ];
  117. }
  118. jsonOk(['samples' => $list, 'count' => count($samples), 'lab' => $lab]);
  119. }
  120. // ─── action: import_one ───────────────────────────────────────────────────────
  121. // Returns mapped field values for a single sample to pre-fill the form.
  122. if ($action === 'import_one') {
  123. $idx = (int) ($_POST['sample_idx'] ?? 0);
  124. if ($idx < 0 || $idx >= count($samples)) {
  125. jsonError('Invalid sample index.');
  126. }
  127. $fields = $samples[$idx];
  128. // If generic lab, try AI mapping on top
  129. if ($lab === 'generic') {
  130. $fields = ollamaMap($fields);
  131. }
  132. $fields = array_filter($fields, fn($v) => $v !== null && $v !== '');
  133. jsonOk(['fields' => $fields, 'method' => $lab === 'csbp' ? 'csbp' : 'ai']);
  134. }
  135. // ─── action: import_bulk ─────────────────────────────────────────────────────
  136. // Saves all samples (with user-confirmed paddock IDs) directly to soil_records.
  137. if ($action === 'import_bulk') {
  138. $clientId = (int) ($_POST['client_id'] ?? 0);
  139. $userId = (int) getCurrentUserId();
  140. $confirmedJson = $_POST['samples'] ?? '[]';
  141. $confirmed = json_decode($confirmedJson, true);
  142. if (!is_array($confirmed) || empty($confirmed)) {
  143. // Fall back to all parsed samples
  144. $confirmed = $samples;
  145. }
  146. if (!$clientId) {
  147. jsonError('Please select a client before bulk importing.');
  148. }
  149. $pdo = getDBConnection();
  150. $inserted = 0;
  151. $skipped = [];
  152. foreach ($confirmed as $s) {
  153. // Require at minimum a lab number or sample ID
  154. if (empty($s['lab_no']) && empty($s['sample_id'])) {
  155. $skipped[] = 'Row missing lab number and paddock';
  156. continue;
  157. }
  158. // Avoid duplicates: skip if this lab_no already exists for this client
  159. if (!empty($s['lab_no'])) {
  160. $dup = $pdo->prepare("
  161. SELECT id FROM soil_records
  162. WHERE lab_no = ? AND CAST(client_records_id AS UNSIGNED) = ?
  163. LIMIT 1
  164. ");
  165. $dup->execute([$s['lab_no'], $clientId]);
  166. if ($dup->fetch()) {
  167. $skipped[] = ($s['lab_no'] ?? '') . ' (duplicate)';
  168. continue;
  169. }
  170. }
  171. $rand = (string) (mt_rand(1000, 9999) / 1000); // matches existing rand pattern
  172. $stmt = $pdo->prepare("
  173. INSERT INTO soil_records (
  174. client_records_id, modx_user_id, date, client_name,
  175. analysis_type, lab_no, batch_no, sample_id, site_id,
  176. crop_type, soil_type, date_sampled,
  177. texture, gravel, colour,
  178. ph_cacl2, ph_h2o, ec, ocarbon, omatter, paramag,
  179. NO3_N, NH3_N,
  180. p_mehlick, p_bray2, p_morgan,
  181. k_morgan, ca_morgan, mg_morgan, na_morgan, s_morgan,
  182. b_cacl2, mn_dtpa, zn_dtpa, fe_dtpa, cu_dtpa, al, se,
  183. tec, cec, ca_mehlick3, mg_mehlick3, k_mehlick3, na_mehlick3, al_mehlick3,
  184. rand, status
  185. ) VALUES (
  186. ?, ?, NOW(), ?,
  187. ?, ?, ?, ?, ?,
  188. ?, ?, ?,
  189. ?, ?, ?,
  190. ?, ?, ?, ?, ?, ?,
  191. ?, ?,
  192. ?, ?, ?,
  193. ?, ?, ?, ?, ?,
  194. ?, ?, ?, ?, ?, ?, ?,
  195. ?, ?, ?, ?, ?, ?, ?,
  196. ?, '0'
  197. )
  198. ");
  199. $n = fn(string $key) => isset($s[$key]) && $s[$key] !== '' ? (float) $s[$key] : null;
  200. $t = fn(string $key) => $s[$key] ?? null;
  201. $stmt->execute([
  202. $clientId, $userId, $t('client_name'),
  203. $t('analysis_type'), $t('lab_no'), $t('batch_no'), $t('sample_id'), $t('site_id'),
  204. $t('crop_type'), $t('soil_type'), $t('date_sampled'),
  205. $t('texture'), $n('gravel'), $t('colour'),
  206. $n('ph_cacl2'), $n('ph_h2o'), $n('ec'), $n('ocarbon'), $n('omatter'), $n('paramag'),
  207. $n('NO3_N'), $n('NH3_N'),
  208. $n('p_mehlick'), $n('p_bray2'), $n('p_morgan'),
  209. $n('k_morgan'), $n('ca_morgan'), $n('mg_morgan'), $n('na_morgan'), $n('s_morgan'),
  210. $n('b_cacl2'), $n('mn_dtpa'), $n('zn_dtpa'), $n('fe_dtpa'), $n('cu_dtpa'), $n('al'), $n('se'),
  211. $n('tec'), $n('cec'), $n('ca_mehlick3'), $n('mg_mehlick3'), $n('k_mehlick3'), $n('na_mehlick3'), $n('al_mehlick3'),
  212. $rand,
  213. ]);
  214. $inserted++;
  215. }
  216. jsonOk([
  217. 'inserted' => $inserted,
  218. 'skipped' => $skipped,
  219. 'message' => "{$inserted} sample" . ($inserted !== 1 ? 's' : '') . " imported successfully."
  220. . (count($skipped) ? ' Skipped: ' . implode(', ', $skipped) : ''),
  221. ]);
  222. }
  223. // ─── Generic parser (non-CSBP files) ─────────────────────────────────────────
  224. function genericParse(array $rawData): array
  225. {
  226. // Reuse the original detection + extraction logic for unknown labs
  227. $fmt = detectGenericFormat($rawData);
  228. return $fmt['transposed']
  229. ? extractTransposed($rawData)
  230. : extractRowBased($rawData, $fmt['headerRow']);
  231. }
  232. function detectGenericFormat(array $rawData): array
  233. {
  234. $transposedSignals = [
  235. '1:5', 'total p', 'total k', 'total ca', 'total mg', 'total na',
  236. 'total s', 'total n', 'total b', 'total zn', 'total mn', 'total fe',
  237. 'total cu', 'organic matter', 'organic carbon', 'lab id', 'nitrate ppm',
  238. 'ph 1:5', 'moisture %', 'consultant', 'field name',
  239. ];
  240. $rowBasedSignals = [
  241. 'lab_number', 'custno', 'paddock', 'ph_cacl2', 'ph_h2o',
  242. 'dtpa_cu', 'dtpa_zn', 'dtpa_mn', 'dtpa_fe', 'conducty',
  243. 'orgcarbon', 'nitrate', 'nammonium', 'texture', 'gravel',
  244. 'exc_ca', 'exc_mg', 'exc_na', 'exc_k', 'alum_cacl2',
  245. 'boron_hot', 'client name', 'lab number', 'sat_ca', 'sat_mg',
  246. 'sat_k', 'sat_na', 'crop', 'sp%',
  247. ];
  248. $transposedScore = 0;
  249. foreach (array_slice($rawData, 0, 20) as $row) {
  250. $cell = strtolower($row[0] ?? '');
  251. foreach ($transposedSignals as $sig) {
  252. if (str_contains($cell, $sig)) { $transposedScore++; break; }
  253. }
  254. }
  255. if ($transposedScore >= 2) {
  256. return ['transposed' => true, 'headerRow' => 0];
  257. }
  258. $bestRow = 0; $bestScore = 0;
  259. for ($i = 0; $i < min(10, count($rawData)); $i++) {
  260. $score = 0;
  261. foreach ($rawData[$i] as $cell) {
  262. $cell = strtolower(trim($cell));
  263. foreach ($rowBasedSignals as $sig) {
  264. if (str_contains($cell, $sig)) { $score++; break; }
  265. }
  266. }
  267. if ($score > $bestScore) { $bestScore = $score; $bestRow = $i; }
  268. }
  269. return ['transposed' => false, 'headerRow' => $bestRow];
  270. }
  271. function extractRowBased(array $rawData, int $headerRow): array
  272. {
  273. $headers = $rawData[$headerRow];
  274. $samples = [];
  275. for ($r = $headerRow + 1; $r < count($rawData); $r++) {
  276. $row = $rawData[$r]; $sample = [];
  277. foreach ($headers as $c => $header) {
  278. if ($header === '') continue;
  279. $sample[$header] = $row[$c] ?? '';
  280. }
  281. if (count(array_filter($sample)) >= 3) $samples[] = $sample;
  282. }
  283. return $samples;
  284. }
  285. function extractTransposed(array $rawData): array
  286. {
  287. $labels = array_column($rawData, 0);
  288. $maxCols = max(array_map('count', $rawData));
  289. $samples = [];
  290. for ($col = 1; $col < $maxCols; $col++) {
  291. $sample = [];
  292. foreach ($rawData as $ri => $row) {
  293. $label = trim($labels[$ri] ?? '');
  294. $value = trim($row[$col] ?? '');
  295. if ($label !== '' && $value !== '') $sample[$label] = $value;
  296. }
  297. if (count(array_filter($sample)) >= 3) $samples[] = $sample;
  298. }
  299. return $samples;
  300. }
  301. // ─── Ollama field mapping (generic lab fallback) ──────────────────────────────
  302. function ollamaMap(array $sampleData): array
  303. {
  304. $labJson = json_encode($sampleData, JSON_UNESCAPED_UNICODE);
  305. $prompt = <<<EOT
  306. You are a soil laboratory data mapper. Output ONLY a JSON object — no explanation, no markdown.
  307. Map the LAB DATA to these TARGET FIELDS:
  308. lab_no=Lab reference number
  309. sample_id=Sample/paddock identifier
  310. site_id=Site/block identifier
  311. date_sampled=Date as YYYY-MM-DD
  312. texture=Soil texture
  313. gravel=Gravel % (number)
  314. colour=Soil colour
  315. ocarbon=Organic carbon % (number)
  316. omatter=Organic matter % (number)
  317. ph_cacl2=pH CaCl2 (number)
  318. ph_h2o=pH water (number)
  319. ec=EC dS/m (number)
  320. NO3_N=Nitrate-N mg/kg (number)
  321. NH3_N=Ammonium-N mg/kg (number)
  322. p_morgan=Phosphorus mg/kg (number)
  323. k_morgan=Potassium mg/kg (number)
  324. ca_morgan=Calcium mg/kg (number)
  325. mg_morgan=Magnesium mg/kg (number)
  326. na_morgan=Sodium mg/kg (number)
  327. s_morgan=Sulphur mg/kg (number)
  328. b_cacl2=Boron mg/kg (number)
  329. mn_dtpa=Manganese mg/kg (number)
  330. zn_dtpa=Zinc mg/kg (number)
  331. fe_dtpa=Iron mg/kg (number)
  332. cu_dtpa=Copper mg/kg (number)
  333. al=Aluminium mg/kg (number)
  334. cec=CEC meq/100g (number)
  335. LAB DATA: {$labJson}
  336. Rules: only use values in the data. Strip units. Use null for unmapped. Output JSON only.
  337. EOT;
  338. $payload = json_encode([
  339. 'model' => OLLAMA_MODEL,
  340. 'prompt' => $prompt,
  341. 'stream' => false,
  342. 'options' => ['temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 512],
  343. ]);
  344. $ch = curl_init(OLLAMA_HOST . '/api/generate');
  345. curl_setopt_array($ch, [
  346. CURLOPT_POST => true,
  347. CURLOPT_POSTFIELDS => $payload,
  348. CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
  349. CURLOPT_RETURNTRANSFER => true,
  350. CURLOPT_TIMEOUT => OLLAMA_TIMEOUT,
  351. CURLOPT_CONNECTTIMEOUT => 5,
  352. ]);
  353. $response = curl_exec($ch);
  354. $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
  355. $curlErr = curl_error($ch);
  356. curl_close($ch);
  357. if ($curlErr || $httpCode !== 200) return $sampleData;
  358. $data = json_decode($response, true);
  359. $rawText = trim($data['response'] ?? '');
  360. $rawText = preg_replace('/^```(?:json)?\s*/i', '', $rawText);
  361. $rawText = preg_replace('/\s*```$/m', '', $rawText);
  362. if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) $rawText = $m[0];
  363. $mapped = json_decode($rawText, true);
  364. return is_array($mapped) ? $mapped : $sampleData;
  365. }