| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184 |
- <?php
- /**
- * controllers/labParsers/csbp.php
- *
- * Dedicated parser for CSBP Soil Analysis Report files.
- *
- * Format (both .xls and .xlsx):
- * Row 1-2 empty
- * Row 3 "SOIL CONTROL SOIL ANALYSIS REPORT"
- * Row 4 "CSBP LIMITED ABN: ..."
- * Row 5 Units row (mg/kg, pH, dS/m …)
- * Row 6 Column headers (47 columns — see MAP below)
- * Row 7+ One sample per row
- *
- * Provides:
- * csbpDetect(array $rawData): bool
- * csbpParse(array $rawData): array — returns array of mapped sample arrays
- */
- // ─── CSBP column → soil_records field map ────────────────────────────────────
- //
- // Keys are CSBP header names (case-insensitive match).
- // Values are soil_records column names.
- const CSBP_COLUMN_MAP = [
- // Identity / metadata
- 'CLIENT NAME' => 'client_name',
- 'PADDOCK' => 'sample_id', // paddock = sample identifier
- 'CROP' => 'crop_type',
- 'SERIAL_NO' => 'batch_no',
- 'LAB_NUMBER' => 'lab_no',
- // Physical
- 'TEXTURE' => 'texture',
- 'GRAVEL' => 'gravel',
- 'COLOUR' => 'colour',
- 'MOISTURE' => null, // not stored in soil_records
- // Chemical
- 'CONDUCTY' => 'ec',
- 'PH_CACL2' => 'ph_cacl2',
- 'PH_H2O' => 'ph_h2o',
- // Nitrogen
- 'NITRATEN' => 'NO3_N',
- 'AMMONIUM' => 'NH3_N',
- 'TOTALN' => null,
- // Phosphorus
- 'PHOS' => 'p_morgan',
- 'PHOS_OLS' => 'p_bray2', // Olsen P ≈ closest mapped field
- 'PHOS_RETEN' => null,
- 'TOTALP' => null,
- 'PBI' => null,
- // Potassium
- 'POTASSIUM' => 'k_morgan',
- // Other macros
- 'SULPHUR' => 's_morgan',
- 'ORGCARBON' => 'ocarbon',
- 'CHLORIDE' => null,
- 'FIZZ' => null,
- // DTPA micronutrients
- 'DTPA_CU' => 'cu_dtpa',
- 'DTPA_ZN' => 'zn_dtpa',
- 'DTPA_MN' => 'mn_dtpa',
- 'DTPA_FE' => 'fe_dtpa',
- // EDTA micronutrients (map to same fields — DTPA takes priority if both present)
- 'EDTA_CU' => 'cu_dtpa',
- 'EDTA_ZN' => 'zn_dtpa',
- 'EDTA_MN' => 'mn_dtpa',
- 'EDTA_FE' => 'fe_dtpa',
- // Other micros
- 'IRON' => 'fe_dtpa', // generic iron reading
- 'BORON_HOT' => 'b_cacl2',
- 'ALUM_CACL2' => 'al',
- // Exchangeable cations (base saturation)
- 'EXC_CA' => 'ca_morgan',
- 'EXC_MG' => 'mg_morgan',
- 'EXC_NA' => 'na_morgan',
- 'EXC_K' => null, // separate from k_morgan (extractable)
- 'EXC_AL' => 'al_mehlick3',
- // Base saturation %
- 'SAT_Ca' => 'ca_mehlick3',
- 'SAT_Mg' => 'mg_mehlick3',
- 'SAT_K' => 'k_mehlick3',
- 'SAT_Na' => 'na_mehlick3',
- 'SAT_COND' => 'cec', // conductance of saturation extract ≈ CEC proxy
- 'SP%' => null,
- ];
- /**
- * Check if a 2-D raw data array looks like a CSBP file.
- * Looks for "CSBP" or "SOIL CONTROL SOIL ANALYSIS" in the first 6 rows.
- */
- function csbpDetect(array $rawData): bool
- {
- foreach (array_slice($rawData, 0, 6) as $row) {
- foreach ($row as $cell) {
- $cell = strtoupper(trim((string) $cell));
- if (str_contains($cell, 'CSBP') || str_contains($cell, 'SOIL CONTROL SOIL ANALYSIS')) {
- return true;
- }
- }
- }
- return false;
- }
- /**
- * Find the header row index inside $rawData.
- * The CSBP header row contains "LAB_NUMBER" and "PADDOCK".
- */
- function csbpFindHeaderRow(array $rawData): int
- {
- foreach ($rawData as $idx => $row) {
- $cells = array_map(fn($c) => strtoupper(trim((string) $c)), $row);
- if (in_array('LAB_NUMBER', $cells, true) && in_array('PADDOCK', $cells, true)) {
- return $idx;
- }
- }
- return 5; // fallback: row index 5 (row 6 in 1-based)
- }
- /**
- * Parse a CSBP raw data array into an array of mapped sample arrays.
- * Each sample is keyed by soil_records column names.
- *
- * @param array $rawData 2-D array from PhpSpreadsheet
- * @return array [ ['lab_no'=>..., 'ph_cacl2'=>..., ...], ... ]
- */
- function csbpParse(array $rawData): array
- {
- $headerRow = csbpFindHeaderRow($rawData);
- $headers = array_map(fn($c) => trim((string) $c), $rawData[$headerRow]);
- // Build index: column position → soil_records field name (or null to skip)
- $colIndex = [];
- foreach ($headers as $col => $header) {
- if ($header === '') continue;
- // Case-insensitive lookup in the map
- $upperHeader = strtoupper($header);
- foreach (CSBP_COLUMN_MAP as $csbpCol => $dbField) {
- if (strtoupper($csbpCol) === $upperHeader) {
- $colIndex[$col] = $dbField; // null means skip
- break;
- }
- }
- }
- $samples = [];
- for ($r = $headerRow + 1; $r < count($rawData); $r++) {
- $row = $rawData[$r];
- $sample = [];
- $hasData = false;
- foreach ($colIndex as $col => $dbField) {
- $raw = trim((string) ($row[$col] ?? ''));
- if ($dbField === null) continue; // unmapped column
- if ($raw === '') continue;
- // Don't overwrite a DTPA value with an EDTA value
- if (isset($sample[$dbField]) && $sample[$dbField] !== '') continue;
- $sample[$dbField] = $raw;
- $hasData = true;
- }
- // Skip completely empty rows
- if (!$hasData) continue;
- // Mark the lab source
- $sample['analysis_type'] = 'CSBP';
- $samples[] = $sample;
- }
- return $samples;
- }
|