2 ماه پیش · 0acefcb84b
--- a/config/ai.php
+++ b/config/ai.php
@@ -2,11 +2,23 @@
 
				 /**
			
 
				  * config/ai.php
			
 
				  *
			
 
				- * Ollama LLM configuration shared by controllers that need AI inference.
			
 
				- * Matches the setup used in controllers/ollamaGenerate.php.
			
 
				+ * LLM backend configuration.
			
 
				+ * Primary: llama.cpp server (faster, local GPU inference)
			
 
				+ * Fallback: Ollama          (used if llama.cpp is unreachable)
			
 
				  */
			
 
				 
			
 
				-define('OLLAMA_HOST',       'http://192.168.8.73:11434');
			
 
				-define('OLLAMA_MODEL',      'llama3.1:8b-instruct-q4_K_M');
			
 
				-define('OLLAMA_TIMEOUT',    60);   // seconds — field mapping is fast
			
 
				-define('OLLAMA_TEMPERATURE', 0.1); // low temp for deterministic JSON output
			
 
				+// ── llama.cpp (primary) ───────────────────────────────────────────────────────
			
 
				+define('LLAMACPP_HOST',         'http://192.168.8.73:11433');  // adjust to your llama.cpp server
			
 
				+define('LLAMACPP_TIMEOUT',      120);  // seconds
			
 
				+define('LLAMACPP_TEMPERATURE',  0.3);  // sampling temperature; same 0.3 default that lib/llm.php documents and applies
			
 
				+define('LLAMACPP_TOP_P',        0.95);
			
 
				+define('LLAMACPP_TOP_K',        40);
			
 
				+
			
 
				+// ── Ollama (fallback) ─────────────────────────────────────────────────────────
			
 
				+define('OLLAMA_HOST',           'http://192.168.8.73:11434');
			
 
				+define('OLLAMA_MODEL',          'llama3.1:8b-instruct-q4_K_M');
			
 
				+define('OLLAMA_TIMEOUT',        60);   // seconds — field mapping is fast
			
 
				+define('OLLAMA_TEMPERATURE',    0.1);  // low temp for deterministic JSON output
			
 
				+
			
 
				+// ── Shared ────────────────────────────────────────────────────────────────────
			
 
				+define('EMBED_MODEL',           'nomic-embed-text');  // Ollama embedding model (fallback)
			
--- a/controllers/ollamaGenerate.php
+++ b/controllers/ollamaGenerate.php
@@ -31,15 +31,13 @@ if (session_status() === PHP_SESSION_NONE) {
 
				 require_once __DIR__ . '/../config/database.php';
			
 
				 require_once __DIR__ . '/../lib/auth.php';
			
 
				 require_once __DIR__ . '/../lib/csrf.php';
			
 
				+require_once __DIR__ . '/../lib/llm.php';  // llama.cpp primary + Ollama fallback
			
 
				 
			
 
				 header('Content-Type: application/json');
			
 
				 
			
 
				 // ── Config ───────────────────────────────────────────────────────────────────
			
 
				-define('OLLAMA_HOST',    'http://192.168.8.73:11434');
			
 
				-define('OLLAMA_MODEL',   'llama3.1:8b-instruct-q4_K_M');
			
 
				-define('EMBED_MODEL',    'nomic-embed-text');
			
 
				-define('RAG_TOP_K',      6);    // book passages injected per request
			
 
				-define('OLLAMA_TIMEOUT', 180);  // seconds — LLM can be slow
			
 
				+define('RAG_TOP_K', 6);  // book passages injected per request
			
 
				+// LLAMACPP_HOST, OLLAMA_HOST, OLLAMA_MODEL, EMBED_MODEL — all from lib/llm.php → config/ai.php
			
 
				 
			
 
				 // ── Auth + CSRF ───────────────────────────────────────────────────────────────
			
 
				 if (!isLoggedIn()) {
			
@@ -659,60 +657,24 @@ if ($recordType === 'plant') {
 
				 
			
 
				 }
			
 
				 
			
 
				-// ── Call Ollama ───────────────────────────────────────────────────────────────
			
 
				-$payload = json_encode([
			
 
				-    'model'  => OLLAMA_MODEL,
			
 
				-    'prompt' => $prompts[$section],
			
 
				-    'stream' => false,
			
 
				-    'options' => [
			
 
				+// ── Call LLM (llama.cpp primary → Ollama fallback) ───────────────────────────
			
 
				+try {
			
 
				+    $text = llmGenerate($prompts[$section], [
			
 
				         'temperature'    => 0.3,
			
 
				         'num_predict'    => 2048,
			
 
				         'num_ctx'        => 6144,
			
 
				         'repeat_penalty' => 1.1,
			
 
				-        'keep_alive'     => -1,   // keep model resident between requests
			
 
				-    ],
			
 
				-]);
			
 
				-
			
 
				-$ch = curl_init(OLLAMA_HOST . '/api/generate');
			
 
				-curl_setopt_array($ch, [
			
 
				-    CURLOPT_POST           => true,
			
 
				-    CURLOPT_POSTFIELDS     => $payload,
			
 
				-    CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
			
 
				-    CURLOPT_RETURNTRANSFER => true,
			
 
				-    CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
			
 
				-    CURLOPT_CONNECTTIMEOUT => 5,
			
 
				-]);
			
 
				-
			
 
				-$response = curl_exec($ch);
			
 
				-$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			
 
				-$curlErr  = curl_error($ch);
			
 
				-curl_close($ch);
			
 
				-
			
 
				-if ($curlErr || $response === false) {
			
 
				-    http_response_code(502);
			
 
				-    echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]);
			
 
				-    exit;
			
 
				-}
			
 
				-
			
 
				-if ($httpCode !== 200) {
			
 
				-    http_response_code(502);
			
 
				-    echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]);
			
 
				-    exit;
			
 
				-}
			
 
				-
			
 
				-$ollamaData = json_decode($response, true);
			
 
				-$text = trim($ollamaData['response'] ?? '');
			
 
				-
			
 
				-if ($text === '') {
			
 
				+    ]);
			
 
				+} catch (RuntimeException $e) {
			
 
				     http_response_code(502);
			
 
				-    echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']);
			
 
				+    // llmGenerate() already words its exception as "All LLM backends unavailable"; do not repeat it as a prefix
				+    echo json_encode(['success' => false, 'error' => $e->getMessage()]);
			
 
				     exit;
			
 
				 }
			
 
				 
			
 
				 echo json_encode([
			
 
				-    'success'          => true,
			
 
				-    'text'             => $text,
			
 
				-    'rag_chunks_used'  => count($ragChunks),
			
 
				+    'success'         => true,
			
 
				+    'text'            => $text,
			
 
				+    'rag_chunks_used' => count($ragChunks),
			
 
				 ]);
			
 
				 exit;
			
 
				 
			
@@ -742,49 +704,7 @@ function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, in
 
				 
			
 
				 function getQueryEmbedding(string $text): ?array
			
 
				 {
			
 
				-    $queryText = substr($text, 0, 2000);
			
 
				-
			
 
				-    // Try new /api/embed (Ollama >= 0.1.26) first
			
 
				-    $ch = curl_init(OLLAMA_HOST . '/api/embed');
			
 
				-    curl_setopt_array($ch, [
			
 
				-        CURLOPT_POST           => true,
			
 
				-        CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]),
			
 
				-        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
			
 
				-        CURLOPT_RETURNTRANSFER => true,
			
 
				-        CURLOPT_TIMEOUT        => 15,
			
 
				-        CURLOPT_CONNECTTIMEOUT => 3,
			
 
				-    ]);
			
 
				-    $resp = curl_exec($ch);
			
 
				-    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			
 
				-    curl_close($ch);
			
 
				-
			
 
				-    if ($resp && $code === 200) {
			
 
				-        $data = json_decode($resp, true);
			
 
				-        $emb  = $data['embeddings'][0] ?? null;
			
 
				-        if (is_array($emb) && count($emb) > 0) return $emb;
			
 
				-    }
			
 
				-
			
 
				-    // Fallback: legacy /api/embeddings
			
 
				-    $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
			
 
				-    curl_setopt_array($ch, [
			
 
				-        CURLOPT_POST           => true,
			
 
				-        CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]),
			
 
				-        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
			
 
				-        CURLOPT_RETURNTRANSFER => true,
			
 
				-        CURLOPT_TIMEOUT        => 15,
			
 
				-        CURLOPT_CONNECTTIMEOUT => 3,
			
 
				-    ]);
			
 
				-    $resp2 = curl_exec($ch);
			
 
				-    $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			
 
				-    curl_close($ch);
			
 
				-
			
 
				-    if ($resp2 && $code2 === 200) {
			
 
				-        $data2 = json_decode($resp2, true);
			
 
				-        $emb2  = $data2['embedding'] ?? null;
			
 
				-        if (is_array($emb2) && count($emb2) > 0) return $emb2;
			
 
				-    }
			
 
				-
			
 
				-    return null;
			
 
				+    return llmEmbed($text);  // llama.cpp primary → Ollama fallback (see lib/llm.php)
			
 
				 }
			
 
				 
			
 
				 function vectorSearch(PDO $pdo, array $queryVec, int $topK): array
			
--- a/controllers/soilImportController.php
+++ b/controllers/soilImportController.php
@@ -16,7 +16,7 @@
 
				  */
			
 
				 
			
 
				 require_once __DIR__ . '/../config/database.php';
			
 
				-require_once __DIR__ . '/../config/ai.php';
			
 
				+require_once __DIR__ . '/../lib/llm.php';  // llama.cpp primary + Ollama fallback (includes config/ai.php)
			
 
				 require_once __DIR__ . '/../lib/auth.php';
			
 
				 require_once __DIR__ . '/../lib/csrf.php';
			
 
				 require_once __DIR__ . '/labParsers/csbp.php';
			
@@ -395,31 +395,15 @@ LAB DATA: {$labJson}
 
				 Rules: only use values in the data. Strip units. Use null for unmapped. Output JSON only.
			
 
				 EOT;
			
 
				 
			
 
				-    $payload = json_encode([
			
 
				-        'model'  => OLLAMA_MODEL,
			
 
				-        'prompt' => $prompt,
			
 
				-        'stream' => false,
			
 
				-        'options' => ['temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 512],
			
 
				-    ]);
			
 
				-
			
 
				-    $ch = curl_init(OLLAMA_HOST . '/api/generate');
			
 
				-    curl_setopt_array($ch, [
			
 
				-        CURLOPT_POST           => true,
			
 
				-        CURLOPT_POSTFIELDS     => $payload,
			
 
				-        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
			
 
				-        CURLOPT_RETURNTRANSFER => true,
			
 
				-        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
			
 
				-        CURLOPT_CONNECTTIMEOUT => 5,
			
 
				-    ]);
			
 
				-    $response = curl_exec($ch);
			
 
				-    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
			
 
				-    $curlErr  = curl_error($ch);
			
 
				-    curl_close($ch);
			
 
				-
			
 
				-    if ($curlErr || $httpCode !== 200) return $sampleData;
			
 
				-
			
 
				-    $data    = json_decode($response, true);
			
 
				-    $rawText = trim($data['response'] ?? '');
			
 
				+    try {
			
 
				+        $rawText = llmGenerate($prompt, [
			
 
				+            'temperature' => OLLAMA_TEMPERATURE,
			
 
				+            'num_predict' => 512,
			
 
				+        ]);
			
 
				+    } catch (RuntimeException $e) {
			
 
				+        return $sampleData;  // All backends failed — return unmapped data
			
 
				+    }
			
 
				+    $rawText = trim($rawText);
			
 
				     $rawText = preg_replace('/^```(?:json)?\s*/i', '', $rawText);
			
 
				     $rawText = preg_replace('/\s*```$/m', '', $rawText);
			
 
				     if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) $rawText = $m[0];
			
--- a/lib/llm.php
+++ b/lib/llm.php
@@ -0,0 +1,220 @@
 
				+<?php
			
 
				+/**
			
 
				+ * lib/llm.php
			
 
				+ *
			
 
				+ * Unified LLM inference helper.
			
 
				+ * Primary:  llama.cpp server (LLAMACPP_HOST) — /completion + /v1/embeddings
			
 
				+ * Fallback: Ollama            (OLLAMA_HOST)   — /api/generate + /api/embed
			
 
				+ *
			
 
				+ * Public API:
			
 
				+ *   llmGenerate(string $prompt, array $options = []): string
			
 
				+ *   llmEmbed(string $text): ?array
			
 
				+ *
			
 
				+ * $options keys (all optional):
			
 
				+ *   temperature    float  default 0.3
			
 
				+ *   num_predict    int    default 2048
			
 
				+ *   num_ctx        int    default 6144  (Ollama only — ignored by llama.cpp)
			
 
				+ *   repeat_penalty float  default 1.1
				+ *   stop           array  default []    (llama.cpp only — not forwarded to Ollama)
			
 
				+ */
			
 
				+
			
 
				+require_once __DIR__ . '/../config/ai.php';
			
 
				+
			
 
				+// ── Public functions ──────────────────────────────────────────────────────────
			
 
				+
			
 
				+/**
				+ * Produce a completion for a prompt using the first backend that answers.
				+ *
				+ * llama.cpp is asked first. When its helper returns null, the same prompt and
				+ * options are handed to Ollama.
				+ *
				+ * @param string $prompt  Prompt text passed unchanged to each backend helper.
				+ * @param array  $options Optional generation settings, passed through as-is.
				+ * @return string Text returned by whichever backend succeeded.
				+ * @throws RuntimeException when both backends fail
				+ */
				+function llmGenerate(string $prompt, array $options = []): string
				+{
				+    $primary = _llamacppGenerate($prompt, $options);
				+
				+    if ($primary === null) {
				+        error_log('[llm] llama.cpp unavailable — falling back to Ollama');
				+
				+        $secondary = _ollamaGenerate($prompt, $options);
				+        if ($secondary === null) {
				+            throw new RuntimeException('All LLM backends unavailable');
				+        }
				+
				+        return $secondary;
				+    }
				+
				+    return $primary;
				+}
			
 
				+
			
 
				+/**
				+ * Embed text into a float vector.
				+ * Tries llama.cpp /v1/embeddings first; falls back to Ollama /api/embed.
				+ *
				+ * The input is capped at 2000 bytes before it is sent. The cut is made on a
				+ * UTF-8 character boundary: a byte-wise cut can split a multi-byte character,
				+ * which makes json_encode() fail and the embedding request go out empty.
				+ *
				+ * @param string $text Text to embed.
				+ * @return array|null Embedding vector, or null only when both backends fail.
				+ */
				+function llmEmbed(string $text): ?array
				+{
				+    // mb_strcut() counts bytes like substr() but never ends in the middle of a character.
				+    // Without the mbstring extension, keep the previous byte-wise behaviour.
				+    $text = function_exists('mb_strcut')
				+        ? mb_strcut($text, 0, 2000, 'UTF-8')
				+        : substr($text, 0, 2000);
				+
				+    $emb = _llamacppEmbed($text);
				+    if ($emb !== null) {
				+        return $emb;
				+    }
				+
				+    error_log('[llm] llama.cpp embed unavailable — falling back to Ollama');
				+
				+    return _ollamaEmbed($text);
				+}
			
 
				+
			
 
				+// ── llama.cpp backend ─────────────────────────────────────────────────────────
			
 
				+
			
 
				+/**
				+ * Ask the llama.cpp server's /completion endpoint for a completion.
				+ *
				+ * @param string $prompt  Prompt text.
				+ * @param array  $options Optional keys read here: num_predict, temperature, repeat_penalty, stop.
				+ * @return string|null Trimmed completion, or null on a cURL error, a non-200 status or empty content.
				+ */
				+function _llamacppGenerate(string $prompt, array $options): ?string
				+{
				+    $request = [
				+        'prompt'         => $prompt,
				+        'n_predict'      => $options['num_predict']    ?? 2048,
				+        'temperature'    => $options['temperature']    ?? 0.3,
				+        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
				+        'stop'           => $options['stop']           ?? [],
				+        'stream'         => false,
				+    ];
				+
				+    $handle = curl_init(LLAMACPP_HOST . '/completion');
				+    curl_setopt_array($handle, [
				+        CURLOPT_POST           => true,
				+        CURLOPT_POSTFIELDS     => json_encode($request),
				+        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
				+        CURLOPT_RETURNTRANSFER => true,
				+        CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
				+        CURLOPT_CONNECTTIMEOUT => 3,
				+    ]);
				+
				+    $body      = curl_exec($handle);
				+    $status    = curl_getinfo($handle, CURLINFO_HTTP_CODE);
				+    $curlError = curl_error($handle);
				+    curl_close($handle);
				+
				+    // Treat any transport error or non-200 status as "backend unavailable"
				+    $succeeded = !$curlError && $body !== false && $status === 200;
				+    if (!$succeeded) {
				+        $reason = $curlError ? $curlError : "HTTP $status";
				+        error_log('[llm] llama.cpp generate: ' . $reason);
				+        return null;
				+    }
				+
				+    $decoded    = json_decode($body, true);
				+    $completion = trim($decoded['content'] ?? '');
				+
				+    if ($completion === '') {
				+        return null;
				+    }
				+
				+    return $completion;
				+}
			
 
				+
			
 
				+/**
				+ * Request an embedding from the llama.cpp server's /v1/embeddings endpoint.
				+ *
				+ * @param string $text Text to embed.
				+ * @return array|null Non-empty embedding vector, or null on a cURL error, a non-200 status or a missing vector.
				+ */
				+function _llamacppEmbed(string $text): ?array
				+{
				+    // llama.cpp OpenAI-compat embedding endpoint
				+    $handle = curl_init(LLAMACPP_HOST . '/v1/embeddings');
				+    curl_setopt_array($handle, [
				+        CURLOPT_POST           => true,
				+        CURLOPT_POSTFIELDS     => json_encode(['input' => $text]),
				+        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
				+        CURLOPT_RETURNTRANSFER => true,
				+        CURLOPT_TIMEOUT        => 15,
				+        CURLOPT_CONNECTTIMEOUT => 3,
				+    ]);
				+
				+    $body      = curl_exec($handle);
				+    $status    = curl_getinfo($handle, CURLINFO_HTTP_CODE);
				+    $curlError = curl_error($handle);
				+    curl_close($handle);
				+
				+    $succeeded = !$curlError && $body !== false && $status === 200;
				+    if (!$succeeded) {
				+        $reason = $curlError ? $curlError : "HTTP $status";
				+        error_log('[llm] llama.cpp embed: ' . $reason);
				+        return null;
				+    }
				+
				+    $decoded = json_decode($body, true);
				+    $vector  = $decoded['data'][0]['embedding'] ?? null;
				+
				+    if (!is_array($vector) || count($vector) === 0) {
				+        return null;
				+    }
				+
				+    return $vector;
				+}
			
 
				+
			
 
				+// ── Ollama backend ────────────────────────────────────────────────────────────
			
 
				+
			
 
				+/**
				+ * Ask Ollama's /api/generate endpoint for a completion (fallback backend).
				+ *
				+ * @param string $prompt  Prompt text.
				+ * @param array  $options Optional keys read here: temperature, num_predict, num_ctx, repeat_penalty.
				+ * @return string|null Trimmed completion, or null on a cURL error, a non-200 status or an empty response.
				+ */
				+function _ollamaGenerate(string $prompt, array $options): ?string
				+{
				+    $payload = json_encode([
				+        'model'      => OLLAMA_MODEL,
				+        'prompt'     => $prompt,
				+        'stream'     => false,
				+        // keep_alive is a request-level field in Ollama's API, not a model option;
				+        // nested under 'options' it has no effect. -1 keeps the model resident between requests.
				+        'keep_alive' => -1,
				+        'options'    => [
				+            'temperature'    => $options['temperature']    ?? 0.3,
				+            'num_predict'    => $options['num_predict']    ?? 2048,
				+            'num_ctx'        => $options['num_ctx']        ?? 6144,
				+            'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
				+        ],
				+    ]);
				+
				+    $ch = curl_init(OLLAMA_HOST . '/api/generate');
				+    curl_setopt_array($ch, [
				+        CURLOPT_POST           => true,
				+        CURLOPT_POSTFIELDS     => $payload,
				+        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
				+        CURLOPT_RETURNTRANSFER => true,
				+        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
				+        CURLOPT_CONNECTTIMEOUT => 5,
				+    ]);
				+
				+    $resp = curl_exec($ch);
				+    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
				+    $err  = curl_error($ch);
				+    curl_close($ch);
				+
				+    // Any transport error or non-200 status counts as "backend unavailable"
				+    if ($err || $resp === false || $code !== 200) {
				+        error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
				+        return null;
				+    }
				+
				+    $data = json_decode($resp, true);
				+    $text = trim($data['response'] ?? '');
				+    return $text !== '' ? $text : null;
				+}
			
 
				+
			
 
				+/**
				+ * Request an embedding from Ollama, trying the current endpoint before the legacy one.
				+ *
				+ * /api/embed takes the text as "input" and answers with "embeddings" (first entry is used);
				+ * /api/embeddings takes it as "prompt" and answers with "embedding".
				+ *
				+ * @param string $text Text to embed.
				+ * @return array|null Non-empty embedding vector, or null when neither endpoint yields one.
				+ */
				+function _ollamaEmbed(string $text): ?array
				+{
				+    // Order matters: /api/embed (Ollama >= 0.1.26) first, legacy /api/embeddings second
				+    $attempts = [
				+        ['/api/embed',      'input'],
				+        ['/api/embeddings', 'prompt'],
				+    ];
				+
				+    foreach ($attempts as [$path, $field]) {
				+        $handle = curl_init(OLLAMA_HOST . $path);
				+        curl_setopt_array($handle, [
				+            CURLOPT_POST           => true,
				+            CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, $field => $text]),
				+            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
				+            CURLOPT_RETURNTRANSFER => true,
				+            CURLOPT_TIMEOUT        => 15,
				+            CURLOPT_CONNECTTIMEOUT => 5,
				+        ]);
				+        $body   = curl_exec($handle);
				+        $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
				+        curl_close($handle);
				+
				+        if (!$body || $status !== 200) {
				+            continue;
				+        }
				+
				+        $decoded = json_decode($body, true);
				+        if ($field === 'input') {
				+            $vector = $decoded['embeddings'][0] ?? null;
				+        } else {
				+            $vector = $decoded['embedding'] ?? null;
				+        }
				+
				+        if (is_array($vector) && count($vector) > 0) {
				+            return $vector;
				+        }
				+    }
				+
				+    error_log('[llm] All embed backends failed');
				+    return null;
				+}