فهرست منبع

Upgrade to llama.cpp

Benjamin Harris 2 ماه پیش
والد
کامیت
0acefcb84b
4 فایل‌های تغییر یافته به همراه 261 افزوده شده و 125 حذف شده
  1. 18 6
      config/ai.php
  2. 13 93
      controllers/ollamaGenerate.php
  3. 10 26
      controllers/soilImportController.php
  4. 220 0
      lib/llm.php

+ 18 - 6
config/ai.php

@@ -2,11 +2,23 @@
 /**
  * config/ai.php
  *
- * Ollama LLM configuration shared by controllers that need AI inference.
- * Matches the setup used in controllers/ollamaGenerate.php.
+ * LLM backend configuration.
+ * Primary: llama.cpp server (faster, local GPU inference)
+ * Fallback: Ollama          (used if llama.cpp is unreachable)
  */
 
-define('OLLAMA_HOST',       'http://192.168.8.73:11434');
-define('OLLAMA_MODEL',      'llama3.1:8b-instruct-q4_K_M');
-define('OLLAMA_TIMEOUT',    60);   // seconds — field mapping is fast
-define('OLLAMA_TEMPERATURE', 0.1); // low temp for deterministic JSON output
+// ── llama.cpp (primary) ───────────────────────────────────────────────────────
+define('LLAMACPP_HOST',         'http://192.168.8.73:11433');  // base URL of the llama.cpp server — adjust per deployment
+define('LLAMACPP_TIMEOUT',      120);  // seconds — total request time allowed for one completion
+// Sampling temperature. The previous value of 100 is far outside any usable
+// range; 0.3 matches the default temperature lib/llm.php applies per request.
+define('LLAMACPP_TEMPERATURE',  0.3);
+define('LLAMACPP_TOP_P',        0.95); // nucleus-sampling cutoff
+define('LLAMACPP_TOP_K',        40);   // top-k sampling cutoff
+
+// ── Ollama (fallback) ─────────────────────────────────────────────────────────
+define('OLLAMA_HOST',           'http://192.168.8.73:11434');
+define('OLLAMA_MODEL',          'llama3.1:8b-instruct-q4_K_M');
+// seconds — applies to every Ollama generate call made through lib/llm.php.
+// NOTE(review): controllers/ollamaGenerate.php previously used its own 180 s
+// limit for long generations; confirm 60 s is still enough for that path.
+define('OLLAMA_TIMEOUT',        60);
+define('OLLAMA_TEMPERATURE',    0.1);  // low temp for deterministic JSON output
+
+// ── Shared ────────────────────────────────────────────────────────────────────
+define('EMBED_MODEL',           'nomic-embed-text');  // embedding model name sent to Ollama's embed endpoints

+ 13 - 93
controllers/ollamaGenerate.php

@@ -31,15 +31,13 @@ if (session_status() === PHP_SESSION_NONE) {
 require_once __DIR__ . '/../config/database.php';
 require_once __DIR__ . '/../lib/auth.php';
 require_once __DIR__ . '/../lib/csrf.php';
+require_once __DIR__ . '/../lib/llm.php';  // llama.cpp primary + Ollama fallback
 
 header('Content-Type: application/json');
 
 // ── Config ───────────────────────────────────────────────────────────────────
-define('OLLAMA_HOST',    'http://192.168.8.73:11434');
-define('OLLAMA_MODEL',   'llama3.1:8b-instruct-q4_K_M');
-define('EMBED_MODEL',    'nomic-embed-text');
-define('RAG_TOP_K',      6);    // book passages injected per request
-define('OLLAMA_TIMEOUT', 180);  // seconds — LLM can be slow
+define('RAG_TOP_K', 6);  // book passages injected per request
+// LLAMACPP_HOST, OLLAMA_HOST, OLLAMA_MODEL, EMBED_MODEL — all from lib/llm.php → config/ai.php
 
 // ── Auth + CSRF ───────────────────────────────────────────────────────────────
 if (!isLoggedIn()) {
@@ -659,60 +657,24 @@ if ($recordType === 'plant') {
 
 }
 
-// ── Call Ollama ───────────────────────────────────────────────────────────────
-$payload = json_encode([
-    'model'  => OLLAMA_MODEL,
-    'prompt' => $prompts[$section],
-    'stream' => false,
-    'options' => [
+// ── Call LLM (llama.cpp primary → Ollama fallback) ───────────────────────────
+try {
+    $text = llmGenerate($prompts[$section], [
         'temperature'    => 0.3,
         'num_predict'    => 2048,
         'num_ctx'        => 6144,
         'repeat_penalty' => 1.1,
-        'keep_alive'     => -1,   // keep model resident between requests
-    ],
-]);
-
-$ch = curl_init(OLLAMA_HOST . '/api/generate');
-curl_setopt_array($ch, [
-    CURLOPT_POST           => true,
-    CURLOPT_POSTFIELDS     => $payload,
-    CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
-    CURLOPT_RETURNTRANSFER => true,
-    CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
-    CURLOPT_CONNECTTIMEOUT => 5,
-]);
-
-$response = curl_exec($ch);
-$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-$curlErr  = curl_error($ch);
-curl_close($ch);
-
-if ($curlErr || $response === false) {
-    http_response_code(502);
-    echo json_encode(['success' => false, 'error' => 'Could not connect to Ollama: ' . ($curlErr ?: 'no response')]);
-    exit;
-}
-
-if ($httpCode !== 200) {
-    http_response_code(502);
-    echo json_encode(['success' => false, 'error' => 'Ollama returned HTTP ' . $httpCode]);
-    exit;
-}
-
-$ollamaData = json_decode($response, true);
-$text = trim($ollamaData['response'] ?? '');
-
-if ($text === '') {
+    ]);
+} catch (RuntimeException $e) {
     http_response_code(502);
-    echo json_encode(['success' => false, 'error' => 'Ollama returned an empty response']);
+    echo json_encode(['success' => false, 'error' => 'All LLM backends unavailable: ' . $e->getMessage()]);
     exit;
 }
 
 echo json_encode([
-    'success'          => true,
-    'text'             => $text,
-    'rag_chunks_used'  => count($ragChunks),
+    'success'         => true,
+    'text'            => $text,
+    'rag_chunks_used' => count($ragChunks),
 ]);
 exit;
 
@@ -742,49 +704,7 @@ function retrieveRelevantChunks(PDO $pdo, string $queryText, string $section, in
 
 function getQueryEmbedding(string $text): ?array
 {
-    $queryText = substr($text, 0, 2000);
-
-    // Try new /api/embed (Ollama >= 0.1.26) first
-    $ch = curl_init(OLLAMA_HOST . '/api/embed');
-    curl_setopt_array($ch, [
-        CURLOPT_POST           => true,
-        CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, 'input' => $queryText]),
-        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
-        CURLOPT_RETURNTRANSFER => true,
-        CURLOPT_TIMEOUT        => 15,
-        CURLOPT_CONNECTTIMEOUT => 3,
-    ]);
-    $resp = curl_exec($ch);
-    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-    curl_close($ch);
-
-    if ($resp && $code === 200) {
-        $data = json_decode($resp, true);
-        $emb  = $data['embeddings'][0] ?? null;
-        if (is_array($emb) && count($emb) > 0) return $emb;
-    }
-
-    // Fallback: legacy /api/embeddings
-    $ch = curl_init(OLLAMA_HOST . '/api/embeddings');
-    curl_setopt_array($ch, [
-        CURLOPT_POST           => true,
-        CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, 'prompt' => $queryText]),
-        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
-        CURLOPT_RETURNTRANSFER => true,
-        CURLOPT_TIMEOUT        => 15,
-        CURLOPT_CONNECTTIMEOUT => 3,
-    ]);
-    $resp2 = curl_exec($ch);
-    $code2 = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-    curl_close($ch);
-
-    if ($resp2 && $code2 === 200) {
-        $data2 = json_decode($resp2, true);
-        $emb2  = $data2['embedding'] ?? null;
-        if (is_array($emb2) && count($emb2) > 0) return $emb2;
-    }
-
-    return null;
+    return llmEmbed($text);  // llama.cpp primary → Ollama fallback (see lib/llm.php)
 }
 
 function vectorSearch(PDO $pdo, array $queryVec, int $topK): array

+ 10 - 26
controllers/soilImportController.php

@@ -16,7 +16,7 @@
  */
 
 require_once __DIR__ . '/../config/database.php';
-require_once __DIR__ . '/../config/ai.php';
+require_once __DIR__ . '/../lib/llm.php';  // llama.cpp primary + Ollama fallback (includes config/ai.php)
 require_once __DIR__ . '/../lib/auth.php';
 require_once __DIR__ . '/../lib/csrf.php';
 require_once __DIR__ . '/labParsers/csbp.php';
@@ -395,31 +395,15 @@ LAB DATA: {$labJson}
 Rules: only use values in the data. Strip units. Use null for unmapped. Output JSON only.
 EOT;
 
-    $payload = json_encode([
-        'model'  => OLLAMA_MODEL,
-        'prompt' => $prompt,
-        'stream' => false,
-        'options' => ['temperature' => OLLAMA_TEMPERATURE, 'num_predict' => 512],
-    ]);
-
-    $ch = curl_init(OLLAMA_HOST . '/api/generate');
-    curl_setopt_array($ch, [
-        CURLOPT_POST           => true,
-        CURLOPT_POSTFIELDS     => $payload,
-        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
-        CURLOPT_RETURNTRANSFER => true,
-        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
-        CURLOPT_CONNECTTIMEOUT => 5,
-    ]);
-    $response = curl_exec($ch);
-    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-    $curlErr  = curl_error($ch);
-    curl_close($ch);
-
-    if ($curlErr || $httpCode !== 200) return $sampleData;
-
-    $data    = json_decode($response, true);
-    $rawText = trim($data['response'] ?? '');
+    try {
+        $rawText = llmGenerate($prompt, [
+            'temperature' => OLLAMA_TEMPERATURE,
+            'num_predict' => 512,
+        ]);
+    } catch (RuntimeException $e) {
+        return $sampleData;  // All backends failed — return unmapped data
+    }
+    $rawText = trim($rawText);
     $rawText = preg_replace('/^```(?:json)?\s*/i', '', $rawText);
     $rawText = preg_replace('/\s*```$/m', '', $rawText);
     if (preg_match('/\{[\s\S]+\}/', $rawText, $m)) $rawText = $m[0];

+ 220 - 0
lib/llm.php

@@ -0,0 +1,220 @@
+<?php
+/**
+ * lib/llm.php
+ *
+ * Unified LLM inference helper.
+ * Primary:  llama.cpp server (LLAMACPP_HOST) — /completion + /v1/embeddings
+ * Fallback: Ollama            (OLLAMA_HOST)   — /api/generate + /api/embed
+ *
+ * Public API:
+ *   llmGenerate(string $prompt, array $options = []): string
+ *   llmEmbed(string $text): ?array
+ *
+ * $options keys (all optional):
+ *   temperature    float  default 0.3
+ *   num_predict    int    default 2048
+ *   num_ctx        int    default 6144  (Ollama only — ignored by llama.cpp)
+ *   repeat_penalty float  default 1.1
+ */
+
+require_once __DIR__ . '/../config/ai.php';
+
+// ── Public functions ──────────────────────────────────────────────────────────
+
+/**
+ * Produce a completion for a prompt using the first backend that answers.
+ *
+ * llama.cpp is asked first. When its helper returns null the switch is
+ * recorded with error_log() and Ollama is asked instead.
+ *
+ * @param string $prompt  Prompt text, handed unchanged to each backend helper.
+ * @param array  $options Generation options, handed unchanged to each backend helper.
+ * @return string Text from whichever backend returned a non-null result.
+ * @throws RuntimeException when neither backend returns text
+ */
+function llmGenerate(string $prompt, array $options = []): string
+{
+    $primary = _llamacppGenerate($prompt, $options);
+
+    if ($primary === null) {
+        error_log('[llm] llama.cpp unavailable — falling back to Ollama');
+
+        $secondary = _ollamaGenerate($prompt, $options);
+        if ($secondary === null) {
+            throw new RuntimeException('All LLM backends unavailable');
+        }
+
+        return $secondary;
+    }
+
+    return $primary;
+}
+
+/**
+ * Embed text into a float vector.
+ * Tries llama.cpp /v1/embeddings first; falls back to Ollama /api/embed.
+ * Returns null only when both backends fail.
+ *
+ * Input is capped at 2000 bytes before being sent. The cap is applied on a
+ * UTF-8 character boundary so the truncated text stays encodable as JSON.
+ *
+ * NOTE(review): the llama.cpp request names no model while the Ollama request
+ * uses EMBED_MODEL. Vectors from different models are not comparable, so
+ * confirm both servers embed with the same model as the stored vectors.
+ *
+ * @param string $text Text to embed; only the first 2000 bytes are used.
+ * @return array|null  Embedding vector, or null when no backend produced one.
+ */
+function llmEmbed(string $text): ?array
+{
+    $maxBytes  = 2000;
+    $truncated = strlen($text) > $maxBytes;
+    $text      = substr($text, 0, $maxBytes);
+
+    // substr() counts bytes, so the cut can land inside a multi-byte UTF-8
+    // character. json_encode() returns false for malformed UTF-8, which would
+    // make both backends fail. A split character leaves at most 3 stray bytes,
+    // so drop up to 3 trailing bytes until the string validates again.
+    if ($truncated) {
+        for ($i = 0; $i < 3 && $text !== '' && preg_match('//u', $text) !== 1; $i++) {
+            $text = substr($text, 0, -1);
+        }
+    }
+
+    $emb = _llamacppEmbed($text);
+    if ($emb !== null) {
+        return $emb;
+    }
+
+    error_log('[llm] llama.cpp embed unavailable — falling back to Ollama');
+
+    return _ollamaEmbed($text);
+}
+
+// ── llama.cpp backend ─────────────────────────────────────────────────────────
+
+/**
+ * Request a completion from the llama.cpp server's /completion endpoint.
+ *
+ * Reads num_predict, temperature, repeat_penalty and stop from $options,
+ * applying the defaults shown below for any that are missing.
+ *
+ * @param string $prompt  Prompt text sent as the "prompt" field.
+ * @param array  $options Generation options (see above).
+ * @return string|null Trimmed "content" field of the response, or null when the
+ *                     payload cannot be encoded, the request fails, the status
+ *                     is not 200, or the content is missing, non-string or empty.
+ */
+function _llamacppGenerate(string $prompt, array $options): ?string
+{
+    $payload = json_encode([
+        'prompt'         => $prompt,
+        'n_predict'      => $options['num_predict']    ?? 2048,
+        'temperature'    => $options['temperature']    ?? 0.3,
+        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
+        'stop'           => $options['stop']           ?? [],
+        'stream'         => false,
+    ]);
+
+    // json_encode() returns false (e.g. for malformed UTF-8 in the prompt).
+    // Sending that as the POST body would only produce a confusing server error.
+    if ($payload === false) {
+        error_log('[llm] llama.cpp generate: payload encoding failed: ' . json_last_error_msg());
+        return null;
+    }
+
+    $ch = curl_init(LLAMACPP_HOST . '/completion');
+    curl_setopt_array($ch, [
+        CURLOPT_POST           => true,
+        CURLOPT_POSTFIELDS     => $payload,
+        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
+        CURLOPT_RETURNTRANSFER => true,
+        CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
+        CURLOPT_CONNECTTIMEOUT => 3,
+    ]);
+
+    $resp = curl_exec($ch);
+    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+    $err  = curl_error($ch);
+    curl_close($ch);
+
+    if ($err || $resp === false || $code !== 200) {
+        error_log('[llm] llama.cpp generate: ' . ($err ?: "HTTP $code"));
+        return null;
+    }
+
+    // Guard against a body that is not a JSON object or whose "content" is
+    // not a string, so trim() is never handed an array or null.
+    $data    = json_decode($resp, true);
+    $content = is_array($data) ? ($data['content'] ?? null) : null;
+    if (!is_string($content)) {
+        return null;
+    }
+
+    $text = trim($content);
+    return $text !== '' ? $text : null;
+}
+
+/**
+ * Ask the llama.cpp server for an embedding of $text.
+ *
+ * Posts {"input": $text} to /v1/embeddings and reads the vector from
+ * data[0].embedding in the JSON response.
+ *
+ * @param string $text Text to embed.
+ * @return array|null  Non-empty embedding array, or null on transport error,
+ *                     non-200 status, or a missing/empty vector.
+ */
+function _llamacppEmbed(string $text): ?array
+{
+    $handle = curl_init(LLAMACPP_HOST . '/v1/embeddings');
+    curl_setopt_array($handle, [
+        CURLOPT_POST           => true,
+        CURLOPT_POSTFIELDS     => json_encode(['input' => $text]),
+        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
+        CURLOPT_RETURNTRANSFER => true,
+        CURLOPT_TIMEOUT        => 15,
+        CURLOPT_CONNECTTIMEOUT => 3,
+    ]);
+
+    $body      = curl_exec($handle);
+    $code      = curl_getinfo($handle, CURLINFO_HTTP_CODE);
+    $transport = curl_error($handle);
+    curl_close($handle);
+
+    $succeeded = !$transport && $body !== false && $code === 200;
+    if (!$succeeded) {
+        error_log('[llm] llama.cpp embed: ' . ($transport ?: "HTTP $code"));
+        return null;
+    }
+
+    $decoded = json_decode($body, true);
+    $vector  = $decoded['data'][0]['embedding'] ?? null;
+
+    if (!is_array($vector) || count($vector) === 0) {
+        return null;
+    }
+
+    return $vector;
+}
+
+// ── Ollama backend ────────────────────────────────────────────────────────────
+
+/**
+ * Request a completion from Ollama's /api/generate endpoint using OLLAMA_MODEL.
+ *
+ * Reads temperature, num_predict, num_ctx and repeat_penalty from $options,
+ * applying the defaults shown below for any that are missing.
+ *
+ * @param string $prompt  Prompt text sent as the "prompt" field.
+ * @param array  $options Generation options (see above).
+ * @return string|null Trimmed "response" field, or null when the payload cannot
+ *                     be encoded, the request fails, the status is not 200, or
+ *                     the response text is missing, non-string or empty.
+ */
+function _ollamaGenerate(string $prompt, array $options): ?string
+{
+    $payload = json_encode([
+        'model'      => OLLAMA_MODEL,
+        'prompt'     => $prompt,
+        'stream'     => false,
+        // keep_alive is a top-level request field in Ollama's API, not a model
+        // option; nested under "options" it is not applied. -1 keeps the model
+        // resident between requests.
+        'keep_alive' => -1,
+        'options'    => [
+            'temperature'    => $options['temperature']    ?? 0.3,
+            'num_predict'    => $options['num_predict']    ?? 2048,
+            'num_ctx'        => $options['num_ctx']        ?? 6144,
+            'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
+        ],
+    ]);
+
+    // json_encode() returns false (e.g. for malformed UTF-8 in the prompt).
+    // Fail here instead of posting an empty body.
+    if ($payload === false) {
+        error_log('[llm] Ollama generate: payload encoding failed: ' . json_last_error_msg());
+        return null;
+    }
+
+    $ch = curl_init(OLLAMA_HOST . '/api/generate');
+    curl_setopt_array($ch, [
+        CURLOPT_POST           => true,
+        CURLOPT_POSTFIELDS     => $payload,
+        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
+        CURLOPT_RETURNTRANSFER => true,
+        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
+        CURLOPT_CONNECTTIMEOUT => 5,
+    ]);
+
+    $resp = curl_exec($ch);
+    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
+    $err  = curl_error($ch);
+    curl_close($ch);
+
+    if ($err || $resp === false || $code !== 200) {
+        error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
+        return null;
+    }
+
+    // Guard against a body that is not a JSON object or whose "response" is
+    // not a string, so trim() is never handed an array or null.
+    $data     = json_decode($resp, true);
+    $response = is_array($data) ? ($data['response'] ?? null) : null;
+    if (!is_string($response)) {
+        return null;
+    }
+
+    $text = trim($response);
+    return $text !== '' ? $text : null;
+}
+
+/**
+ * Ask Ollama for an embedding of $text using EMBED_MODEL.
+ *
+ * Two endpoints are tried in order and the first usable vector wins:
+ *   1. /api/embed       (Ollama >= 0.1.26) — text in "input", vector in embeddings[0]
+ *   2. /api/embeddings  (legacy)           — text in "prompt", vector in embedding
+ *
+ * @param string $text Text to embed.
+ * @return array|null  Non-empty embedding array, or null when both endpoints
+ *                     fail (a line is written to the error log in that case).
+ */
+function _ollamaEmbed(string $text): ?array
+{
+    // [endpoint path, request field carrying the text, vector nested in a list?]
+    $attempts = [
+        ['/api/embed',      'input',  true],
+        ['/api/embeddings', 'prompt', false],
+    ];
+
+    foreach ($attempts as [$path, $inputField, $nested]) {
+        $handle = curl_init(OLLAMA_HOST . $path);
+        curl_setopt_array($handle, [
+            CURLOPT_POST           => true,
+            CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, $inputField => $text]),
+            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
+            CURLOPT_RETURNTRANSFER => true,
+            CURLOPT_TIMEOUT        => 15,
+            CURLOPT_CONNECTTIMEOUT => 5,
+        ]);
+        $body   = curl_exec($handle);
+        $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
+        curl_close($handle);
+
+        // Empty body or non-200: move on to the next endpoint.
+        if (!$body || $status !== 200) {
+            continue;
+        }
+
+        $decoded = json_decode($body, true);
+        $vector  = $nested
+            ? ($decoded['embeddings'][0] ?? null)
+            : ($decoded['embedding'] ?? null);
+
+        if (is_array($vector) && count($vector) > 0) {
+            return $vector;
+        }
+    }
+
+    error_log('[llm] All embed backends failed');
+    return null;
+}