|
|
@@ -0,0 +1,220 @@
|
|
|
+<?php
|
|
|
+/**
|
|
|
+ * lib/llm.php
|
|
|
+ *
|
|
|
+ * Unified LLM inference helper.
|
|
|
+ * Primary: llama.cpp server (LLAMACPP_HOST) — /completion + /v1/embeddings
|
|
|
+ * Fallback: Ollama (OLLAMA_HOST) — /api/generate + /api/embed
|
|
|
+ *
|
|
|
+ * Public API:
|
|
|
+ * llmGenerate(string $prompt, array $options = []): string
|
|
|
+ * llmEmbed(string $text): ?array
|
|
|
+ *
|
|
|
+ * $options keys (all optional):
|
|
|
+ * temperature float default 0.3
|
|
|
+ * num_predict int default 2048
|
|
|
+ * num_ctx int default 6144 (Ollama only — ignored by llama.cpp)
|
|
|
+ * repeat_penalty float default 1.1
+ * stop array default [] (stop sequences; sent to llama.cpp)
|
|
|
+ */
|
|
|
+
|
|
|
+require_once __DIR__ . '/../config/ai.php';
|
|
|
+
|
|
|
+// ── Public functions ──────────────────────────────────────────────────────────
|
|
|
+
|
|
|
/**
 * Produce a completion for a prompt using the first backend that answers.
 *
 * The llama.cpp backend is asked first. When it yields null, the fallback is
 * logged and the Ollama backend is asked with the same prompt and options.
 *
 * @param string $prompt  Prompt text, handed to the backends unchanged.
 * @param array  $options Optional generation settings, forwarded as-is to the backends.
 *
 * @return string Text from the first backend that returned a non-null result.
 *
 * @throws RuntimeException when both backends return null
 */
function llmGenerate(string $prompt, array $options = []): string
{
    $primary = _llamacppGenerate($prompt, $options);

    if ($primary === null) {
        error_log('[llm] llama.cpp unavailable — falling back to Ollama');

        $secondary = _ollamaGenerate($prompt, $options);
        if ($secondary === null) {
            throw new RuntimeException('All LLM backends unavailable');
        }

        return $secondary;
    }

    return $primary;
}
|
|
|
+
|
|
|
/**
 * Embed text into a float vector.
 *
 * The input is capped at 2000 bytes before being sent. The cap is applied on a
 * UTF-8 character boundary: a plain byte-wise cut can split a multi-byte
 * character, which makes json_encode() fail on the resulting invalid string.
 *
 * Tries the llama.cpp backend first; when it returns null, the fallback is
 * logged and the Ollama backend is tried.
 *
 * @param string $text Text to embed.
 *
 * @return array|null Embedding vector from the first backend that returned one,
 *                    or null when both backends returned null.
 */
function llmEmbed(string $text): ?array
{
    $text = _llmTruncateUtf8($text, 2000);

    $emb = _llamacppEmbed($text);
    if ($emb !== null) {
        return $emb;
    }

    error_log('[llm] llama.cpp embed unavailable — falling back to Ollama');

    return _ollamaEmbed($text);
}

/**
 * Truncate a string to at most $maxBytes bytes without splitting a UTF-8 character.
 *
 * @param string $text     Input text (expected to be UTF-8).
 * @param int    $maxBytes Maximum length of the result in bytes.
 *
 * @return string Prefix of $text no longer than $maxBytes bytes.
 */
function _llmTruncateUtf8(string $text, int $maxBytes): string
{
    if ($maxBytes <= 0) {
        return '';
    }
    if (strlen($text) <= $maxBytes) {
        return $text;
    }

    $end = $maxBytes;

    // If the first dropped byte is a continuation byte (10xxxxxx), the cut
    // landed inside a character: step back to that character's lead byte.
    // A UTF-8 character has at most 3 continuation bytes, so never back up
    // further than that (keeps malformed input from being eaten away).
    while ($end > 0 && ($maxBytes - $end) < 3 && (ord($text[$end]) & 0xC0) === 0x80) {
        $end--;
    }

    return substr($text, 0, $end);
}
|
|
|
+
|
|
|
+// ── llama.cpp backend ─────────────────────────────────────────────────────────
|
|
|
+
|
|
|
/**
 * Request a completion from the llama.cpp server's /completion endpoint.
 *
 * @param string $prompt  Prompt text.
 * @param array  $options Optional keys read here: num_predict, temperature,
 *                        repeat_penalty, stop.
 *
 * @return string|null Trimmed completion text, or null when the request could
 *                     not be encoded, the transfer failed, the HTTP status was
 *                     not 200, or the response carried no usable text.
 */
function _llamacppGenerate(string $prompt, array $options): ?string
{
    // JSON_INVALID_UTF8_SUBSTITUTE (PHP >= 7.2): a stray invalid byte in the
    // prompt is replaced with U+FFFD instead of making json_encode() return
    // false, which would otherwise be posted as an empty request body.
    $payload = json_encode([
        'prompt'         => $prompt,
        'n_predict'      => $options['num_predict'] ?? 2048,
        'temperature'    => $options['temperature'] ?? 0.3,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
        'stop'           => $options['stop'] ?? [],
        'stream'         => false,
    ], JSON_INVALID_UTF8_SUBSTITUTE);

    if ($payload === false) {
        error_log('[llm] llama.cpp generate: request encoding failed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(LLAMACPP_HOST . '/completion');
    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] llama.cpp generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    // Validate the decoded shape before trimming: trim() on a non-string
    // (e.g. an array) would raise a TypeError instead of triggering fallback.
    $data    = json_decode($resp, true);
    $content = is_array($data) ? ($data['content'] ?? '') : '';
    if (!is_string($content)) {
        error_log('[llm] llama.cpp generate: unexpected response shape');
        return null;
    }

    $text = trim($content);
    return $text !== '' ? $text : null;
}
|
|
|
+
|
|
|
/**
 * Ask the llama.cpp server's /v1/embeddings endpoint for an embedding.
 *
 * @param string $text Text to embed.
 *
 * @return array|null The non-empty array found at data[0].embedding in the
 *                    response, or null on transfer error, non-200 status, or
 *                    when no such array is present.
 */
function _llamacppEmbed(string $text): ?array
{
    $body = json_encode(['input' => $text]);

    $handle = curl_init(LLAMACPP_HOST . '/v1/embeddings');
    curl_setopt_array($handle, [
        CURLOPT_CONNECTTIMEOUT => 3,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $body,
    ]);

    $raw       = curl_exec($handle);
    $status    = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    $curlError = curl_error($handle);
    curl_close($handle);

    $succeeded = !$curlError && $raw !== false && $status === 200;
    if (!$succeeded) {
        error_log('[llm] llama.cpp embed: ' . ($curlError ?: "HTTP $status"));
        return null;
    }

    $decoded = json_decode($raw, true);
    $vector  = $decoded['data'][0]['embedding'] ?? null;

    // Anything other than a non-empty array counts as "no embedding".
    if (!is_array($vector) || count($vector) === 0) {
        return null;
    }

    return $vector;
}
|
|
|
+
|
|
|
+// ── Ollama backend ────────────────────────────────────────────────────────────
|
|
|
+
|
|
|
/**
 * Request a completion from Ollama's /api/generate endpoint.
 *
 * @param string $prompt  Prompt text.
 * @param array  $options Optional keys read here: temperature, num_predict,
 *                        num_ctx, repeat_penalty, stop.
 *
 * @return string|null Trimmed completion text, or null when the request could
 *                     not be encoded, the transfer failed, the HTTP status was
 *                     not 200, or the response carried no usable text.
 */
function _ollamaGenerate(string $prompt, array $options): ?string
{
    $modelOptions = [
        'temperature'    => $options['temperature'] ?? 0.3,
        'num_predict'    => $options['num_predict'] ?? 2048,
        'num_ctx'        => $options['num_ctx'] ?? 6144,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
    ];

    // Forward stop sequences so the fallback honours them like the llama.cpp
    // backend does. Only sent when the caller supplied some, so the default
    // request is unchanged.
    if (!empty($options['stop'])) {
        $modelOptions['stop'] = array_values((array) $options['stop']);
    }

    // keep_alive is a top-level request field in the Ollama API, not a model
    // option; nested under "options" it has no effect.
    // JSON_INVALID_UTF8_SUBSTITUTE (PHP >= 7.2) keeps a stray invalid byte in
    // the prompt from turning the whole payload into `false`.
    $payload = json_encode([
        'model'      => OLLAMA_MODEL,
        'prompt'     => $prompt,
        'stream'     => false,
        'keep_alive' => -1,
        'options'    => $modelOptions,
    ], JSON_INVALID_UTF8_SUBSTITUTE);

    if ($payload === false) {
        error_log('[llm] Ollama generate: request encoding failed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . '/api/generate');
    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    // Validate the decoded shape before trimming: trim() on a non-string
    // would raise a TypeError rather than report the backend as failed.
    $data     = json_decode($resp, true);
    $response = is_array($data) ? ($data['response'] ?? '') : '';
    if (!is_string($response)) {
        error_log('[llm] Ollama generate: unexpected response shape');
        return null;
    }

    $text = trim($response);
    return $text !== '' ? $text : null;
}
|
|
|
+
|
|
|
/**
 * Ask Ollama for an embedding, trying /api/embed and then the legacy
 * /api/embeddings endpoint.
 *
 * @param string $text Text to embed.
 *
 * @return array|null The first non-empty embedding array obtained, or null
 *                    (after logging) when neither endpoint produced one.
 */
function _ollamaEmbed(string $text): ?array
{
    // Each entry: endpoint path and the request key that carries the text.
    $endpoints = [
        ['/api/embed', 'input'],       // current endpoint, tried first
        ['/api/embeddings', 'prompt'], // legacy endpoint
    ];

    foreach ($endpoints as [$path, $textKey]) {
        $handle = curl_init(OLLAMA_HOST . $path);
        curl_setopt_array($handle, [
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, $textKey => $text]),
            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 15,
            CURLOPT_CONNECTTIMEOUT => 5,
        ]);
        $raw    = curl_exec($handle);
        $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
        curl_close($handle);

        if (!$raw || $status !== 200) {
            continue;
        }

        $decoded = json_decode($raw, true);

        // The two endpoints return the vector under different keys.
        if ($path === '/api/embed') {
            $vector = $decoded['embeddings'][0] ?? null;
        } else {
            $vector = $decoded['embedding'] ?? null;
        }

        if (is_array($vector) && count($vector) > 0) {
            return $vector;
        }
    }

    error_log('[llm] All embed backends failed');
    return null;
}
|