| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220 |
- <?php
- /**
- * lib/llm.php
- *
- * Unified LLM inference helper.
- * Primary: llama.cpp server (LLAMACPP_HOST) — /completion + /v1/embeddings
- * Fallback: Ollama (OLLAMA_HOST) — /api/generate + /api/embed
- *
- * Public API:
- * llmGenerate(string $prompt, array $options = []): string
- * llmEmbed(string $text): ?array
- *
- * $options keys (all optional):
- * temperature float default 0.3
- * num_predict int default 2048
- * num_ctx int default 6144 (Ollama only — ignored by llama.cpp)
- * repeat_penalty float default 1.1
- */
- require_once __DIR__ . '/../config/ai.php';
- // ── Public functions ──────────────────────────────────────────────────────────
/**
 * Produce a completion for the given prompt.
 *
 * The llama.cpp backend is asked first. If it yields null (the backend
 * helpers return null on any failure or empty output), a fallback notice is
 * logged and the Ollama backend is asked instead.
 *
 * @param string $prompt  Prompt text handed unchanged to the backends.
 * @param array  $options Optional generation settings, handed unchanged to
 *                        the backends.
 * @return string Text returned by the first backend that produced any.
 *
 * @throws RuntimeException when neither backend returns text
 */
function llmGenerate(string $prompt, array $options = []): string
{
    $primary = _llamacppGenerate($prompt, $options);

    if ($primary === null) {
        error_log('[llm] llama.cpp unavailable — falling back to Ollama');

        $secondary = _ollamaGenerate($prompt, $options);
        if ($secondary === null) {
            throw new RuntimeException('All LLM backends unavailable');
        }

        return $secondary;
    }

    return $primary;
}
/**
 * Embed text into a float vector.
 *
 * The input is capped at 2000 bytes before it is sent to a backend. When the
 * mbstring extension is available the cut is made on a UTF-8 character
 * boundary: a plain byte-wise cut can split a multibyte character and leave
 * malformed UTF-8 at the end of the string, which json_encode() rejects.
 *
 * Tries the llama.cpp backend first; falls back to the Ollama backend.
 *
 * @param string $text Text to embed.
 * @return array|null Embedding vector, or null only when both backends fail.
 */
function llmEmbed(string $text): ?array
{
    $maxBytes = 2000;

    if (strlen($text) > $maxBytes) {
        // mb_strcut() counts in bytes like substr() but never splits a
        // character, so the result is still at most $maxBytes bytes long.
        $text = function_exists('mb_strcut')
            ? mb_strcut($text, 0, $maxBytes, 'UTF-8')
            : substr($text, 0, $maxBytes);
    }

    $emb = _llamacppEmbed($text);
    if ($emb !== null) {
        return $emb;
    }

    error_log('[llm] llama.cpp embed unavailable — falling back to Ollama');

    return _ollamaEmbed($text);
}
- // ── llama.cpp backend ─────────────────────────────────────────────────────────
/**
 * Request a completion from the llama.cpp server (POST LLAMACPP_HOST/completion).
 *
 * @param string $prompt  Prompt text, sent as the "prompt" field.
 * @param array  $options Optional keys and their defaults:
 *                        num_predict (2048), temperature (0.3),
 *                        repeat_penalty (1.1), stop ([]).
 * @return string|null Trimmed "content" field of the response, or null when
 *                     the request could not be built or sent, the server did
 *                     not answer with HTTP 200, or the response carried no
 *                     usable text. Failures are written to the error log.
 */
function _llamacppGenerate(string $prompt, array $options): ?string
{
    // Substitute malformed UTF-8 rather than letting json_encode() return
    // false, which would reach cURL as a non-JSON request body.
    $payload = json_encode([
        'prompt'         => $prompt,
        'n_predict'      => $options['num_predict'] ?? 2048,
        'temperature'    => $options['temperature'] ?? 0.3,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
        'stop'           => $options['stop'] ?? [],
        'stream'         => false,
    ], JSON_INVALID_UTF8_SUBSTITUTE);

    if ($payload === false) {
        error_log('[llm] llama.cpp generate: JSON encode failed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(LLAMACPP_HOST . '/completion');
    if ($ch === false) {
        error_log('[llm] llama.cpp generate: curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] llama.cpp generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    // Only accept a JSON object whose "content" is a string; trim() on any
    // other type would raise a TypeError on PHP 8.
    $data    = json_decode($resp, true);
    $content = is_array($data) ? ($data['content'] ?? null) : null;
    if (!is_string($content)) {
        error_log('[llm] llama.cpp generate: unexpected response body');
        return null;
    }

    $text = trim($content);

    return $text !== '' ? $text : null;
}
/**
 * Request an embedding from the llama.cpp server's OpenAI-compatible
 * endpoint (POST LLAMACPP_HOST/v1/embeddings).
 *
 * @param string $text Text to embed, sent as the "input" field.
 * @return array|null The non-empty array found at data[0].embedding in the
 *                    response, or null when the request could not be built
 *                    or sent, the server did not answer with HTTP 200, or
 *                    the response held no embedding. Transport and HTTP
 *                    failures are written to the error log.
 */
function _llamacppEmbed(string $text): ?array
{
    // Substitute malformed UTF-8 rather than letting json_encode() return
    // false, which would reach cURL as a non-JSON request body.
    $payload = json_encode(['input' => $text], JSON_INVALID_UTF8_SUBSTITUTE);
    if ($payload === false) {
        error_log('[llm] llama.cpp embed: JSON encode failed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(LLAMACPP_HOST . '/v1/embeddings');
    if ($ch === false) {
        error_log('[llm] llama.cpp embed: curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] llama.cpp embed: ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data = json_decode($resp, true);
    $emb  = is_array($data) ? ($data['data'][0]['embedding'] ?? null) : null;

    return (is_array($emb) && count($emb) > 0) ? $emb : null;
}
- // ── Ollama backend ────────────────────────────────────────────────────────────
/**
 * Request a completion from Ollama (POST OLLAMA_HOST/api/generate).
 *
 * @param string $prompt  Prompt text, sent as the "prompt" field.
 * @param array  $options Optional keys and their defaults:
 *                        temperature (0.3), num_predict (2048),
 *                        num_ctx (6144), repeat_penalty (1.1),
 *                        stop (omitted unless a non-empty array is given).
 * @return string|null Trimmed "response" field of the reply, or null when
 *                     the request could not be built or sent, the server did
 *                     not answer with HTTP 200, or the reply carried no
 *                     usable text. Failures are written to the error log.
 */
function _ollamaGenerate(string $prompt, array $options): ?string
{
    $modelOptions = [
        'temperature'    => $options['temperature'] ?? 0.3,
        'num_predict'    => $options['num_predict'] ?? 2048,
        'num_ctx'        => $options['num_ctx'] ?? 6144,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
    ];

    // Forward stop sequences so a fallback from llama.cpp honours the same
    // "stop" option that backend receives.
    $stop = $options['stop'] ?? [];
    if (is_array($stop) && $stop !== []) {
        $modelOptions['stop'] = array_values($stop);
    }

    // keep_alive is a top-level field of the /api/generate request, not a
    // model option, so it must sit beside "options" rather than inside it.
    // Substitute malformed UTF-8 rather than letting json_encode() return
    // false, which would reach cURL as a non-JSON request body.
    $payload = json_encode([
        'model'      => OLLAMA_MODEL,
        'prompt'     => $prompt,
        'stream'     => false,
        'keep_alive' => -1,
        'options'    => $modelOptions,
    ], JSON_INVALID_UTF8_SUBSTITUTE);

    if ($payload === false) {
        error_log('[llm] Ollama generate: JSON encode failed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . '/api/generate');
    if ($ch === false) {
        error_log('[llm] Ollama generate: curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    // Only accept a JSON object whose "response" is a string; trim() on any
    // other type would raise a TypeError on PHP 8.
    $data   = json_decode($resp, true);
    $answer = is_array($data) ? ($data['response'] ?? null) : null;
    if (!is_string($answer)) {
        error_log('[llm] Ollama generate: unexpected response body');
        return null;
    }

    $text = trim($answer);

    return $text !== '' ? $text : null;
}
/**
 * Request an embedding from Ollama.
 *
 * Tries /api/embed first (vector read from embeddings[0]); if that yields no
 * non-empty vector, tries the legacy /api/embeddings endpoint (vector read
 * from "embedding").
 *
 * @param string $text Text to embed.
 * @return array|null Non-empty embedding array, or null when neither
 *                    endpoint produced one.
 */
function _ollamaEmbed(string $text): ?array
{
    // Current endpoint: takes "input", answers with a list of vectors.
    $data = _ollamaEmbedRequest('/api/embed', ['model' => EMBED_MODEL, 'input' => $text]);
    $emb  = $data['embeddings'][0] ?? null;
    if (is_array($emb) && count($emb) > 0) {
        return $emb;
    }

    // Legacy endpoint: takes "prompt", answers with a single vector.
    $data = _ollamaEmbedRequest('/api/embeddings', ['model' => EMBED_MODEL, 'prompt' => $text]);
    $emb  = $data['embedding'] ?? null;
    if (is_array($emb) && count($emb) > 0) {
        return $emb;
    }

    error_log('[llm] All embed backends failed');

    return null;
}

/**
 * POST a JSON body to an Ollama embedding endpoint and decode the reply.
 *
 * @param string $path Endpoint path appended to OLLAMA_HOST.
 * @param array  $body Request fields to JSON-encode.
 * @return array|null Decoded JSON object, or null when the body could not be
 *                    encoded, the transfer failed, the status was not 200,
 *                    or the reply was empty or not a JSON object/array.
 *                    Encoding, transport and HTTP failures are logged.
 */
function _ollamaEmbedRequest(string $path, array $body): ?array
{
    // Substitute malformed UTF-8 rather than letting json_encode() return
    // false, which would reach cURL as a non-JSON request body.
    $payload = json_encode($body, JSON_INVALID_UTF8_SUBSTITUTE);
    if ($payload === false) {
        error_log('[llm] Ollama embed ' . $path . ': JSON encode failed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . $path);
    if ($ch === false) {
        error_log('[llm] Ollama embed ' . $path . ': curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($resp === false || $resp === '' || $code !== 200) {
        error_log('[llm] Ollama embed ' . $path . ': ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data = json_decode($resp, true);

    return is_array($data) ? $data : null;
}
|