$prompt,
        'n_predict'      => $options['num_predict'] ?? 2048,
        'temperature'    => $options['temperature'] ?? 0.3,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
        'stop'           => $options['stop'] ?? [],
        'stream'         => false,
    ]);

    $ch = curl_init(LLAMACPP_HOST . '/completion');
    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);
    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] llama.cpp generate: ' . ($err ?: "HTTP $code"));
        return null;
    }
    $data = json_decode($resp, true);
    $text = trim($data['content'] ?? '');
    return $text !== '' ? $text : null;
}

/**
 * Requests an embedding vector for $text from llama.cpp's OpenAI-compatible
 * /v1/embeddings endpoint.
 *
 * @param string $text Text to embed.
 * @return array|null The value found at data[0].embedding, or null when the
 *                    payload cannot be encoded, the request fails, the status
 *                    is not 200, or no non-empty embedding array is present.
 */
function _llamacppEmbed(string $text): ?array {
    $payload = json_encode(['input' => $text]);
    if ($payload === false) {
        // json_encode() fails on e.g. malformed UTF-8; do not POST `false`.
        error_log('[llm] llama.cpp embed: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(LLAMACPP_HOST . '/v1/embeddings');
    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);
    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] llama.cpp embed: ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data = json_decode($resp, true);
    $emb  = $data['data'][0]['embedding'] ?? null;
    return (is_array($emb) && count($emb) > 0) ? $emb : null;
}

// ── Ollama backend ────────────────────────────────────────────────────────────

/**
 * Runs a non-streaming completion against Ollama's /api/generate endpoint.
 *
 * Recognised keys in $options (all optional): temperature, num_predict,
 * num_ctx, repeat_penalty, and stop (array of stop sequences, forwarded only
 * when non-empty so the request is unchanged for callers that do not set it).
 *
 * @param string $prompt  Prompt text sent as-is.
 * @param array  $options Sampling overrides; defaults are applied per key.
 * @return string|null Trimmed `response` text, or null when the payload cannot
 *                     be encoded, the request fails, the status is not 200, or
 *                     the response text is empty.
 */
function _ollamaGenerate(string $prompt, array $options): ?string {
    $modelOptions = [
        'temperature'    => $options['temperature'] ?? 0.3,
        'num_predict'    => $options['num_predict'] ?? 2048,
        'num_ctx'        => $options['num_ctx'] ?? 6144,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
    ];
    // The llama.cpp backend honours $options['stop']; forward it here too.
    $stop = $options['stop'] ?? null;
    if (is_array($stop) && count($stop) > 0) {
        $modelOptions['stop'] = array_values($stop);
    }

    $payload = json_encode([
        'model'      => OLLAMA_MODEL,
        'prompt'     => $prompt,
        'stream'     => false,
        // keep_alive is a top-level request field in the Ollama API, not a
        // model option; nested under 'options' it is not applied as intended.
        'keep_alive' => -1,
        'options'    => $modelOptions,
    ]);
    if ($payload === false) {
        // json_encode() fails on e.g. malformed UTF-8; do not POST `false`.
        error_log('[llm] Ollama generate: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . '/api/generate');
    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);
    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data = json_decode($resp, true);
    $raw  = $data['response'] ?? null;
    // Guard against a non-string `response`, which would make trim() throw.
    $text = is_string($raw) ? trim($raw) : '';
    return $text !== '' ? $text : null;
}

/**
 * Requests an embedding vector for $text from Ollama.
 *
 * Tries /api/embed first (Ollama >= 0.1.26, text under `input`, result at
 * embeddings[0]) and falls back to the legacy /api/embeddings endpoint (text
 * under `prompt`, result at `embedding`).
 *
 * @param string $text Text to embed.
 * @return array|null The first non-empty embedding array obtained, or null
 *                    when both endpoints fail; the reason for each failure is
 *                    included in the logged message.
 */
function _ollamaEmbed(string $text): ?array {
    // [endpoint path, request key that carries the text]
    $attempts = [
        ['/api/embed', 'input'],
        ['/api/embeddings', 'prompt'],
    ];
    $failures = [];

    foreach ($attempts as [$path, $textKey]) {
        $payload = json_encode(['model' => EMBED_MODEL, $textKey => $text]);
        if ($payload === false) {
            // Encoding depends only on $text, so the other endpoint would fail too.
            error_log('[llm] Ollama embed: ' . json_last_error_msg());
            return null;
        }

        $ch = curl_init(OLLAMA_HOST . $path);
        curl_setopt_array($ch, [
            CURLOPT_POST           => true,
            CURLOPT_POSTFIELDS     => $payload,
            CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_TIMEOUT        => 15,
            CURLOPT_CONNECTTIMEOUT => 5,
        ]);
        $resp = curl_exec($ch);
        $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $err  = curl_error($ch);
        curl_close($ch);

        if (!$resp || $code !== 200) {
            $failures[] = $path . ': ' . ($err ?: "HTTP $code");
            continue;
        }

        $data = json_decode($resp, true);
        $emb  = $path === '/api/embed'
            ? ($data['embeddings'][0] ?? null)
            : ($data['embedding'] ?? null);
        if (is_array($emb) && count($emb) > 0) {
            return $emb;
        }
        $failures[] = $path . ': no embedding in response';
    }

    error_log('[llm] All embed backends failed (' . implode('; ', $failures) . ')');
    return null;
}