<?php
/**
 * lib/llm.php
 *
 * Unified LLM inference helper.
 * Primary:  llama.cpp server (LLAMACPP_HOST) — /completion + /v1/embeddings
 * Fallback: Ollama            (OLLAMA_HOST)   — /api/generate + /api/embed
 *
 * Public API:
 *   llmGenerate(string $prompt, array $options = []): string
 *   llmEmbed(string $text): ?array
 *
 * $options keys (all optional):
 *   temperature    float  default 0.3
 *   num_predict    int    default 2048
 *   num_ctx        int    default 6144  (Ollama only — ignored by llama.cpp)
 *   repeat_penalty float  default 1.1
 *   stop           array  default []    (stop sequences; sent to llama.cpp)
 */

require_once __DIR__ . '/../config/ai.php';

// ── Public functions ──────────────────────────────────────────────────────────

/**
 * Produce a completion for a prompt from the first backend that answers.
 *
 * The llama.cpp helper is asked first. If it returns null, a notice is
 * logged and the Ollama helper is asked with the same prompt and options.
 *
 * @param string $prompt  Prompt text, forwarded unchanged to the backend helpers.
 * @param array  $options Optional generation settings, forwarded unchanged.
 *
 * @return string The text returned by whichever backend produced a result.
 *
 * @throws RuntimeException when both backends fail
 */
function llmGenerate(string $prompt, array $options = []): string
{
    $primary = _llamacppGenerate($prompt, $options);

    if ($primary === null) {
        error_log('[llm] llama.cpp unavailable — falling back to Ollama');

        $secondary = _ollamaGenerate($prompt, $options);
        if ($secondary === null) {
            // Neither backend produced text; callers are expected to catch this.
            throw new RuntimeException('All LLM backends unavailable');
        }

        return $secondary;
    }

    return $primary;
}

/**
 * Embed text into a vector.
 *
 * The input is capped at 2000 bytes before being sent. The cut is moved back
 * to a UTF-8 character boundary so the truncated text stays valid UTF-8
 * (json_encode() rejects malformed UTF-8, which would make every backend fail).
 *
 * Tries llama.cpp /v1/embeddings first; falls back to Ollama.
 *
 * @param string $text Text to embed.
 *
 * @return array|null The embedding vector, or null only when both backends fail.
 */
function llmEmbed(string $text): ?array
{
    $text = _llmTruncateUtf8($text, 2000);

    $emb = _llamacppEmbed($text);
    if ($emb !== null) {
        return $emb;
    }

    error_log('[llm] llama.cpp embed unavailable — falling back to Ollama');

    return _ollamaEmbed($text);
}

/**
 * Truncate a string to at most $maxBytes bytes without splitting a UTF-8 character.
 *
 * @param string $text     Input text (expected to be UTF-8).
 * @param int    $maxBytes Maximum length of the result, in bytes.
 *
 * @return string $text unchanged when it already fits, otherwise a prefix of
 *                at most $maxBytes bytes ending on a character boundary.
 */
function _llmTruncateUtf8(string $text, int $maxBytes): string
{
    if ($maxBytes <= 0) {
        return '';
    }
    if (strlen($text) <= $maxBytes) {
        return $text;
    }

    // $text[$cut] is the first byte that would be dropped. If it is a UTF-8
    // continuation byte (10xxxxxx) the cut lands inside a character, so step
    // back to that character's lead byte. A character is at most 4 bytes, so
    // three steps are always enough for well-formed input.
    $cut   = $maxBytes;
    $floor = max(0, $maxBytes - 3);
    while ($cut > $floor && (ord($text[$cut]) & 0xC0) === 0x80) {
        $cut--;
    }

    return substr($text, 0, $cut);
}

// ── llama.cpp backend ─────────────────────────────────────────────────────────

/**
 * Request a completion from the llama.cpp server (POST LLAMACPP_HOST/completion).
 *
 * @param string $prompt  Prompt text.
 * @param array  $options Optional keys: num_predict, temperature,
 *                        repeat_penalty, stop. Missing keys use the defaults
 *                        visible in the payload below.
 *
 * @return string|null Trimmed completion text, or null when the request cannot
 *                     be built, the transfer fails, the status is not 200, or
 *                     the response carries no usable text.
 */
function _llamacppGenerate(string $prompt, array $options): ?string
{
    $payload = json_encode([
        'prompt'         => $prompt,
        'n_predict'      => $options['num_predict']    ?? 2048,
        'temperature'    => $options['temperature']    ?? 0.3,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
        'stop'           => $options['stop']           ?? [],
        'stream'         => false,
    ]);

    // json_encode() returns false (e.g. on malformed UTF-8 in the prompt);
    // sending that would post an empty body, so bail out with a clear message.
    if ($payload === false) {
        error_log('[llm] llama.cpp generate: cannot encode request: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(LLAMACPP_HOST . '/completion');
    if ($ch === false) {
        error_log('[llm] llama.cpp generate: curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 3,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] llama.cpp generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data    = json_decode($resp, true);
    $content = is_array($data) ? ($data['content'] ?? null) : null;

    // Guard before trim(): a non-string value would raise a TypeError on PHP 8.
    if (!is_string($content)) {
        error_log('[llm] llama.cpp generate: response has no text content');
        return null;
    }

    $text = trim($content);
    return $text !== '' ? $text : null;
}

/**
 * Request an embedding for $text from the llama.cpp server.
 *
 * Posts {"input": $text} to LLAMACPP_HOST/v1/embeddings (the OpenAI-compatible
 * endpoint) and reads the vector from data[0].embedding in the JSON reply.
 *
 * @param string $text Text to embed.
 *
 * @return array|null The non-empty embedding array, or null on a cURL error,
 *                    a non-200 status, or a missing/empty embedding.
 */
function _llamacppEmbed(string $text): ?array
{
    $body = json_encode(['input' => $text]);

    $curlOptions = [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $body,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_CONNECTTIMEOUT => 3,
    ];

    $handle = curl_init(LLAMACPP_HOST . '/v1/embeddings');
    curl_setopt_array($handle, $curlOptions);

    $raw            = curl_exec($handle);
    $transportError = curl_error($handle);
    $status         = curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    $succeeded = !$transportError && $raw !== false && $status === 200;
    if (!$succeeded) {
        // Prefer the cURL message; fall back to the HTTP status when cURL reported none.
        $reason = $transportError ?: 'HTTP ' . $status;
        error_log('[llm] llama.cpp embed: ' . $reason);
        return null;
    }

    $decoded = json_decode($raw, true);
    $vector  = $decoded['data'][0]['embedding'] ?? null;

    if (!is_array($vector) || count($vector) === 0) {
        return null;
    }

    return $vector;
}

// ── Ollama backend ────────────────────────────────────────────────────────────

/**
 * Request a completion from Ollama (POST OLLAMA_HOST/api/generate).
 *
 * @param string $prompt  Prompt text.
 * @param array  $options Optional keys: temperature, num_predict, num_ctx,
 *                        repeat_penalty, stop. Missing keys use the defaults
 *                        visible below; stop is only sent when non-empty.
 *
 * @return string|null Trimmed response text, or null when the request cannot
 *                     be built, the transfer fails, the status is not 200, or
 *                     the response carries no usable text.
 */
function _ollamaGenerate(string $prompt, array $options): ?string
{
    $modelOptions = [
        'temperature'    => $options['temperature']    ?? 0.3,
        'num_predict'    => $options['num_predict']    ?? 2048,
        'num_ctx'        => $options['num_ctx']        ?? 6144,
        'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
    ];

    // Forward stop sequences so a fallback run stops at the same markers the
    // llama.cpp backend would have used.
    if (!empty($options['stop'])) {
        $modelOptions['stop'] = array_values((array) $options['stop']);
    }

    $payload = json_encode([
        'model'      => OLLAMA_MODEL,
        'prompt'     => $prompt,
        'stream'     => false,
        // keep_alive is a top-level request field in the Ollama API, not a
        // model option; nested under "options" it has no effect.
        'keep_alive' => -1,
        'options'    => $modelOptions,
    ]);

    // json_encode() returns false (e.g. on malformed UTF-8 in the prompt);
    // sending that would post an empty body, so bail out with a clear message.
    if ($payload === false) {
        error_log('[llm] Ollama generate: cannot encode request: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . '/api/generate');
    if ($ch === false) {
        error_log('[llm] Ollama generate: curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data     = json_decode($resp, true);
    $response = is_array($data) ? ($data['response'] ?? null) : null;

    // Guard before trim(): a non-string value would raise a TypeError on PHP 8.
    if (!is_string($response)) {
        error_log('[llm] Ollama generate: response has no text content');
        return null;
    }

    $text = trim($response);
    return $text !== '' ? $text : null;
}

/**
 * Request an embedding for $text from Ollama.
 *
 * Tries /api/embed first and reads embeddings[0]; if that yields no vector,
 * retries against the legacy /api/embeddings endpoint and reads embedding.
 *
 * @param string $text Text to embed.
 *
 * @return array|null The non-empty embedding array, or null when neither
 *                    endpoint returned one.
 */
function _ollamaEmbed(string $text): ?array
{
    // Try /api/embed (Ollama >= 0.1.26) first
    $data = _ollamaEmbedPost('/api/embed', ['model' => EMBED_MODEL, 'input' => $text]);
    $emb  = $data['embeddings'][0] ?? null;
    if (is_array($emb) && count($emb) > 0) {
        return $emb;
    }

    // Fallback: legacy /api/embeddings
    $data2 = _ollamaEmbedPost('/api/embeddings', ['model' => EMBED_MODEL, 'prompt' => $text]);
    $emb2  = $data2['embedding'] ?? null;
    if (is_array($emb2) && count($emb2) > 0) {
        return $emb2;
    }

    error_log('[llm] All embed backends failed');
    return null;
}

/**
 * POST a JSON body to an Ollama endpoint and decode the JSON reply.
 *
 * Failures are logged with the endpoint and either the cURL error or the HTTP
 * status, matching how the other backend helpers in this file report errors.
 *
 * @param string $endpoint Path appended to OLLAMA_HOST, e.g. '/api/embed'.
 * @param array  $body     Request body, JSON-encoded before sending.
 *
 * @return array|null Decoded response, or null when the request could not be
 *                    built, the transfer failed, the status was not 200, or
 *                    the body was not a JSON object/array.
 */
function _ollamaEmbedPost(string $endpoint, array $body): ?array
{
    $payload = json_encode($body);
    if ($payload === false) {
        error_log('[llm] Ollama embed ' . $endpoint . ': cannot encode request: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init(OLLAMA_HOST . $endpoint);
    if ($ch === false) {
        error_log('[llm] Ollama embed ' . $endpoint . ': curl_init failed');
        return null;
    }

    curl_setopt_array($ch, [
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => $payload,
        CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_CONNECTTIMEOUT => 5,
    ]);

    $resp = curl_exec($ch);
    $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $err  = curl_error($ch);
    curl_close($ch);

    if ($err || $resp === false || $code !== 200) {
        error_log('[llm] Ollama embed ' . $endpoint . ': ' . ($err ?: "HTTP $code"));
        return null;
    }

    $data = json_decode($resp, true);
    return is_array($data) ? $data : null;
}