llm.php 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. <?php
  2. /**
  3. * lib/llm.php
  4. *
  5. * Unified LLM inference helper.
  6. * Primary: llama.cpp server (LLAMACPP_HOST) — /v1/chat/completions + /v1/embeddings
  7. * Fallback: Ollama (OLLAMA_HOST) — /api/generate + /api/embed
  8. *
  9. * Primary backend is llama-swap, which proxies multiple llama.cpp instances
  10. * and routes by model name via the OpenAI-compatible /v1 API.
  11. *
  12. * Public API:
  13. * llmGenerate(string $prompt, array $options = []): string
  14. * llmEmbed(string $text): ?array
  15. *
  16. * $options keys (all optional):
  17. * temperature float default LLAMACPP_TEMPERATURE
  18. * num_predict int default 2048 (maps to max_tokens)
  19. * num_ctx int default 6144 (Ollama only — ignored by llama-swap)
  20. * repeat_penalty float default 1.1 (sent unchanged as repeat_penalty to both backends)
  21. */
  22. require_once __DIR__ . '/../config/ai.php';
  23. // ── Public functions ──────────────────────────────────────────────────────────
  /**
   * Produce a text completion for a prompt.
   *
   * The llama.cpp backend is asked first. If it yields null, a fallback notice
   * is logged and the same prompt and options are handed to Ollama.
   *
   * @param string $prompt  Prompt text forwarded unchanged to the backends.
   * @param array  $options Optional generation settings, forwarded unchanged.
   * @return string Text returned by the first backend that produced a result.
   * @throws RuntimeException when both backends return null
   */
  function llmGenerate(string $prompt, array $options = []): string
  {
      $answer = _llamacppGenerate($prompt, $options);

      if ($answer === null) {
          // Primary produced nothing usable: record it and try the secondary.
          error_log('[llm] llama.cpp unavailable — falling back to Ollama');
          $answer = _ollamaGenerate($prompt, $options);
      }

      if ($answer === null) {
          throw new RuntimeException('All LLM backends unavailable');
      }

      return $answer;
  }
  /**
   * Embed text into a vector.
   *
   * The input is capped at 2000 bytes before it is sent. When mbstring is
   * available the cut is made on a UTF-8 character boundary, so the truncated
   * text remains valid UTF-8 and can still be JSON-encoded by the backends.
   *
   * The llama.cpp / llama-swap backend is tried only when LLAMACPP_EMBED_MODEL
   * is non-empty. If it is skipped or returns null, Ollama is used instead.
   *
   * @param string $text Text to embed.
   * @return array|null Embedding returned by the first backend that succeeds,
   *                    or null when every attempted backend failed.
   */
  function llmEmbed(string $text): ?array
  {
      // substr() counts bytes and can slice a multi-byte character in half; the
      // resulting invalid UTF-8 makes json_encode() return false downstream.
      // mb_strcut() keeps the same 2000-byte budget but backs up to the nearest
      // character boundary. Fall back to substr() only if mbstring is missing.
      $text = function_exists('mb_strcut')
          ? mb_strcut($text, 0, 2000, 'UTF-8')
          : substr($text, 0, 2000);

      // Only try llama-swap for embeddings if an embed model is configured.
      if (LLAMACPP_EMBED_MODEL !== '') {
          $emb = _llamacppEmbed($text);
          if ($emb !== null) {
              return $emb;
          }
          error_log('[llm] llama-swap embed unavailable — falling back to Ollama');
      }

      return _ollamaEmbed($text);
  }
  61. // ── llama.cpp backend ─────────────────────────────────────────────────────────
  /**
   * Request a chat completion from the llama.cpp / llama-swap backend.
   *
   * The prompt is sent as a single "user" message to
   * LLAMACPP_HOST/v1/chat/completions with streaming disabled.
   *
   * @param string $prompt  Prompt text.
   * @param array  $options Optional overrides: num_predict (sent as max_tokens),
   *                        temperature, top_p, top_k, repeat_penalty, stop.
   * @return string|null Trimmed completion text, or null on any failure:
   *                     payload could not be encoded, transport error,
   *                     non-200 status, or missing / empty / non-string content.
   */
  function _llamacppGenerate(string $prompt, array $options): ?string
  {
      // llama-swap uses the OpenAI chat completions endpoint, routed by model name.
      $payload = json_encode([
          'model'          => LLAMACPP_MODEL,
          'messages'       => [['role' => 'user', 'content' => $prompt]],
          'max_tokens'     => $options['num_predict'] ?? 2048,
          'temperature'    => $options['temperature'] ?? LLAMACPP_TEMPERATURE,
          'top_p'          => $options['top_p'] ?? LLAMACPP_TOP_P,
          'top_k'          => $options['top_k'] ?? LLAMACPP_TOP_K,
          'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
          'stop'           => $options['stop'] ?? [],
          'stream'         => false,
      ]);
      if ($payload === false) {
          // e.g. invalid UTF-8 in the prompt. Posting `false` would send an empty
          // body and surface later as a misleading HTTP error, so stop here.
          error_log('[llm] llama-swap generate: JSON encode failed: ' . json_last_error_msg());
          return null;
      }

      $ch = curl_init(LLAMACPP_HOST . '/v1/chat/completions');
      if ($ch === false) {
          error_log('[llm] llama-swap generate: curl_init failed');
          return null;
      }
      curl_setopt_array($ch, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => $payload,
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
          CURLOPT_CONNECTTIMEOUT => 3,
      ]);
      $resp = curl_exec($ch);
      $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $err  = curl_error($ch);
      curl_close($ch);

      if ($err || $resp === false || $code !== 200) {
          error_log('[llm] llama-swap generate: ' . ($err ?: "HTTP $code"));
          return null;
      }

      $data    = json_decode($resp, true);
      $content = is_array($data) ? ($data['choices'][0]['message']['content'] ?? null) : null;
      if (!is_string($content)) {
          // Missing, null or structured content: trim() would raise a TypeError
          // on an array, so treat anything that is not a string as "no answer".
          return null;
      }

      $text = trim($content);
      return $text !== '' ? $text : null;
  }
  /**
   * Request an embedding from the llama.cpp / llama-swap backend.
   *
   * Posts {model, input} to LLAMACPP_HOST/v1/embeddings and reads the vector
   * from data[0].embedding in the JSON response.
   *
   * @param string $text Text to embed.
   * @return array|null Non-empty embedding array, or null on a transport error,
   *                    a non-200 status, or a missing / empty embedding.
   */
  function _llamacppEmbed(string $text): ?array
  {
      // llama-swap routes embeddings by model name, same as completions.
      $request = curl_init(LLAMACPP_HOST . '/v1/embeddings');
      curl_setopt_array($request, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => json_encode(['model' => LLAMACPP_EMBED_MODEL, 'input' => $text]),
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => 15,
          CURLOPT_CONNECTTIMEOUT => 3,
      ]);

      $body      = curl_exec($request);
      $status    = curl_getinfo($request, CURLINFO_HTTP_CODE);
      $curlError = curl_error($request);
      curl_close($request);

      $succeeded = !$curlError && $body !== false && $status === 200;
      if (!$succeeded) {
          // Prefer the transport error text; otherwise report the HTTP status.
          error_log('[llm] llama.cpp embed: ' . ($curlError ?: "HTTP $status"));
          return null;
      }

      $decoded = json_decode($body, true);
      $vector  = $decoded['data'][0]['embedding'] ?? null;

      if (!is_array($vector) || count($vector) === 0) {
          return null;
      }

      return $vector;
  }
  122. // ── Ollama backend ────────────────────────────────────────────────────────────
  /**
   * Request a completion from the Ollama backend.
   *
   * Posts a non-streaming request to OLLAMA_HOST/api/generate and reads the
   * text from the "response" field of the JSON reply.
   *
   * @param string $prompt  Prompt text.
   * @param array  $options Optional overrides: temperature, num_predict,
   *                        num_ctx, repeat_penalty.
   * @return string|null Trimmed completion text, or null on any failure:
   *                     payload could not be encoded, transport error,
   *                     non-200 status, or missing / empty / non-string response.
   */
  function _ollamaGenerate(string $prompt, array $options): ?string
  {
      $payload = json_encode([
          'model'      => OLLAMA_MODEL,
          'prompt'     => $prompt,
          'stream'     => false,
          // keep_alive is a top-level request field in the Ollama API, not a
          // model option. Nested under 'options' it was not applied, so it is
          // sent here; -1 asks Ollama to keep the model loaded.
          'keep_alive' => -1,
          'options'    => [
              'temperature'    => $options['temperature'] ?? 0.3,
              'num_predict'    => $options['num_predict'] ?? 2048,
              'num_ctx'        => $options['num_ctx'] ?? 6144,
              'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
          ],
      ]);
      if ($payload === false) {
          // e.g. invalid UTF-8 in the prompt. Do not post an empty body.
          error_log('[llm] Ollama generate: JSON encode failed: ' . json_last_error_msg());
          return null;
      }

      $ch = curl_init(OLLAMA_HOST . '/api/generate');
      if ($ch === false) {
          error_log('[llm] Ollama generate: curl_init failed');
          return null;
      }
      curl_setopt_array($ch, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => $payload,
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
          CURLOPT_CONNECTTIMEOUT => 5,
      ]);
      $resp = curl_exec($ch);
      $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $err  = curl_error($ch);
      curl_close($ch);

      if ($err || $resp === false || $code !== 200) {
          error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
          return null;
      }

      $data     = json_decode($resp, true);
      $response = is_array($data) ? ($data['response'] ?? null) : null;
      if (!is_string($response)) {
          // Missing or non-string field: avoid calling trim() on a non-string.
          return null;
      }

      $text = trim($response);
      return $text !== '' ? $text : null;
  }
  /**
   * Request an embedding from the Ollama backend.
   *
   * Two endpoints are tried in order, each with the EMBED_MODEL model:
   *   1. /api/embed       with the text under "input";  vector at embeddings[0]
   *   2. /api/embeddings  with the text under "prompt"; vector at embedding
   * The first non-empty array found is returned. A failure is logged only
   * after both attempts have come up empty.
   *
   * @param string $text Text to embed.
   * @return array|null Non-empty embedding array, or null when both endpoints failed.
   */
  function _ollamaEmbed(string $text): ?array
  {
      // Endpoint path => name of the request field that carries the text.
      // Order matters: the newer endpoint first, then the legacy one.
      $endpoints = [
          '/api/embed'      => 'input',
          '/api/embeddings' => 'prompt',
      ];

      foreach ($endpoints as $path => $textField) {
          $handle = curl_init(OLLAMA_HOST . $path);
          curl_setopt_array($handle, [
              CURLOPT_POST           => true,
              CURLOPT_POSTFIELDS     => json_encode(['model' => EMBED_MODEL, $textField => $text]),
              CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
              CURLOPT_RETURNTRANSFER => true,
              CURLOPT_TIMEOUT        => 15,
              CURLOPT_CONNECTTIMEOUT => 5,
          ]);
          $raw    = curl_exec($handle);
          $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);
          curl_close($handle);

          if (!$raw || $status !== 200) {
              // Empty body, transport failure or non-200: move to the next endpoint.
              continue;
          }

          $decoded = json_decode($raw, true);

          // The two endpoints return the vector under different keys.
          if ($path === '/api/embed') {
              $vector = $decoded['embeddings'][0] ?? null;
          } else {
              $vector = $decoded['embedding'] ?? null;
          }

          if (is_array($vector) && count($vector) > 0) {
              return $vector;
          }
      }

      error_log('[llm] All embed backends failed');
      return null;
  }
  198. }