llm.php 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. <?php
  2. /**
  3. * lib/llm.php
  4. *
  5. * Unified LLM inference helper.
  6. * Primary: llama.cpp server (LLAMACPP_HOST) — /completion + /v1/embeddings
  7. * Fallback: Ollama (OLLAMA_HOST) — /api/generate + /api/embed
  8. *
  9. * Public API:
  10. * llmGenerate(string $prompt, array $options = []): string
  11. * llmEmbed(string $text): ?array
  12. *
  13. * $options keys (all optional):
  14. * temperature float default 0.3
  15. * num_predict int default 2048
  16. * num_ctx int default 6144 (Ollama only — ignored by llama.cpp)
  17. * repeat_penalty float default 1.1
  18. */
  19. require_once __DIR__ . '/../config/ai.php';
  20. // ── Public functions ──────────────────────────────────────────────────────────
  /**
   * Produce a completion for a prompt.
   *
   * The llama.cpp backend is asked first. When its helper returns null (which
   * it does on transport errors, non-200 replies and empty output), the
   * fallback is logged and the same prompt and options are handed to Ollama.
   *
   * @param string $prompt  Prompt text, forwarded unchanged to the backend.
   * @param array  $options Optional generation settings, passed through as-is.
   *
   * @return string Text returned by whichever backend answered.
   *
   * @throws RuntimeException when neither backend returns text
   */
  function llmGenerate(string $prompt, array $options = []): string
  {
      $primary = _llamacppGenerate($prompt, $options);

      if ($primary === null) {
          error_log('[llm] llama.cpp unavailable — falling back to Ollama');

          $secondary = _ollamaGenerate($prompt, $options);
          if ($secondary === null) {
              throw new RuntimeException('All LLM backends unavailable');
          }

          return $secondary;
      }

      return $primary;
  }
  /**
   * Embed text into a float vector.
   *
   * The input is capped at 2000 bytes before being sent. The llama.cpp backend
   * is tried first; if it returns null the fallback is logged and Ollama is
   * tried.
   *
   * @param string $text Text to embed (expected to be UTF-8).
   *
   * @return array|null Embedding vector, or null when both backends fail.
   */
  function llmEmbed(string $text): ?array
  {
      $maxBytes = 2000;

      if (strlen($text) > $maxBytes) {
          // A plain substr() can cut a multi-byte UTF-8 character in half. The
          // dangling bytes are invalid UTF-8, which makes json_encode() return
          // false in the backend helpers and the request go out without a body.
          // If the first dropped byte is a continuation byte (10xxxxxx) the cut
          // is mid-character, so step back to that character's lead byte.
          // A UTF-8 character has at most 3 continuation bytes.
          $cut = $maxBytes;
          for ($back = 0; $back < 3 && $cut > 0 && (ord($text[$cut]) & 0xC0) === 0x80; $back++) {
              $cut--;
          }
          $text = substr($text, 0, $cut);
      }

      $emb = _llamacppEmbed($text);
      if ($emb !== null) {
          return $emb;
      }

      error_log('[llm] llama.cpp embed unavailable — falling back to Ollama');
      return _ollamaEmbed($text);
  }
  55. // ── llama.cpp backend ─────────────────────────────────────────────────────────
  /**
   * Request a completion from the llama.cpp server (POST /completion).
   *
   * @param string $prompt  Prompt text.
   * @param array  $options Keys read here: num_predict (default 2048),
   *                        temperature (0.3), repeat_penalty (1.1), stop ([]).
   *
   * @return string|null Trimmed generated text, or null when the request could
   *                     not be built or sent, the server did not answer 200,
   *                     or the reply carried no usable text.
   */
  function _llamacppGenerate(string $prompt, array $options): ?string
  {
      // Replace invalid UTF-8 with U+FFFD rather than letting json_encode()
      // fail outright (flag exists from PHP 7.2; older runtimes get no flag).
      $flags = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;

      $payload = json_encode([
          'prompt'         => $prompt,
          'n_predict'      => $options['num_predict'] ?? 2048,
          'temperature'    => $options['temperature'] ?? 0.3,
          'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
          'stop'           => $options['stop'] ?? [],
          'stream'         => false,
      ], $flags);

      // Without this check a failed encode would be posted as an empty body.
      if ($payload === false) {
          error_log('[llm] llama.cpp generate: ' . json_last_error_msg());
          return null;
      }

      $ch = curl_init(LLAMACPP_HOST . '/completion');
      if ($ch === false) {
          error_log('[llm] llama.cpp generate: curl_init failed');
          return null;
      }

      curl_setopt_array($ch, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => $payload,
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => LLAMACPP_TIMEOUT,
          CURLOPT_CONNECTTIMEOUT => 3,
      ]);

      $resp = curl_exec($ch);
      $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $err  = curl_error($ch);
      curl_close($ch);

      if ($err || $resp === false || $code !== 200) {
          error_log('[llm] llama.cpp generate: ' . ($err ?: "HTTP $code"));
          return null;
      }

      $data    = json_decode($resp, true);
      $content = is_array($data) ? ($data['content'] ?? '') : '';

      // trim() only accepts strings; treat any other type as "no text".
      if (!is_string($content)) {
          error_log('[llm] llama.cpp generate: unexpected response body');
          return null;
      }

      $text = trim($content);
      return $text !== '' ? $text : null;
  }
  /**
   * Request an embedding from the llama.cpp server's OpenAI-compatible
   * endpoint (POST /v1/embeddings).
   *
   * @param string $text Text to embed.
   *
   * @return array|null The vector found at data[0].embedding, or null when the
   *                    request could not be built or sent, the server did not
   *                    answer 200, or no non-empty vector was present.
   */
  function _llamacppEmbed(string $text): ?array
  {
      // Replace invalid UTF-8 with U+FFFD rather than letting json_encode()
      // fail outright (flag exists from PHP 7.2; older runtimes get no flag).
      $flags = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;

      $payload = json_encode(['input' => $text], $flags);

      // Without this check a failed encode would be posted as an empty body.
      if ($payload === false) {
          error_log('[llm] llama.cpp embed: ' . json_last_error_msg());
          return null;
      }

      $ch = curl_init(LLAMACPP_HOST . '/v1/embeddings');
      if ($ch === false) {
          error_log('[llm] llama.cpp embed: curl_init failed');
          return null;
      }

      curl_setopt_array($ch, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => $payload,
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => 15,
          CURLOPT_CONNECTTIMEOUT => 3,
      ]);

      $resp = curl_exec($ch);
      $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $err  = curl_error($ch);
      curl_close($ch);

      if ($err || $resp === false || $code !== 200) {
          error_log('[llm] llama.cpp embed: ' . ($err ?: "HTTP $code"));
          return null;
      }

      $data = json_decode($resp, true);
      $emb  = is_array($data) ? ($data['data'][0]['embedding'] ?? null) : null;

      return (is_array($emb) && count($emb) > 0) ? $emb : null;
  }
  112. // ── Ollama backend ────────────────────────────────────────────────────────────
  /**
   * Request a completion from Ollama (POST /api/generate, non-streaming).
   *
   * @param string $prompt  Prompt text.
   * @param array  $options Keys read here: temperature (default 0.3),
   *                        num_predict (2048), num_ctx (6144),
   *                        repeat_penalty (1.1), stop (omitted when empty).
   *
   * @return string|null Trimmed generated text, or null when the request could
   *                     not be built or sent, the server did not answer 200,
   *                     or the reply carried no usable text.
   */
  function _ollamaGenerate(string $prompt, array $options): ?string
  {
      $modelOptions = [
          'temperature'    => $options['temperature'] ?? 0.3,
          'num_predict'    => $options['num_predict'] ?? 2048,
          'num_ctx'        => $options['num_ctx'] ?? 6144,
          'repeat_penalty' => $options['repeat_penalty'] ?? 1.1,
      ];

      // Forward stop sequences so the fallback honours the same option the
      // llama.cpp request sends. Only added when the caller supplied some.
      if (!empty($options['stop'])) {
          $modelOptions['stop'] = array_values((array) $options['stop']);
      }

      // Replace invalid UTF-8 with U+FFFD rather than letting json_encode()
      // fail outright (flag exists from PHP 7.2; older runtimes get no flag).
      $flags = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;

      $payload = json_encode([
          'model'      => OLLAMA_MODEL,
          'prompt'     => $prompt,
          'stream'     => false,
          // keep_alive is a top-level request field in the Ollama API; it is
          // not a model option, so it must sit beside "options", not inside.
          'keep_alive' => -1,
          'options'    => $modelOptions,
      ], $flags);

      // Without this check a failed encode would be posted as an empty body.
      if ($payload === false) {
          error_log('[llm] Ollama generate: ' . json_last_error_msg());
          return null;
      }

      $ch = curl_init(OLLAMA_HOST . '/api/generate');
      if ($ch === false) {
          error_log('[llm] Ollama generate: curl_init failed');
          return null;
      }

      curl_setopt_array($ch, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => $payload,
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => OLLAMA_TIMEOUT,
          CURLOPT_CONNECTTIMEOUT => 5,
      ]);

      $resp = curl_exec($ch);
      $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $err  = curl_error($ch);
      curl_close($ch);

      if ($err || $resp === false || $code !== 200) {
          error_log('[llm] Ollama generate: ' . ($err ?: "HTTP $code"));
          return null;
      }

      $data     = json_decode($resp, true);
      $response = is_array($data) ? ($data['response'] ?? '') : '';

      // trim() only accepts strings; treat any other type as "no text".
      if (!is_string($response)) {
          error_log('[llm] Ollama generate: unexpected response body');
          return null;
      }

      $text = trim($response);
      return $text !== '' ? $text : null;
  }
  /**
   * Request an embedding from Ollama.
   *
   * Tries POST /api/embed first (vector read from embeddings[0]); if that does
   * not yield a non-empty vector, tries the legacy POST /api/embeddings
   * (vector read from embedding).
   *
   * @param string $text Text to embed.
   *
   * @return array|null Embedding vector, or null when both endpoints fail.
   */
  function _ollamaEmbed(string $text): ?array
  {
      // Preferred endpoint: takes "input", answers with a list of vectors.
      $data = _ollamaEmbedPost('/api/embed', ['model' => EMBED_MODEL, 'input' => $text]);
      $emb  = $data['embeddings'][0] ?? null;
      if (is_array($emb) && count($emb) > 0) {
          return $emb;
      }

      // Fallback: legacy endpoint takes "prompt", answers with a single vector.
      $data = _ollamaEmbedPost('/api/embeddings', ['model' => EMBED_MODEL, 'prompt' => $text]);
      $emb  = $data['embedding'] ?? null;
      if (is_array($emb) && count($emb) > 0) {
          return $emb;
      }

      error_log('[llm] All embed backends failed');
      return null;
  }

  /**
   * POST a JSON body to an Ollama embedding endpoint and decode the reply.
   *
   * Logs the cause of any failure so each endpoint's error is visible, as the
   * other backend helpers do.
   *
   * @param string $path Endpoint path appended to OLLAMA_HOST.
   * @param array  $body Request body to JSON-encode.
   *
   * @return array|null Decoded JSON object, or null on any failure.
   */
  function _ollamaEmbedPost(string $path, array $body): ?array
  {
      // Replace invalid UTF-8 with U+FFFD rather than letting json_encode()
      // fail outright (flag exists from PHP 7.2; older runtimes get no flag).
      $flags = defined('JSON_INVALID_UTF8_SUBSTITUTE') ? JSON_INVALID_UTF8_SUBSTITUTE : 0;

      $payload = json_encode($body, $flags);
      if ($payload === false) {
          error_log('[llm] Ollama embed ' . $path . ': ' . json_last_error_msg());
          return null;
      }

      $ch = curl_init(OLLAMA_HOST . $path);
      if ($ch === false) {
          error_log('[llm] Ollama embed ' . $path . ': curl_init failed');
          return null;
      }

      curl_setopt_array($ch, [
          CURLOPT_POST           => true,
          CURLOPT_POSTFIELDS     => $payload,
          CURLOPT_HTTPHEADER     => ['Content-Type: application/json'],
          CURLOPT_RETURNTRANSFER => true,
          CURLOPT_TIMEOUT        => 15,
          CURLOPT_CONNECTTIMEOUT => 5,
      ]);

      $resp = curl_exec($ch);
      $code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $err  = curl_error($ch);
      curl_close($ch);

      if (!$resp || $code !== 200) {
          error_log('[llm] Ollama embed ' . $path . ': ' . ($err ?: "HTTP $code"));
          return null;
      }

      $data = json_decode($resp, true);
      return is_array($data) ? $data : null;
  }