Przeglądaj źródła

Quantitative competitor extraction via sub-page scraping

POST /competitors/:id/extract-quantitative fetches the competitor's homepage
plus its /pricing, /features, /about, and /careers sub-pages, combines up to
8000 chars of content, and uses AI to extract structured data: pricing
tiers, key features, tech stack, target customer, job postings count,
growth signals, and an estimated product count. Results are persisted as
quantitativeProfile + quantitativeExtractedAt on the competitor document.
Competitors.vue renders pricing tier chips, key feature bullets, a stats row,
and growth signal chips per card, with an 'Extract Data' button triggering
the new endpoint.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Benjamin Harris 3 tygodni temu
rodzic
commit
50ed6155aa

+ 93 - 0
services/gateway/server.js

@@ -3763,6 +3763,99 @@ No explanation, no markdown.`;
   }
 });
 
+// ─── Quantitative Competitor Extraction ──────────────────────────────────────
+
+app.post('/competitors/:id/extract-quantitative', async (request, reply) => {
+  const db = await getDb();
+  let oid;
+  try { oid = new ObjectId(request.params.id); } catch { return reply.code(400).send({ error: 'Invalid id' }); }
+
+  const competitor = await db.collection('competitors').findOne({ _id: oid });
+  if (!competitor) return reply.code(404).send({ error: 'Competitor not found' });
+  if (!competitor.websiteUrl) return reply.code(400).send({ error: 'Competitor has no website URL' });
+
+  const baseUrl = competitor.websiteUrl.replace(/\/$/, '');
+
+  // Fetch additional sub-pages: /pricing, /features, /about, /careers
+  const subPaths = ['/pricing', '/features', '/about', '/careers'];
+  const pageTexts = await Promise.all([
+    extractTextFromUrl(baseUrl),
+    ...subPaths.map((p) => extractTextFromUrl(baseUrl + p)),
+  ]);
+  const combinedText = pageTexts.filter(Boolean).join('\n\n---\n\n').slice(0, 8000);
+
+  if (!combinedText.trim()) {
+    return reply.code(503).send({ error: 'Could not fetch any content from the competitor site' });
+  }
+
+  const system = 'You are a competitive intelligence analyst. Return only valid JSON with no markdown or explanation.';
+  const prompt = `Extract quantitative competitive data from this competitor's website content.
+Competitor: ${competitor.name} (${baseUrl})
+
+Website content:
+${combinedText}
+
+Return this JSON:
+{
+  "pricingTiers": [{ "name": "<tier name>", "price": "<price or 'Free' or 'Contact us'>", "highlights": ["<feature>"] }],
+  "keyFeatures": ["<feature 1>", "<feature 2>", "<feature 3>", "<feature 4>", "<feature 5>"],
+  "techStack": ["<detected technology>"],
+  "targetCustomer": "<one-sentence ICP>",
+  "jobPostings": <number of open roles detected, 0 if none>,
+  "growthSignals": ["<any hiring, funding, expansion, or new product signals>"],
+  "productCount": <estimated number of distinct products/services, 0 if unclear>
+}
+
+If data is not available for a field, use null for numbers and [] for arrays. Return ONLY valid JSON.`;
+
+  try {
+    const pconf = await getActiveProviderConfig();
+    const model = pconf.model;
+    let text = '';
+
+    if (pconf.provider === 'ollama') {
+      const res = await axios.post(`${pconf.endpoint}/api/generate`, { model, prompt, system, stream: false }, { timeout: 120000 });
+      text = res.data.response;
+    } else if (pconf.provider === 'openai' || pconf.provider === 'groq') {
+      if (!pconf.apiKey) return reply.code(503).send({ error: `${pconf.provider} API key not configured` });
+      const res = await axios.post(`${pconf.baseUrl}/chat/completions`, {
+        model, messages: buildOpenAIMessages(prompt, system), stream: false,
+      }, { headers: { Authorization: `Bearer ${pconf.apiKey}` }, timeout: 120000 });
+      text = res.data.choices[0]?.message?.content || '';
+    } else if (pconf.provider === 'gemini') {
+      if (!pconf.apiKey) return reply.code(503).send({ error: 'Gemini API key not configured' });
+      const res = await axios.post(
+        `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent?key=${pconf.apiKey}`,
+        { contents: buildGeminiContents(prompt, system) },
+        { timeout: 120000 },
+      );
+      text = res.data.candidates?.[0]?.content?.parts?.[0]?.text || '';
+    } else {
+      return reply.code(400).send({ error: 'AI not configured' });
+    }
+
+    const cleaned = text.replace(/```(?:json)?\s*/gi, '').replace(/```\s*/g, '');
+    let profile = null;
+    try {
+      const jsonStr = (cleaned.match(/\{[\s\S]*\}/) || ['{}'])[0];
+      profile = JSON.parse(jsonStr);
+      if (!profile.keyFeatures) throw new Error('Missing keyFeatures');
+    } catch {
+      return reply.code(503).send({ error: 'AI returned invalid extraction format — try again' });
+    }
+
+    await db.collection('competitors').updateOne(
+      { _id: oid },
+      { $set: { quantitativeProfile: profile, quantitativeExtractedAt: new Date(), updatedAt: new Date() } },
+    );
+
+    log.info({ action: 'extract_quantitative', competitor: competitor.name, outcome: 'success' });
+    return { success: true, quantitativeProfile: profile, extractedAt: new Date() };
+  } catch (err) {
+    return reply.code(503).send({ error: 'Quantitative extraction failed', detail: err.message });
+  }
+});
+
 // ─── Strategic Group Map ─────────────────────────────────────────────────────
 
 const STRATEGIC_DIMENSIONS = [

+ 5 - 0
ui/src/locales/en.ts

@@ -643,6 +643,11 @@ export default {
     cancel: 'Cancel',
     delete: 'Remove',
     confirmDelete: 'Remove this competitor?',
+    extractQuantitative: 'Extract Data',
+    extractingQuantitative: 'Extracting…',
+    quantitativeLabel: 'Quantitative Profile',
+    quantitativePricing: 'Pricing',
+    quantitativeFeatures: 'Key features',
   },
 
   platforms: {

+ 5 - 0
ui/src/locales/tr.ts

@@ -643,6 +643,11 @@ export default {
     cancel: 'İptal',
     delete: 'Kaldır',
     confirmDelete: 'Bu rakip kaldırılsın mı?',
+    extractQuantitative: 'Veri Çıkar',
+    extractingQuantitative: 'Çıkarılıyor…',
+    quantitativeLabel: 'Nicel Profil',
+    quantitativePricing: 'Fiyatlandırma',
+    quantitativeFeatures: 'Temel özellikler',
   },
 
   platforms: {

+ 73 - 0
ui/src/views/Competitors.vue

@@ -574,6 +574,60 @@
               </div>
             </div>
           </div>
+        <!-- Quantitative profile: rendered only once an extraction has been stored on the competitor. -->
+        <!-- The v-for lists below are keyed by index because their values are AI-generated text that may repeat or be missing. -->
+        <div v-if="competitor.quantitativeProfile" class="mt-4 border-t border-gray-700/60 pt-3">
+          <div class="text-xs text-cyan-400 font-medium mb-2">{{ t('competitors.quantitativeLabel') }}</div>
+
+          <!-- Pricing tiers: one chip per tier showing its name and price -->
+          <div v-if="competitor.quantitativeProfile.pricingTiers?.length" class="mb-2">
+            <div class="text-xs text-gray-500 mb-1">{{ t('competitors.quantitativePricing') }}</div>
+            <div class="flex flex-wrap gap-1.5">
+              <div v-for="(tier, tierIdx) in competitor.quantitativeProfile.pricingTiers" :key="tierIdx"
+                class="px-2 py-1 bg-cyan-900/30 border border-cyan-700/40 rounded text-xs text-cyan-200">
+                <span class="font-medium">{{ tier.name }}</span>
+                <span class="text-cyan-400 ml-1">{{ tier.price }}</span>
+              </div>
+            </div>
+          </div>
+
+          <!-- Key features: at most the first five entries -->
+          <div v-if="competitor.quantitativeProfile.keyFeatures?.length" class="mb-2">
+            <div class="text-xs text-gray-500 mb-1">{{ t('competitors.quantitativeFeatures') }}</div>
+            <ul class="space-y-0.5">
+              <li v-for="(f, featureIdx) in competitor.quantitativeProfile.keyFeatures.slice(0, 5)" :key="featureIdx" class="flex gap-1 text-xs text-gray-300">
+                <span class="text-cyan-500 shrink-0">›</span>{{ f }}
+              </li>
+            </ul>
+          </div>
+
+          <!-- Stats row: each stat is hidden when its value is empty, null or 0 -->
+          <div class="flex flex-wrap gap-3 text-xs text-gray-400">
+            <span v-if="competitor.quantitativeProfile.targetCustomer" class="flex items-center gap-1">
+              <i class="fa-solid fa-user-group text-[9px] text-cyan-500"></i>{{ competitor.quantitativeProfile.targetCustomer }}
+            </span>
+            <span v-if="competitor.quantitativeProfile.jobPostings" class="flex items-center gap-1">
+              <i class="fa-solid fa-briefcase text-[9px] text-green-400"></i>{{ competitor.quantitativeProfile.jobPostings }} open roles
+            </span>
+          </div>
+
+          <!-- Growth signals: one pill per signal -->
+          <div v-if="competitor.quantitativeProfile.growthSignals?.length" class="mt-1.5 flex flex-wrap gap-1">
+            <span v-for="(s, signalIdx) in competitor.quantitativeProfile.growthSignals" :key="signalIdx" class="text-xs px-1.5 py-0.5 bg-green-900/30 border border-green-700/40 text-green-300 rounded-full">
+              <i class="fa-solid fa-arrow-trend-up text-[9px] mr-0.5"></i>{{ s }}
+            </span>
+          </div>
+        </div>
+
+        <!-- Extract quantitative data button -->
+        <div class="mt-3 pt-3 border-t border-gray-700/60">
+          <button
+            @click="extractQuantitative(competitor._id)"
+            :disabled="extractingQuantitative[competitor._id]"
+            class="flex items-center gap-1.5 text-xs px-3 py-1.5 bg-cyan-900/50 hover:bg-cyan-800/60 disabled:opacity-40 border border-cyan-700/50 rounded-lg text-cyan-300 transition-colors w-full justify-center"
+          >
+            <i class="fa-solid fa-database text-[10px]" :class="{ 'animate-pulse': extractingQuantitative[competitor._id] }"></i>
+            {{ extractingQuantitative[competitor._id] ? t('competitors.extractingQuantitative') : t('competitors.extractQuantitative') }}
+          </button>
         </div>
       </div>
     </div>
@@ -753,6 +807,25 @@ const MATRIX_ROWS = [
   { key: 'keywords',     label: 'Top Keywords',         get: (c: Competitor) => ((c.keywords || []).slice(0, 5).map((k: any) => k.term).join(', ') || '—') },
 ] as const
 
+// ── Quantitative extraction ───────────────────────────────────────────────────
+/** Per-competitor "extraction in progress" flags, keyed by competitor `_id`. */
+const extractingQuantitative = reactive<Record<string, boolean>>({})
+
+/**
+ * Requests quantitative extraction for one competitor and merges the returned
+ * profile into the matching entry of `competitorStore.competitors`.
+ *
+ * The in-progress flag for `id` is set for the duration of the call and is
+ * always cleared afterwards. On failure the server's `error` message is shown
+ * via `alert`, falling back to a generic message.
+ *
+ * @param id `_id` of the competitor to extract data for.
+ */
+async function extractQuantitative(id: string) {
+  extractingQuantitative[id] = true
+  try {
+    // No list refetch before the request: it only added a round-trip, and a
+    // failure there would have been reported as an extraction failure.
+    const res = await axios.post(`/api/competitors/${id}/extract-quantitative`)
+    const idx = competitorStore.competitors.findIndex((c) => c._id === id)
+    if (idx !== -1) {
+      // Replace the entry rather than mutating it in place.
+      competitorStore.competitors[idx] = { ...competitorStore.competitors[idx], quantitativeProfile: res.data.quantitativeProfile }
+    }
+  } catch (err: any) {
+    alert(err.response?.data?.error || 'Quantitative extraction failed')
+  } finally {
+    extractingQuantitative[id] = false
+  }
+}
+
 async function generateMatrixSynthesis() {
   matrixLoading.value = true
   try {