pdo = $pdo;

        // NOTE(review): the head of this constructor is outside the visible source;
        // the statements below are kept exactly as found.
        global $table_prefix;
        $prefix = $table_prefix ?? 'wp_';

        $this->prefix = $prefix;
        $this->posts_table = $prefix . 'posts';
        $this->term_rel_table = $prefix . 'term_relationships';
        $this->term_tax_table = $prefix . 'term_taxonomy';
    }

    /**
     * Run the hybrid search and return one page of ranked results.
     *
     * Candidates are collected from several SQL "buckets" (exact title, prefix,
     * FULLTEXT, all-tokens LIKE and, for multi-token terms, whole-term LIKE),
     * de-duplicated by normalized title, re-scored in PHP and then paginated.
     * Non-empty results are stored in the Redis cache under a key derived from
     * the arguments; a cache hit is returned with 'cached' => true.
     *
     * @param string $term         Search term
     * @param int    $limit        Results per page (negative values are treated as 0)
     * @param int    $offset       Pagination offset (negative values are treated as 0)
     * @param array  $category_ids Optional category IDs to filter by
     * @return array Keys: 'total' (size of the de-duplicated pool), 'rows',
     *               'modo' (always 'HYBRID'), 'time_ms'
     */
    public function run(string $term, int $limit, int $offset, array $category_ids = []): array
    {
        $t0 = microtime(true);

        // A negative LIMIT is invalid SQL and a negative offset would slice from
        // the end of the pool, so clamp both before they are used anywhere.
        $limit = max(0, $limit);
        $offset = max(0, $offset);

        // Try Redis cache first
        $redis = ROI_APU_Search_Redis::get_instance();
        $cacheKey = $redis->generateKey($term, $limit, $offset, $category_ids);
        $cached = $redis->get($cacheKey);
        if ($cached !== null) {
            $cached['time_ms'] = round((microtime(true) - $t0) * 1000, 2);
            $cached['cached'] = true;

            return $cached;
        }

        $tokens = self::tokens($term);

        // Per-bucket candidate caps, derived from the page size within fixed bounds
        $capFull = max(120, min(300, $limit * 8));
        $capLike = max(120, min(300, $limit * 8));
        $capPref = max(60, min(200, $limit * 4));
        $capEq = min(40, $limit * 2);
        $capCont = max(80, min(240, $limit * 6));

        // Fetch from all buckets.
        // NOTE(review): de-duplication below keeps the FIRST bucket that produced
        // a title, so a row already returned by LIKE_ALL keeps base 900 even if it
        // also appears in EQUALS (1200) — confirm this ordering is intended.
        $buckets = [
            ['name' => 'LIKE_ALL', 'base' => 900.0, 'rows' => $this->fetchAllTokensLike($tokens, $capLike, $category_ids)],
            ['name' => 'FULLTEXT', 'base' => 700.0, 'rows' => $this->fetchFulltextTitle($term, $capFull, $category_ids)],
            ['name' => 'STARTS', 'base' => 650.0, 'rows' => $this->fetchStartsWith($term, $capPref, $category_ids)],
            ['name' => 'EQUALS', 'base' => 1200.0, 'rows' => $this->fetchEquals($term, $capEq, $category_ids)],
        ];

        // CONTAINS is only useful with several tokens: with a single token
        // LIKE_ALL already performs the same '%token%' match.
        if (count($tokens) > 1) {
            $buckets[] = ['name' => 'CONTAINS', 'base' => 500.0, 'rows' => $this->fetchContains($term, $capCont, $category_ids)];
        }

        // Deduplicate by normalized title (first occurrence wins)
        $seen = [];
        $pool = [];
        foreach ($buckets as $bucket) {
            foreach ($bucket['rows'] as $row) {
                $norm = self::normTitle($row['post_title']);
                if (isset($seen[$norm])) {
                    continue;
                }
                $seen[$norm] = true;

                $pool[] = [
                    'ID' => (int) $row['ID'],
                    'post_title' => (string) $row['post_title'],
                    'post_date' => (string) $row['post_date'],
                    'post_name' => (string) ($row['post_name'] ?? ''),
                    'bucket' => $bucket['name'],
                    'baseW' => (float) $bucket['base'],
                    'raw_rel' => isset($row['raw_rel']) ? (float) $row['raw_rel'] : 0.0,
                ];
            }
        }

        $poolTotal = count($pool);
        if ($poolTotal === 0) {
            $elapsed = round((microtime(true) - $t0) * 1000, 2);

            return ['total' => 0, 'rows' => [], 'modo' => 'HYBRID', 'time_ms' => $elapsed];
        }

        // Re-rank: bucket base weight plus every scoring signal
        foreach ($pool as $idx => $item) {
            $title = $item['post_title'];

            $pool[$idx]['score'] = $item['baseW']
                + ($item['raw_rel'] * self::RAW_REL_MULT)
                + self::coverageBoost($title, $tokens)
                + self::orderedWindowBoost($title, $tokens)
                + self::proximityBoost($title, $tokens)
                + self::startsWithBoost($title, $term)
                + self::wordExactBoost($title, $term)
                + (self::levenshteinSimilarity($title, $term) * 160.0)
                + self::tokenFuzzyBoost($title, $tokens)
                + self::recencyBoost($item['post_date'])
                + self::lengthPenalty($title)
                + self::requiredTokensPenalty($title, $tokens);
        }

        // Highest score first; ties are broken by the newest post_date string
        usort(
            $pool,
            static fn (array $a, array $b): int => ($b['score'] <=> $a['score'])
                ?: strcmp($b['post_date'], $a['post_date'])
        );

        // Paginate
        $pageRows = array_slice($pool, $offset, $limit);
        $rows = array_map(
            static fn (array $r): array => [
                'ID' => $r['ID'],
                'post_title' => $r['post_title'],
                'post_date' => $r['post_date'],
                'post_name' => $r['post_name'] ?? '',
                'permalink' => '', // Filled in later by search-endpoint.php
            ],
            $pageRows
        );

        $result = [
            'total' => $poolTotal,
            'rows' => $rows,
            'modo' => 'HYBRID',
            'time_ms' => round((microtime(true) - $t0) * 1000, 2),
        ];

        // Save to Redis cache
        $redis->set($cacheKey, $result);

        return $result;
    }

    /**
     * Build the JOIN clause needed for category filtering.
     *
     * @param array $category_ids Category IDs; an empty array disables filtering
     * @return string SQL fragment, or '' when no category filter is requested
     */
    private function buildCategoryJoin(array $category_ids): string
    {
        if (empty($category_ids)) {
            return '';
        }

        return " INNER JOIN {$this->term_rel_table} tr ON p.ID = tr.object_id
                 INNER JOIN {$this->term_tax_table} tt ON tr.term_taxonomy_id = tt.term_taxonomy_id
                     AND tt.taxonomy = 'category' ";
    }

    /**
     * Build the WHERE fragment for category filtering and register its parameters.
     *
     * Placeholder names are generated from a fresh 0..n-1 index (never from the
     * caller's array keys, which could contain characters that are invalid in a
     * placeholder name) and every ID is cast to int before binding.
     *
     * @param array $category_ids Category IDs; an empty array disables filtering
     * @param array $params       Bound-parameter map, extended in place
     * @return string SQL fragment starting with ' AND', or '' when not filtering
     */
    private function buildCategoryWhere(array $category_ids, array &$params): string
    {
        if (empty($category_ids)) {
            return '';
        }

        $placeholders = [];
        foreach (array_values($category_ids) as $i => $cat_id) {
            $key = ":cat_{$i}";
            $placeholders[] = $key;
            $params[$key] = (int) $cat_id;
        }

        return ' AND tt.term_id IN (' . implode(',', $placeholders) . ')';
    }

    /**
     * Prepare a statement, bind every parameter and return all rows.
     *
     * Integers are bound as PDO::PARAM_INT (required for LIMIT), everything
     * else as PDO::PARAM_STR.
     *
     * @param string $sql    SQL with named placeholders
     * @param array  $params Map of placeholder name => value
     * @return array Rows as returned by PDOStatement::fetchAll()
     */
    private function executeBoundQuery(string $sql, array $params): array
    {
        $st = $this->pdo->prepare($sql);
        foreach ($params as $key => $val) {
            $st->bindValue($key, $val, is_int($val) ? PDO::PARAM_INT : PDO::PARAM_STR);
        }
        $st->execute();

        return $st->fetchAll();
    }

    /**
     * Escape backslash, '%' and '_' so a value is matched literally by
     * LIKE ... ESCAPE '\'.
     */
    private static function escapeLike(string $value): string
    {
        return str_replace(['\\', '%', '_'], ['\\\\', '\%', '\_'], $value);
    }

    /**
     * Fetch published posts whose title equals the term (utf8mb4_general_ci).
     *
     * @param string $term         Search term
     * @param int    $limit        Maximum rows
     * @param array  $category_ids Optional category filter
     * @return array Rows with ID, post_title, post_date, post_name
     */
    private function fetchEquals(string $term, int $limit, array $category_ids): array
    {
        $params = [':t' => $term, ':lim' => $limit];
        $catJoin = $this->buildCategoryJoin($category_ids);
        $catWhere = $this->buildCategoryWhere($category_ids, $params);

        $sql = "SELECT DISTINCT p.ID, p.post_title, p.post_date, p.post_name
                FROM {$this->posts_table} p
                {$catJoin}
                WHERE p.post_type = 'post'
                  AND p.post_status = 'publish'
                  AND p.post_title COLLATE utf8mb4_general_ci = :t
                  {$catWhere}
                ORDER BY p.post_date DESC
                LIMIT :lim";

        return $this->executeBoundQuery($sql, $params);
    }

    /**
     * Fetch published posts whose title starts with the term.
     *
     * @param string $term         Search term (LIKE wildcards are escaped)
     * @param int    $limit        Maximum rows
     * @param array  $category_ids Optional category filter
     * @return array Rows with ID, post_title, post_date, post_name
     */
    private function fetchStartsWith(string $term, int $limit, array $category_ids): array
    {
        $params = [':p' => self::escapeLike($term) . '%', ':lim' => $limit];
        $catJoin = $this->buildCategoryJoin($category_ids);
        $catWhere = $this->buildCategoryWhere($category_ids, $params);

        $sql = "SELECT DISTINCT p.ID, p.post_title, p.post_date, p.post_name
                FROM {$this->posts_table} p
                {$catJoin}
                WHERE p.post_type = 'post'
                  AND p.post_status = 'publish'
                  AND p.post_title LIKE :p ESCAPE '\\\\'
                  {$catWhere}
                ORDER BY p.post_date DESC
                LIMIT :lim";

        return $this->executeBoundQuery($sql, $params);
    }

    /**
     * Fetch FULLTEXT (boolean mode) matches on the title, with relevance as raw_rel.
     *
     * The boolean query is bound through two distinct placeholders because PDO
     * does not allow the same named placeholder twice in one statement when
     * emulated prepares are disabled.
     *
     * @param string $term         Search term
     * @param int    $limit        Maximum rows
     * @param array  $category_ids Optional category filter
     * @return array Rows with ID, post_title, post_date, post_name, raw_rel;
     *               empty when the term yields no usable boolean query
     */
    private function fetchFulltextTitle(string $term, int $limit, array $category_ids): array
    {
        $q = self::booleanQuery($term);
        if ($q === '') {
            return [];
        }

        $params = [':q_sel' => $q, ':q_where' => $q, ':lim' => $limit];
        $catJoin = $this->buildCategoryJoin($category_ids);
        $catWhere = $this->buildCategoryWhere($category_ids, $params);

        $sql = "SELECT DISTINCT p.ID, p.post_title, p.post_date, p.post_name,
                       MATCH(p.post_title) AGAINST (:q_sel IN BOOLEAN MODE) AS raw_rel
                FROM {$this->posts_table} p
                {$catJoin}
                WHERE p.post_type = 'post'
                  AND p.post_status = 'publish'
                  AND MATCH(p.post_title) AGAINST (:q_where IN BOOLEAN MODE)
                  {$catWhere}
                ORDER BY raw_rel DESC, p.post_date DESC
                LIMIT :lim";

        return $this->executeBoundQuery($sql, $params);
    }

    /**
     * Fetch published posts whose title contains every token (LIKE ... AND ...).
     *
     * @param array $tokens       Search tokens (LIKE wildcards are escaped)
     * @param int   $limit        Maximum rows
     * @param array $category_ids Optional category filter
     * @return array Rows with ID, post_title, post_date, post_name; empty when
     *               there are no tokens
     */
    private function fetchAllTokensLike(array $tokens, int $limit, array $category_ids): array
    {
        if (empty($tokens)) {
            return [];
        }

        $params = [':lim' => $limit];
        $likeConds = [];
        foreach ($tokens as $i => $token) {
            $key = ":lk{$i}";
            $likeConds[] = "p.post_title LIKE {$key} ESCAPE '\\\\'";
            $params[$key] = '%' . self::escapeLike($token) . '%';
        }

        $catJoin = $this->buildCategoryJoin($category_ids);
        $catWhere = $this->buildCategoryWhere($category_ids, $params);
        $where = implode(' AND ', $likeConds);

        $sql = "SELECT DISTINCT p.ID, p.post_title, p.post_date, p.post_name
                FROM {$this->posts_table} p
                {$catJoin}
                WHERE p.post_type = 'post'
                  AND p.post_status = 'publish'
                  AND {$where}
                  {$catWhere}
                ORDER BY p.post_date DESC
                LIMIT :lim";

        return $this->executeBoundQuery($sql, $params);
    }

    /**
     * Fetch published posts whose title contains the whole term as a substring.
     *
     * @param string $term         Search term (LIKE wildcards are escaped)
     * @param int    $limit        Maximum rows
     * @param array  $category_ids Optional category filter
     * @return array Rows with ID, post_title, post_date, post_name
     */
    private function fetchContains(string $term, int $limit, array $category_ids): array
    {
        $params = [':l' => '%' . self::escapeLike($term) . '%', ':lim' => $limit];
        $catJoin = $this->buildCategoryJoin($category_ids);
        $catWhere = $this->buildCategoryWhere($category_ids, $params);

        $sql = "SELECT DISTINCT p.ID, p.post_title, p.post_date, p.post_name
                FROM {$this->posts_table} p
                {$catJoin}
                WHERE p.post_type = 'post'
                  AND p.post_status = 'publish'
                  AND p.post_title LIKE :l ESCAPE '\\\\'
                  {$catWhere}
                ORDER BY p.post_date DESC
                LIMIT :lim";

        return $this->executeBoundQuery($sql, $params);
    }

    // ==================== Text/Token Utilities ====================

    /**
     * Lower-case a string, transliterate it to ASCII when iconv succeeds, replace
     * every run of non [a-z0-9 ] characters with one space and collapse whitespace.
     */
    private static function asciiFold(string $s): string
    {
        $s = mb_strtolower($s, 'UTF-8');

        // iconv raises a notice on input it cannot convert; on failure the
        // lower-cased original is kept and cleaned by the regex below.
        $ascii = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $s);
        if ($ascii !== false) {
            $s = $ascii;
        }

        $s = preg_replace('/[^a-z0-9 ]+/i', ' ', $s);

        return preg_replace('/\s+/', ' ', trim($s));
    }

    /**
     * Normalized title used as the de-duplication key (first 160 bytes of the fold).
     */
    private static function normTitle(string $title): string
    {
        return substr(self::asciiFold($title), 0, 160);
    }

    /**
     * Split a term into unique folded tokens, preserving first-seen order.
     *
     * Kept: all-digit words, words of two or more characters, and the single
     * letters 'f' and 'x'. Other one-character words are dropped.
     *
     * @return array List of token strings
     */
    private static function tokens(string $term): array
    {
        $keepSingles = ['f', 'x'];
        $kept = [];

        foreach (explode(' ', self::asciiFold($term)) as $word) {
            if ($word === '') {
                continue;
            }
            if (ctype_digit($word) || strlen($word) >= 2 || in_array($word, $keepSingles, true)) {
                $kept[] = $word;
            }
        }

        // Dedupe preserving order (array_unique keeps the first occurrence)
        return array_values(array_unique($kept));
    }

    /**
     * Convert free-form input into a MySQL BOOLEAN MODE query string.
     *
     * Double-quoted phrases become '+"phrase"'; the bare words AND/OR/NOT are
     * emitted upper-cased; every other word of at least two characters becomes
     * '+word*'. Operator characters typed at the edges of an unquoted word
     * ('+', '-', '*') are stripped first, because prefixing/suffixing them again
     * would produce operator combinations such as '++c*' or '+-foo*', which
     * InnoDB rejects as syntax errors.
     *
     * @return string Boolean query, or '' when nothing usable remains
     */
    private static function booleanQuery(string $input): string
    {
        // preg_replace returns null on invalid UTF-8 with the /u modifier
        $input = preg_replace("/[^\\p{L}\\p{N}\\s\"'\\+\\-\\*]/u", ' ', trim($input)) ?? '';
        if ($input === '') {
            return '';
        }

        // Pass 1: split on whitespace, keeping double-quoted phrases together
        $buf = '';
        $inQuote = false;
        $rawTokens = [];

        foreach (mb_str_split($input, 1, 'UTF-8') as $ch) {
            if ($ch === '"') {
                if ($inQuote) {
                    $buf .= $ch;
                    if ($buf !== '""') {
                        $rawTokens[] = $buf;
                    }
                    $buf = '';
                    $inQuote = false;
                } else {
                    if ($buf !== '') {
                        $rawTokens[] = $buf;
                    }
                    $buf = '"';
                    $inQuote = true;
                }
            } elseif (preg_match('/\s/u', $ch)) {
                if ($inQuote) {
                    $buf .= $ch;
                } elseif ($buf !== '') {
                    $rawTokens[] = $buf;
                    $buf = '';
                }
            } else {
                $buf .= $ch;
            }
        }

        if ($buf !== '') {
            // Close an unterminated phrase; a lone opening quote yields nothing
            $pending = $inQuote ? ($buf . '"') : $buf;
            if ($pending !== '""') {
                $rawTokens[] = $pending;
            }
        }

        // Pass 2: turn each raw token into a boolean-mode term
        $parts = [];
        foreach ($rawTokens as $tok) {
            if ($tok === '' || mb_strlen($tok, 'UTF-8') < 2) {
                continue;
            }

            if ($tok[0] === '"' && substr($tok, -1) === '"') {
                $parts[] = '+' . $tok;
                continue;
            }

            $word = trim($tok, '+-*');
            if (mb_strlen($word, 'UTF-8') < 2) {
                continue;
            }

            $upper = strtoupper($word);
            $parts[] = in_array($upper, ['AND', 'OR', 'NOT'], true)
                ? $upper
                : '+' . $word . '*';
        }

        return implode(' ', $parts);
    }

    // ==================== Scoring Functions ====================

    /**
     * Count how many non-empty tokens occur as substrings of an already folded title.
     */
    private static function countTokenHits(string $foldedTitle, array $tokens): int
    {
        $hits = 0;
        foreach ($tokens as $tok) {
            if ($tok !== '' && str_contains($foldedTitle, $tok)) {
                $hits++;
            }
        }

        return $hits;
    }

    /**
     * Boost proportional to the fraction of tokens found in the title
     * (W_COVERAGE when all are present).
     */
    private static function coverageBoost(string $title, array $tokens): float
    {
        if (empty($tokens)) {
            return 0.0;
        }

        $hits = self::countTokenHits(self::asciiFold($title), $tokens);

        return ($hits / max(1, count($tokens))) * self::W_COVERAGE;
    }

    /**
     * Negative score for titles missing some tokens.
     *
     * Applies only to queries of 1 to 4 tokens; the penalty is
     * REQ_BASE_PENALTY plus REQ_MISS_PER_TOKEN per missing token.
     */
    private static function requiredTokensPenalty(string $title, array $tokens): float
    {
        $n = count($tokens);
        if ($n === 0 || $n > 4) {
            return 0.0;
        }

        $missing = $n - self::countTokenHits(self::asciiFold($title), $tokens);
        if ($missing <= 0) {
            return 0.0;
        }

        return -(self::REQ_BASE_PENALTY + self::REQ_MISS_PER_TOKEN * $missing);
    }

    /**
     * W_STARTSWITH when the folded title begins with the folded term.
     *
     * A term that folds to '' earns nothing (an empty prefix would otherwise
     * match every title), mirroring wordExactBoost().
     */
    private static function startsWithBoost(string $title, string $term): float
    {
        $needle = self::asciiFold($term);
        if ($needle === '') {
            return 0.0;
        }

        return str_starts_with(self::asciiFold($title), $needle) ? self::W_STARTSWITH : 0.0;
    }

    /**
     * W_WORD_EXACT when the folded term appears in the folded title on word boundaries.
     */
    private static function wordExactBoost(string $title, string $term): float
    {
        $needle = self::asciiFold($term);
        if ($needle === '') {
            return 0.0;
        }

        $haystack = ' ' . self::asciiFold($title) . ' ';
        $pattern = '/\b' . preg_quote($needle, '/') . '\b/u';

        return preg_match($pattern, $haystack) ? self::W_WORD_EXACT : 0.0;
    }

    /**
     * Recency boost that decays with the post's age in days:
     * W_RECENCY_MAX / (1 + days / 180), with age floored at one day.
     * Dates that strtotime() cannot parse earn nothing.
     */
    private static function recencyBoost(string $date): float
    {
        $timestamp = strtotime($date);
        if (!$timestamp) {
            return 0.0;
        }

        $days = max(1, (time() - $timestamp) / 86400);

        return self::W_RECENCY_MAX / (1.0 + $days / 180.0);
    }

    /**
     * Similarity in [0, 1] between two strings: 1 - levenshtein / longer length,
     * computed on the first 80 bytes of each folded string.
     */
    private static function levenshteinSimilarity(string $a, string $b): float
    {
        $aa = substr(self::asciiFold($a), 0, 80);
        $bb = substr(self::asciiFold($b), 0, 80);
        if ($aa === '' || $bb === '') {
            return 0.0;
        }

        $longest = max(strlen($aa), strlen($bb));

        return max(0.0, 1.0 - (levenshtein($aa, $bb) / $longest));
    }

    /**
     * Fuzzy boost: best Levenshtein similarity between any token and any of the
     * first 12 words of the folded title, scaled by W_FUZZY_TOKEN_MAX.
     */
    private static function tokenFuzzyBoost(string $title, array $tokens): float
    {
        if (empty($tokens)) {
            return 0.0;
        }

        $titleWords = array_slice(preg_split('/\s+/', self::asciiFold($title)), 0, 12);
        if (empty($titleWords)) {
            return 0.0;
        }

        $best = 0.0;
        foreach ($tokens as $tok) {
            $foldedTok = self::asciiFold($tok);
            if ($foldedTok === '') {
                continue;
            }

            foreach ($titleWords as $word) {
                if ($word === '') {
                    continue;
                }
                $longest = max(strlen($foldedTok), strlen($word));
                $sim = 1.0 - (levenshtein($foldedTok, $word) / $longest);
                if ($sim > $best) {
                    $best = $sim;
                }
            }
        }

        return max(0.0, $best) * self::W_FUZZY_TOKEN_MAX;
    }

    /**
     * Find word-aligned occurrences of a token in an already folded title.
     *
     * An occurrence must not be preceded by an alphanumeric character. It must
     * also not be followed by one, unless the token mixes letters and digits,
     * in which case any right-hand neighbour is accepted.
     *
     * @return array List of [startOffset, endOffset] byte-offset pairs
     */
    private static function findPositions(string $foldedTitle, string $token): array
    {
        $occurrences = [];
        $titleLen = strlen($foldedTitle);
        $tokenLen = strlen($token);
        if ($tokenLen === 0) {
            return $occurrences;
        }

        $isMixed = preg_match('/[a-z]/', $token) && preg_match('/[0-9]/', $token);

        $from = 0;
        while (($p = strpos($foldedTitle, $token, $from)) !== false) {
            // Title edges count as a space, i.e. a valid boundary
            $left = ($p > 0) ? $foldedTitle[$p - 1] : ' ';
            $right = ($p + $tokenLen < $titleLen) ? $foldedTitle[$p + $tokenLen] : ' ';

            $leftOk = !ctype_alnum($left);
            $rightOk = $isMixed || !ctype_alnum($right);
            if ($leftOk && $rightOk) {
                $occurrences[] = [$p, $p + $tokenLen];
            }

            $from = $p + 1;
        }

        return $occurrences;
    }

    /**
     * Boost for tokens appearing close together, in any order.
     *
     * A sliding window over all token occurrences finds the shortest character
     * span that contains every token present in the title; the boost is
     * W_PROX_CHARS * (distinct tokens present / span). At least two distinct
     * tokens must be present.
     */
    private static function proximityBoost(string $title, array $tokens): float
    {
        $tokens = array_values(array_unique(array_filter($tokens, fn($t) => $t !== '')));
        if (count($tokens) < 2) {
            return 0.0;
        }

        $folded = self::asciiFold($title);
        $occ = [];
        foreach ($tokens as $tok) {
            foreach (self::findPositions($folded, $tok) as $p) {
                $occ[] = ['pos' => $p[0], 'end' => $p[1], 'tok' => $tok];
            }
        }
        if (empty($occ)) {
            return 0.0;
        }

        usort($occ, fn($a, $b) => $a['pos'] <=> $b['pos']);

        $present = [];
        foreach ($occ as $o) {
            $present[$o['tok']] = true;
        }
        $needCount = count($present);
        if ($needCount < 2) {
            return 0.0;
        }

        // Minimum window containing every present token
        $counts = [];
        $covered = 0;
        $bestSpan = PHP_INT_MAX;
        $left = 0;
        $total = count($occ);

        for ($right = 0; $right < $total; $right++) {
            $tok = $occ[$right]['tok'];
            $counts[$tok] = ($counts[$tok] ?? 0) + 1;
            if ($counts[$tok] === 1) {
                $covered++;
            }

            // Shrink from the left while the window still covers every token
            while ($covered === $needCount && $left <= $right) {
                $span = $occ[$right]['end'] - $occ[$left]['pos'];
                if ($span < $bestSpan) {
                    $bestSpan = $span;
                }

                $leftTok = $occ[$left]['tok'];
                $counts[$leftTok]--;
                if ($counts[$leftTok] === 0) {
                    $covered--;
                }
                $left++;
            }
        }

        if ($bestSpan === PHP_INT_MAX) {
            return 0.0;
        }

        return self::W_PROX_CHARS * ($needCount / max(1, $bestSpan));
    }

    /**
     * Boost for all tokens appearing in query order.
     *
     * For each occurrence of the first token, the following tokens are matched
     * greedily at the first position at or after the previous match's end. The
     * shortest such span wins; the score is W_ORDERED_WINDOW * (tokens / words
     * in span), plus W_ORDERED_ANCHOR when the span starts within the first
     * 6 characters of the folded title. Every token must be present.
     */
    private static function orderedWindowBoost(string $title, array $tokens): float
    {
        $tokens = array_values(array_unique(array_filter($tokens, fn($t) => $t !== '')));
        $tokenCount = count($tokens);
        if ($tokenCount < 2) {
            return 0.0;
        }

        $folded = self::asciiFold($title);
        $posList = [];
        foreach ($tokens as $tok) {
            $posList[$tok] = self::findPositions($folded, $tok);
            if (empty($posList[$tok])) {
                return 0.0;
            }
        }

        $bestSpanChars = PHP_INT_MAX;
        $bestStart = -1;

        foreach ($posList[$tokens[0]] as $first) {
            $start = $first[0];
            $end = $first[1];
            $cursor = $end;
            $complete = true;

            for ($i = 1; $i < $tokenCount; $i++) {
                $found = false;
                foreach ($posList[$tokens[$i]] as $candidate) {
                    if ($candidate[0] >= $cursor) {
                        $end = max($end, $candidate[1]);
                        $cursor = $candidate[1];
                        $found = true;
                        break;
                    }
                }
                if (!$found) {
                    $complete = false;
                    break;
                }
            }

            if ($complete && ($end - $start) < $bestSpanChars) {
                $bestSpanChars = $end - $start;
                $bestStart = $start;
            }
        }

        if ($bestSpanChars === PHP_INT_MAX) {
            return 0.0;
        }

        $slice = substr($folded, max(0, $bestStart), max(1, $bestSpanChars));
        // Explicit callback: a bare array_filter() would also drop the word "0"
        $spanWords = array_filter(explode(' ', $slice), static fn (string $w): bool => $w !== '');
        $wordsInSpan = max(1, count($spanWords));

        $score = self::W_ORDERED_WINDOW * ($tokenCount / $wordsInSpan);
        if ($bestStart <= 6) {
            $score += self::W_ORDERED_ANCHOR;
        }

        return $score;
    }

    /**
     * Penalty for long titles: LEN_PEN_PER_CHAR per character beyond
     * LEN_PEN_START, capped at 300 (returned as a negative number).
     */
    private static function lengthPenalty(string $title): float
    {
        $len = mb_strlen($title, 'UTF-8');
        if ($len <= self::LEN_PEN_START) {
            return 0.0;
        }

        $extra = $len - self::LEN_PEN_START;

        return -min(300.0, $extra * self::LEN_PEN_PER_CHAR);
    }

    // ==================== URL Helpers ====================

    /**
     * Read the site URL (option 'home') from the options table, without a
     * trailing slash. The value is memoised in a static local after the first
     * lookup; '' is returned when the option row is missing.
     */
    private function getSiteUrlFromDb(): string
    {
        static $cached = null;
        if ($cached !== null) {
            return $cached;
        }

        $stmt = $this->pdo->prepare(
            "SELECT option_value FROM {$this->prefix}options WHERE option_name = 'home' LIMIT 1"
        );
        $stmt->execute();
        $result = $stmt->fetch(\PDO::FETCH_ASSOC);

        $cached = $result ? rtrim($result['option_value'], '/') : '';

        return $cached;
    }

    /**
     * Read the permalink structure (option 'permalink_structure') from the
     * options table. The value is memoised in a static local after the first
     * lookup; '' is returned when the option row is missing.
     */
    private function getPermalinkStructure(): string
    {
        static $cached = null;
        if ($cached !== null) {
            return $cached;
        }

        $stmt = $this->pdo->prepare(
            "SELECT option_value FROM {$this->prefix}options WHERE option_name = 'permalink_structure' LIMIT 1"
        );
        $stmt->execute();
        $result = $stmt->fetch(\PDO::FETCH_ASSOC);

        $cached = $result ? $result['option_value'] : '';

        return $cached;
    }

    /**
     * Build a post permalink from its ID and slug.
     *
     * - Empty slug, or an empty permalink structure (plain permalinks):
     *   "<site>/?p=<id>", which does not depend on rewrite rules.
     * - Structure containing %post_id%: "<site>/<id>/".
     * - Anything else (including %postname%): "<site>/<slug>/".
     *
     * @param int    $postId   Post ID
     * @param string $postName Post slug (post_name)
     * @return string Permalink URL
     */
    public function buildPermalink(int $postId, string $postName): string
    {
        $siteUrl = $this->getSiteUrlFromDb();
        $plainUrl = $siteUrl . '/?p=' . $postId;

        // Fallback when post_name is empty
        if (empty($postName)) {
            return $plainUrl;
        }

        $structure = $this->getPermalinkStructure();

        // Plain permalinks: pretty URLs would not resolve
        if ($structure === '') {
            return $plainUrl;
        }

        // ID-based structure
        if (str_contains($structure, '%post_id%')) {
            return $siteUrl . '/' . $postId . '/';
        }

        // %postname% and every other structure: use the slug
        return $siteUrl . '/' . $postName . '/';
    }
}