fix(php): exclude tables and embeds from ad injection

- Add mapForbiddenZones() for tables, iframes, embeds
- Remove table from scannable elements pattern
- Filter positions inside forbidden zones
- Fixes ads being injected inside table cells and YouTube embeds
This commit is contained in:
FrankZamora
2025-12-10 11:44:40 -06:00
parent 4f1e85fe88
commit 04387d46bb

View File

@@ -163,6 +163,11 @@ final class ContentAdInjector
/**
* PASO 1: Escanea el contenido para encontrar ubicaciones elegibles
*
* IMPORTANTE: No inserta anuncios:
* - Dentro de tablas (<table>...</table>)
* - Dentro de embeds/iframes (YouTube, etc.)
* - Despues de tablas (tables excluidas completamente)
*
* @return array{position: int, type: string, tag: string, element_index: int}[]
*/
private function scanContent(string $content): array
@@ -170,8 +175,12 @@ final class ContentAdInjector
$locations = [];
$elementIndex = 0;
// Primero, mapear zonas prohibidas (tablas, iframes, embeds)
$forbiddenZones = $this->mapForbiddenZones($content);
// Regex para encontrar tags de cierre de elementos de bloque
$pattern = '/(<\/(?:p|h2|h3|figure|ul|ol|table|blockquote)>)/i';
// NOTA: Excluimos </table> - no queremos insertar despues de tablas
$pattern = '/(<\/(?:p|h2|h3|figure|ul|ol|blockquote)>)/i';
// Encontrar todas las coincidencias con sus posiciones
if (preg_match_all($pattern, $content, $matches, PREG_OFFSET_CAPTURE)) {
@@ -179,6 +188,11 @@ final class ContentAdInjector
$tag = strtolower($match[0]);
$position = $match[1] + strlen($match[0]); // Posicion despues del tag
// Verificar que no este dentro de una zona prohibida
if ($this->isInForbiddenZone($position, $forbiddenZones)) {
continue;
}
$type = $this->getTypeFromTag($tag);
if ($type) {
$locations[] = [
@@ -191,8 +205,8 @@ final class ContentAdInjector
}
}
// Detectar imagenes standalone (no dentro de figure)
$locations = array_merge($locations, $this->scanStandaloneImages($content, $elementIndex));
// Detectar imagenes standalone (no dentro de figure ni zonas prohibidas)
$locations = array_merge($locations, $this->scanStandaloneImages($content, $elementIndex, $forbiddenZones));
// Validar listas (minimo 3 items)
$locations = $this->validateLists($content, $locations);
@@ -208,8 +222,75 @@ final class ContentAdInjector
return $locations;
}
/**
 * Maps the byte ranges of the content where ads must NOT be inserted.
 *
 * Covered elements: tables, iframes, divs whose class attribute contains an
 * embed/video keyword, and WordPress embed figures (class "wp-block-embed").
 *
 * Each zone runs from the start of the opening tag to the end of its
 * MATCHING closing tag. Nested elements of the same name are balanced, so a
 * table inside a table (or a div inside an embed div) does not end the zone
 * early. An opening tag that is never closed produces no zone.
 *
 * Offsets are byte offsets, consistent with PREG_OFFSET_CAPTURE and strlen().
 *
 * @param string $content HTML content to scan
 *
 * @return array{start: int, end: int}[]
 */
private function mapForbiddenZones(string $content): array
{
    // Builds a regex fragment matching a class attribute (single- or
    // double-quoted) whose value contains one of the given keywords.
    $classWith = static fn (string $keywords): string => sprintf(
        '\sclass\s*=\s*(?:"[^"]*(?:%1$s)[^"]*"|\'[^\']*(?:%1$s)[^\']*\')',
        $keywords,
    );

    // Tag name => pattern for the opening tag that starts a forbidden zone.
    // The lookahead after the tag name prevents matching longer tag names
    // such as <table-of-contents>.
    $openers = [
        'table' => '/<table(?=[\s\/>])[^>]*>/i',
        'iframe' => '/<iframe(?=[\s\/>])[^>]*>/i',
        'div' => '/<div(?=\s)[^>]*' . $classWith('embed|video|youtube|vimeo|player') . '[^>]*>/i',
        'figure' => '/<figure(?=\s)[^>]*' . $classWith('wp-block-embed') . '[^>]*>/i',
    ];

    $zones = [];

    foreach ($openers as $tagName => $openPattern) {
        // Matches any opening or closing tag of this name; group 1 is "/" for closers.
        $anyTagPattern = '/<(\/?)' . $tagName . '(?=[\s\/>])[^>]*>/i';
        $offset = 0;

        while (preg_match($openPattern, $content, $open, PREG_OFFSET_CAPTURE, $offset) === 1) {
            $start = $open[0][1];
            $afterOpenTag = $start + strlen($open[0][0]);

            // Walk forward counting nesting depth until the opener is balanced.
            $cursor = $afterOpenTag;
            $depth = 1;
            $end = null;

            while (preg_match($anyTagPattern, $content, $tag, PREG_OFFSET_CAPTURE, $cursor) === 1) {
                $cursor = $tag[0][1] + strlen($tag[0][0]);
                $depth += $tag[1][0] === '/' ? -1 : 1;

                if ($depth === 0) {
                    $end = $cursor;
                    break;
                }
            }

            if ($end === null) {
                // Unclosed element: no zone; keep looking after its opening tag.
                $offset = $afterOpenTag;
                continue;
            }

            $zones[] = [
                'start' => $start,
                'end' => $end,
            ];

            // Anything nested inside this zone is already covered.
            $offset = $end;
        }
    }

    return $zones;
}
/**
 * Tells whether a byte position lies within any forbidden zone.
 *
 * Both zone bounds are inclusive: a position equal to a zone's 'start' or
 * 'end' counts as inside it.
 *
 * @param int   $position       Offset to test
 * @param array $forbiddenZones Zones, each with 'start' and 'end' keys
 *
 * @return bool True when at least one zone contains the position
 */
private function isInForbiddenZone(int $position, array $forbiddenZones): bool
{
    foreach ($forbiddenZones as ['start' => $from, 'end' => $to]) {
        // Outside this zone: try the next one.
        if ($position < $from || $position > $to) {
            continue;
        }

        return true;
    }

    return false;
}
/**
* Convierte tag de cierre a tipo de elemento
* NOTA: </table> excluido - no insertamos ads despues de tablas
*/
private function getTypeFromTag(string $tag): ?string
{
@@ -219,16 +300,15 @@ final class ContentAdInjector
'</h3>' => 'h3',
'</figure>' => 'image',
'</ul>', '</ol>' => 'list',
'</table>' => 'table',
'</blockquote>' => 'blockquote',
default => null,
};
}
/**
* Detecta imagenes que no estan dentro de figure
* Detecta imagenes que no estan dentro de figure ni zonas prohibidas
*/
private function scanStandaloneImages(string $content, int $startIndex): array
private function scanStandaloneImages(string $content, int $startIndex, array $forbiddenZones = []): array
{
$locations = [];
@@ -241,6 +321,11 @@ final class ContentAdInjector
$imgTag = $match[0];
$imgPosition = $match[1];
// Verificar que no este en zona prohibida
if ($this->isInForbiddenZone($imgPosition, $forbiddenZones)) {
continue;
}
// Verificar si hay un <figure> abierto antes de esta imagen
$contentBefore = substr($content, 0, $imgPosition);
$lastFigureOpen = strrpos($contentBefore, '<figure');