<?php

declare(strict_types=1);

/**
 * Malformed list fixer based on DOMDocument.
 *
 * Scans the `html` column of `datos_seo_pagina` and repairs:
 *  - <ul>/<ol> elements containing non-<li> elements as direct children
 *  - nested lists that are siblings of an <li> instead of children of it
 *
 * USAGE:
 *   php fix-malformed-lists-dom.php --mode=scan   # Scan only
 *   php fix-malformed-lists-dom.php --mode=test   # Try the fix on 1 post (nothing is saved)
 *   php fix-malformed-lists-dom.php --mode=fix    # Apply and persist the fixes
 *
 * NOTE(review): this text was reconstructed from a diff whose HTML-like tokens
 * had been stripped (the wrapper <div>, the XML encoding prefix and the <li>
 * mentions in messages). Confirm those literals against the committed file.
 *
 * @package ROI_Theme
 * @since Phase 4.4 Accessibility
 */

error_reporting(E_ALL);
// Passed as a string: before PHP 8.1 ini_set() only accepts string values,
// which matters now that strict_types is enabled.
ini_set('display_errors', '1');
ini_set('memory_limit', '512M');
set_time_limit(600);

/** Tag names allowed as direct children of <ul>/<ol>. */
const VALID_LIST_CHILDREN = ['li', 'script', 'template'];

/** Values accepted by the --mode= argument. */
const SUPPORTED_MODES = ['scan', 'test', 'fix'];

/** Horizontal rule used in the console report. */
const SEPARATOR = "─────────────────────────────────";

/**
 * Read an environment variable, falling back to a default when unset or empty.
 */
function envOr(string $name, string $default): string
{
    $value = getenv($name);

    return ($value === false || $value === '') ? $default : $value;
}

// Configuration.
// NOTE(review): live credentials are committed in this file. The environment
// variables below allow overriding them; the literal fallbacks are kept only
// for backward compatibility. Rotate the password and remove the fallback.
$db_config = [
    'host' => envOr('ROI_SEO_DB_HOST', 'localhost'),
    'database' => envOr('ROI_SEO_DB_NAME', 'preciosunitarios_seo'),
    'username' => envOr('ROI_SEO_DB_USER', 'preciosunitarios_seo'),
    'password' => envOr('ROI_SEO_DB_PASSWORD', 'ACl%EEFd=V-Yvb??'),
    'charset' => 'utf8mb4',
];

// Parse arguments; the last --mode= wins, default is "scan".
$mode = 'scan';
foreach ($argv ?? [] as $arg) {
    if (strpos($arg, '--mode=') === 0) {
        // Cast: on PHP 7 substr() returns false when nothing follows the "=".
        $mode = (string) substr($arg, 7);
    }
}

echo "==============================================\n";
echo " CORRECTOR DE LISTAS - DOMDocument\n";
echo " Modo: $mode\n";
echo " Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";

/**
 * Open the database connection.
 *
 * Switches mysqli to exception reporting so that failures behave the same on
 * every PHP version (exceptions became the default in PHP 8.1).
 *
 * @param array{host:string,database:string,username:string,password:string,charset:string} $config
 *
 * @return mysqli|null The connection, or null after printing the error.
 */
function connectDatabase(array $config): ?mysqli
{
    mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT);

    try {
        $conn = new mysqli(
            $config['host'],
            $config['username'],
            $config['password'],
            $config['database']
        );
        $conn->set_charset($config['charset']);
    } catch (mysqli_sql_exception $e) {
        echo "Error de conexión: " . $e->getMessage() . "\n";

        return null;
    }

    return $conn;
}

/**
 * Parse an HTML fragment into a DOMDocument, wrapped in a temporary <div>.
 *
 * The wrapper (id "temp-wrapper") gives the fragment a single root so it can
 * be serialized back child by child. The XML encoding prefix makes libxml
 * read the input as UTF-8.
 */
function loadHtmlFragment(string $html): DOMDocument
{
    // Silence libxml warnings about malformed markup while parsing only.
    $previous = libxml_use_internal_errors(true);

    $doc = new DOMDocument('1.0', 'UTF-8');
    $wrapped = '<div id="temp-wrapper">' . $html . '</div>';
    $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $doc;
}

/**
 * Collect every <ul> and <ol> of the document (all <ul> first, then all <ol>).
 *
 * The result is a plain array because getElementsByTagName() returns a live
 * list, which must not be iterated while nodes are being moved.
 *
 * @return list<DOMElement>
 */
function collectLists(DOMDocument $doc): array
{
    $lists = [];
    foreach (['ul', 'ol'] as $listTag) {
        foreach ($doc->getElementsByTagName($listTag) as $list) {
            $lists[] = $list;
        }
    }

    return $lists;
}

/**
 * Return the direct element children of a list that are not allowed there.
 *
 * @return list<DOMNode>
 */
function findInvalidListChildren(DOMElement $list): array
{
    $invalid = [];
    foreach ($list->childNodes as $child) {
        if ($child->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }
        if (!in_array(strtolower($child->nodeName), VALID_LIST_CHILDREN, true)) {
            $invalid[] = $child;
        }
    }

    return $invalid;
}

/**
 * Fix malformed lists in an HTML fragment using DOMDocument.
 *
 * NOTE(review): when something is fixed, the whole fragment is re-serialized
 * by libxml, so markup outside the lists may be normalized too. Back up the
 * table before running --mode=fix.
 *
 * @return array{fixed:bool,html:string,changes:int,details:list<string>}
 *         `html` is the original input unless `fixed` is true.
 */
function fixMalformedLists(string $html): array
{
    $result = [
        'fixed' => false,
        'html' => $html,
        'changes' => 0,
        'details' => [],
    ];

    $doc = loadHtmlFragment($html);

    $changes = 0;
    foreach (collectLists($doc) as $list) {
        $changes += fixListChildren($list, $result['details']);
    }

    if ($changes === 0) {
        return $result;
    }

    // Extract the corrected HTML: the wrapper's children, without the wrapper.
    $wrapper = $doc->getElementById('temp-wrapper');
    if ($wrapper) {
        $innerHTML = '';
        foreach ($wrapper->childNodes as $child) {
            $innerHTML .= $doc->saveHTML($child);
        }
        $result['html'] = $innerHTML;
        $result['fixed'] = true;
        $result['changes'] = $changes;
    }

    return $result;
}

/**
 * Fix the children of one list (it may only contain li, script, template).
 *
 * Every invalid child, nested list or not, is handled the same way.
 *
 * @param DOMElement   $list    The <ul>/<ol> to repair.
 * @param list<string> $details Receives one human-readable line per change.
 *
 * @return int Number of changes made.
 */
function fixListChildren(DOMElement $list, array &$details): int
{
    $changes = 0;
    foreach (findInvalidListChildren($list) as $node) {
        $changes += wrapInLi($list, $node, $details);
    }

    return $changes;
}

/**
 * Move a node into the closest preceding <li>, or wrap it in a new <li>.
 *
 * @param DOMElement   $list    Parent list of $node.
 * @param DOMNode      $node    Invalid direct child of $list.
 * @param list<string> $details Receives one human-readable line per change.
 *
 * @return int Always 1 (one change per call).
 */
function wrapInLi(DOMElement $list, DOMNode $node, array &$details): int
{
    $tagName = strtolower($node->nodeName);

    // Walk back over the preceding siblings looking for an <li>.
    $prevLi = null;
    for ($prev = $node->previousSibling; $prev; $prev = $prev->previousSibling) {
        if ($prev->nodeType === XML_ELEMENT_NODE && strtolower($prev->nodeName) === 'li') {
            $prevLi = $prev;
            break;
        }
    }

    if ($prevLi) {
        // Move the node to the end of the previous <li>.
        $prevLi->appendChild($node);
        $details[] = "Movido <$tagName> dentro del <li> anterior";

        return 1;
    }

    // No previous <li>: create one in the node's place and move the node in.
    $newLi = $list->ownerDocument->createElement('li');
    $list->insertBefore($newLi, $node);
    $newLi->appendChild($node);
    $details[] = "Envuelto <$tagName> en nuevo <li>";

    return 1;
}

/**
 * Detect list problems in an HTML fragment without modifying it.
 *
 * @return list<array{list_type:string,invalid_child:string,context:string}>
 *         Issues found in <ul> elements first, then those in <ol> elements.
 */
function detectIssues(string $html): array
{
    $issues = [];
    $doc = loadHtmlFragment($html);

    foreach (['ul', 'ol'] as $listType) {
        foreach ($doc->getElementsByTagName($listType) as $list) {
            foreach (findInvalidListChildren($list) as $child) {
                $issues[] = [
                    'list_type' => $listType,
                    'invalid_child' => strtolower($child->nodeName),
                    'context' => getNodeContext($child),
                ];
            }
        }
    }

    return $issues;
}

/**
 * Return the first 100 bytes of a node's HTML, for debugging output.
 */
function getNodeContext(DOMNode $node): string
{
    $html = $node->ownerDocument->saveHTML($node);
    if ($html === false) {
        return '';
    }

    return substr($html, 0, 100) . (strlen($html) > 100 ? '...' : '');
}

/**
 * Truncate a string to $maxBytes and escape it for display.
 *
 * The flags are passed explicitly: byte truncation can split a UTF-8
 * character, and without ENT_SUBSTITUTE (not a default before PHP 8.1)
 * htmlspecialchars() would return an empty string for such input.
 */
function previewForDisplay(string $text, int $maxBytes): string
{
    return htmlspecialchars(
        (string) substr($text, 0, $maxBytes),
        ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401,
        'UTF-8'
    );
}

/**
 * Count the rows that have non-empty HTML.
 *
 * @throws mysqli_sql_exception When the query fails.
 */
function countRows(mysqli $conn): int
{
    $result = $conn->query("SELECT COUNT(*) as total FROM datos_seo_pagina WHERE html IS NOT NULL AND html != ''");
    $row = $result->fetch_assoc();
    $result->free();

    return (int) $row['total'];
}

/**
 * Fetch one page of rows ordered by id.
 *
 * @return list<array<string, mixed>> Rows with the keys id, page and html.
 *
 * @throws mysqli_sql_exception When the query fails.
 */
function fetchBatch(mysqli $conn, int $limit, int $offset): array
{
    $query = sprintf(
        "SELECT id, page, html FROM datos_seo_pagina
         WHERE html IS NOT NULL AND html != ''
         ORDER BY id LIMIT %d OFFSET %d",
        $limit,
        $offset
    );
    $result = $conn->query($query);

    $rows = [];
    while ($row = $result->fetch_assoc()) {
        $rows[] = $row;
    }
    $result->free();

    return $rows;
}

/**
 * SCAN mode: detect and report problems without changing anything.
 */
function runScan(mysqli $conn, int $total): void
{
    echo "MODO: ESCANEO (solo detección)\n";
    echo SEPARATOR . "\n\n";

    $batch_size = 100;
    $affected = 0;
    $total_issues = 0;

    for ($offset = 0; $offset < $total;) {
        foreach (fetchBatch($conn, $batch_size, $offset) as $row) {
            $issues = detectIssues((string) $row['html']);
            if (empty($issues)) {
                continue;
            }

            $affected++;
            $total_issues += count($issues);

            // Only the first 20 affected rows are listed in detail.
            if ($affected <= 20) {
                echo "[ID: {$row['id']}] " . count($issues) . " problema(s)\n";
                echo "URL: {$row['page']}\n";
                foreach (array_slice($issues, 0, 2) as $issue) {
                    echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
                }
                echo "\n";
            }
        }

        $offset += $batch_size;

        if ($offset % 1000 === 0) {
            echo "Procesados: $offset/$total...\n";
        }
    }

    echo SEPARATOR . "\n";
    echo "RESUMEN:\n";
    echo " Posts afectados: $affected\n";
    echo " Total incidencias: $total_issues\n";
}

/**
 * TEST mode: show the fix for the first affected row among the first 100.
 *
 * Nothing is written to the database.
 */
function runTest(mysqli $conn): void
{
    echo "MODO: PRUEBA (sin guardar)\n";
    echo SEPARATOR . "\n\n";

    foreach (fetchBatch($conn, 100, 0) as $row) {
        $issues = detectIssues((string) $row['html']);
        if (empty($issues)) {
            continue;
        }

        echo "POST ID: {$row['id']}\n";
        echo "URL: {$row['page']}\n";
        echo "Problemas detectados: " . count($issues) . "\n\n";

        echo "ANTES (problemas):\n";
        foreach (array_slice($issues, 0, 3) as $issue) {
            echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
            echo " Contexto: " . previewForDisplay($issue['context'], 80) . "\n";
        }

        // Apply the fix in memory.
        $fixResult = fixMalformedLists((string) $row['html']);

        echo "\nDESPUÉS (corrección):\n";
        echo " Cambios realizados: {$fixResult['changes']}\n";
        foreach ($fixResult['details'] as $detail) {
            echo " - $detail\n";
        }

        // Re-scan the result to confirm the problems are gone.
        $issuesAfter = detectIssues($fixResult['html']);
        echo "\nVERIFICACIÓN:\n";
        echo " Problemas antes: " . count($issues) . "\n";
        echo " Problemas después: " . count($issuesAfter) . "\n";

        if (count($issuesAfter) < count($issues)) {
            echo " ✓ Reducción de problemas\n";
        }

        // Show a fragment of the corrected HTML.
        if ($fixResult['fixed']) {
            echo "\nMUESTRA HTML CORREGIDO (primeros 500 chars):\n";
            echo SEPARATOR . "\n";
            echo previewForDisplay($fixResult['html'], 500) . "...\n";
        }

        return;
    }

    echo "No se encontraron problemas en los primeros 100 registros.\n";
}

/**
 * FIX mode: repair every affected row and save the result.
 */
function runFix(mysqli $conn, int $total): void
{
    echo "MODO: CORRECCIÓN (GUARDANDO CAMBIOS)\n";
    echo SEPARATOR . "\n\n";

    $batch_size = 50;
    $fixed_count = 0;
    $error_count = 0;

    // Prepared once; bind_param() binds by reference, so assigning to these
    // two variables before each execute() sends the new values.
    $fixedHtml = '';
    $rowId = 0;
    $stmt = $conn->prepare("UPDATE datos_seo_pagina SET html = ? WHERE id = ?");
    $stmt->bind_param("si", $fixedHtml, $rowId);

    for ($offset = 0; $offset < $total;) {
        foreach (fetchBatch($conn, $batch_size, $offset) as $row) {
            if (empty(detectIssues((string) $row['html']))) {
                continue;
            }

            $fixResult = fixMalformedLists((string) $row['html']);
            if (!$fixResult['fixed']) {
                continue;
            }

            $fixedHtml = $fixResult['html'];
            $rowId = (int) $row['id'];

            // A failed UPDATE is counted and reported; the run continues.
            try {
                $saved = $stmt->execute();
            } catch (mysqli_sql_exception $e) {
                $saved = false;
            }

            if ($saved) {
                $fixed_count++;
                echo "[ID: {$row['id']}] ✓ Corregido ({$fixResult['changes']} cambios)\n";
            } else {
                $error_count++;
                echo "[ID: {$row['id']}] ✗ Error al guardar\n";
            }
        }

        $offset += $batch_size;

        if ($offset % 500 === 0) {
            echo "Procesados: $offset/$total (corregidos: $fixed_count)\n";
        }
    }

    $stmt->close();

    echo "\n" . SEPARATOR . "\n";
    echo "RESUMEN:\n";
    echo " Posts corregidos: $fixed_count\n";
    echo " Errores: $error_count\n";
}

// ============================================
// MAIN EXECUTION
// ============================================

// Reject unknown modes before touching the database.
if (!in_array($mode, SUPPORTED_MODES, true)) {
    echo "Modo desconocido: '$mode'. Use --mode=scan, --mode=test o --mode=fix.\n";
    exit(1);
}

$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}

echo "✓ Conexión establecida\n\n";

$exitCode = 0;
try {
    // Count rows
    $total = countRows($conn);
    echo "Total de registros: $total\n\n";

    if ($mode === 'scan') {
        runScan($conn, $total);
    } elseif ($mode === 'test') {
        runTest($conn);
    } else {
        runFix($conn, $total);
    }
} catch (mysqli_sql_exception $e) {
    echo "Error de base de datos: " . $e->getMessage() . "\n";
    $exitCode = 1;
}

$conn->close();

if ($exitCode !== 0) {
    exit($exitCode);
}

echo "\n✓ Proceso completado.\n";