<?php
/**
 * Repairs malformed HTML lists stored in the `datos_seo_pagina` table using DOMDocument.
 *
 * Problems handled:
 *  - <ul>/<ol> elements whose direct children include non-<li> elements
 *  - Nested lists that are siblings of an <li> instead of children of one
 *
 * USAGE:
 *   php fix-malformed-lists-dom.php --mode=scan   # Scan only
 *   php fix-malformed-lists-dom.php --mode=test   # Try the fix on 1 post (nothing is saved)
 *   php fix-malformed-lists-dom.php --mode=fix    # Apply the fixes
 *
 * @package ROI_Theme
 * @since Phase 4.4 Accessibility
 */

error_reporting(E_ALL);
ini_set('display_errors', 1);
ini_set('memory_limit', '512M');
set_time_limit(600);

/** Id of the temporary <div> wrapped around each fragment so it can be extracted after parsing. */
const LIST_WRAPPER_ID = 'temp-wrapper';

/** Element names allowed as direct children of <ul>/<ol>. */
const VALID_LIST_CHILDREN = ['li', 'script', 'template'];

/** Modes accepted through --mode=. */
const VALID_MODES = ['scan', 'test', 'fix'];

/** Horizontal rule used in the console report. */
const REPORT_SEPARATOR = "─────────────────────────────────";

/**
 * Read an environment variable, falling back to a default when it is unset or empty.
 */
function envOr(string $name, string $default): string
{
    $value = getenv($name);

    return ($value === false || $value === '') ? $default : $value;
}

// Configuration.
// NOTE(review): these credentials are committed in source. The ROI_DB_* environment
// variables take precedence when set; the literals remain only so existing invocations
// keep working and should be removed (and the password rotated) once the environment
// is configured.
$db_config = [
    'host' => envOr('ROI_DB_HOST', 'localhost'),
    'database' => envOr('ROI_DB_NAME', 'preciosunitarios_seo'),
    'username' => envOr('ROI_DB_USER', 'preciosunitarios_seo'),
    'password' => envOr('ROI_DB_PASSWORD', 'ACl%EEFd=V-Yvb??'),
    'charset' => 'utf8mb4'
];

/**
 * Connect to the database.
 *
 * Prints the error and returns null on failure. Both failure styles are handled:
 * the exception thrown when mysqli runs in strict reporting mode (the default since
 * PHP 8.1) and the legacy `connect_error` property.
 *
 * @param array $config Keys: host, username, password, database, charset.
 * @return mysqli|null Open connection, or null when connecting or setting the charset failed.
 */
function connectDatabase(array $config): ?mysqli
{
    try {
        $conn = new mysqli(
            $config['host'],
            $config['username'],
            $config['password'],
            $config['database']
        );

        if ($conn->connect_error) {
            echo "Error de conexión: " . $conn->connect_error . "\n";
            return null;
        }

        // Writing HTML back through the wrong connection charset would corrupt it,
        // so a charset failure is treated as a failed connection.
        if (!$conn->set_charset($config['charset'])) {
            echo "Error de conexión: " . $conn->error . "\n";
            $conn->close();
            return null;
        }
    } catch (mysqli_sql_exception $e) {
        echo "Error de conexión: " . $e->getMessage() . "\n";
        return null;
    }

    return $conn;
}

/**
 * Parse an HTML fragment into a DOMDocument, wrapped in a <div id="temp-wrapper">.
 *
 * libxml parse errors are suppressed while loading, and the previous libxml
 * error-handling setting is restored afterwards.
 *
 * NOTE(review): callers extract only the wrapper's children. Whether a stray closing
 * div in the input can push content outside the wrapper has not been verified here.
 */
function loadHtmlFragment(string $html): DOMDocument
{
    $previous = libxml_use_internal_errors(true);

    $doc = new DOMDocument('1.0', 'UTF-8');
    $wrapped = '<div id="' . LIST_WRAPPER_ID . '">' . $html . '</div>';

    try {
        // The XML encoding prefix makes the HTML parser read the input as UTF-8.
        $doc->loadHTML(
            '<?xml encoding="UTF-8">' . $wrapped,
            LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
        );
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    return $doc;
}

/**
 * Return the element children of a list that are not allowed there.
 *
 * @return DOMNode[] Snapshot array, safe to iterate while the DOM is modified.
 */
function findInvalidListChildren(DOMElement $list): array
{
    $invalid = [];

    foreach ($list->childNodes as $child) {
        if ($child->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }
        if (!in_array(strtolower($child->nodeName), VALID_LIST_CHILDREN, true)) {
            $invalid[] = $child;
        }
    }

    return $invalid;
}

/**
 * Fix malformed lists in an HTML fragment using DOMDocument.
 *
 * @param string $html HTML fragment to repair.
 * @return array Keys: 'fixed' (bool), 'html' (repaired HTML, or the input when nothing
 *               was changed), 'changes' (int), 'details' (string[] description of each change).
 */
function fixMalformedLists(string $html): array
{
    $result = [
        'fixed' => false,
        'html' => $html,
        'changes' => 0,
        'details' => []
    ];

    $doc = loadHtmlFragment($html);

    // Copy every list (ul first, then ol) into a plain array before modifying the DOM.
    $lists = [];
    foreach (['ul', 'ol'] as $tag) {
        foreach ($doc->getElementsByTagName($tag) as $list) {
            $lists[] = $list;
        }
    }

    $changes = 0;
    foreach ($lists as $list) {
        $changes += fixListChildren($list, $result['details']);
    }

    if ($changes === 0) {
        return $result;
    }

    // Extract the repaired HTML: everything inside the temporary wrapper.
    $wrapper = $doc->getElementById(LIST_WRAPPER_ID);
    if ($wrapper === null) {
        return $result;
    }

    $innerHTML = '';
    foreach ($wrapper->childNodes as $child) {
        $innerHTML .= $doc->saveHTML($child);
    }

    $result['html'] = $innerHTML;
    $result['fixed'] = true;
    $result['changes'] = $changes;

    return $result;
}

/**
 * Fix the children of one list (it may only contain li, script and template).
 *
 * Every invalid element child, nested ul/ol included, is moved into an <li>.
 *
 * @param DOMElement $list    The ul/ol element to repair.
 * @param array      $details Receives one message per change.
 * @return int Number of changes made.
 */
function fixListChildren(DOMElement $list, array &$details): int
{
    $changes = 0;

    foreach (findInvalidListChildren($list) as $node) {
        $changes += wrapInLi($list, $node, $details);
    }

    return $changes;
}

/**
 * Wrap a node in an <li>, or move it into the preceding <li>.
 *
 * @param DOMElement $list    Parent list of $node.
 * @param DOMNode    $node    Invalid direct child of $list.
 * @param array      $details Receives a message describing the change.
 * @return int Always 1 (one change made).
 */
function wrapInLi(DOMElement $list, DOMNode $node, array &$details): int
{
    $tagName = strtolower($node->nodeName);

    // Look for the nearest preceding <li> sibling.
    $prevLi = null;
    for ($prev = $node->previousSibling; $prev !== null; $prev = $prev->previousSibling) {
        if ($prev->nodeType === XML_ELEMENT_NODE && strtolower($prev->nodeName) === 'li') {
            $prevLi = $prev;
            break;
        }
    }

    if ($prevLi !== null) {
        // Move the node to the end of the preceding <li>.
        $prevLi->appendChild($node);
        $details[] = "Movido <$tagName> dentro del <li> anterior";
        return 1;
    }

    // No preceding <li>: create a new one in the node's position and move the node into it.
    $newLi = $list->ownerDocument->createElement('li');
    $list->insertBefore($newLi, $node);
    $newLi->appendChild($node);
    $details[] = "Envuelto <$tagName> en nuevo <li>";

    return 1;
}

/**
 * Detect list problems in an HTML fragment without fixing them.
 *
 * @param string $html HTML fragment to inspect.
 * @return array List of issues (all ul issues first, then ol), each with keys
 *               'list_type', 'invalid_child' and 'context'.
 */
function detectIssues(string $html): array
{
    $issues = [];
    $doc = loadHtmlFragment($html);

    foreach (['ul', 'ol'] as $listType) {
        foreach ($doc->getElementsByTagName($listType) as $list) {
            foreach (findInvalidListChildren($list) as $child) {
                $issues[] = [
                    'list_type' => $listType,
                    'invalid_child' => strtolower($child->nodeName),
                    'context' => getNodeContext($child)
                ];
            }
        }
    }

    return $issues;
}

/**
 * Get a short serialization of a node for debugging: the first 100 bytes of its HTML,
 * followed by '...' when it was truncated.
 */
function getNodeContext(DOMNode $node): string
{
    $html = $node->ownerDocument->saveHTML($node);

    return substr($html, 0, 100) . (strlen($html) > 100 ? '...' : '');
}

/**
 * Run a query that must return a result set.
 *
 * @throws RuntimeException When the query fails and mysqli is not in exception mode
 *                          (in exception mode mysqli_sql_exception, a RuntimeException, is thrown).
 */
function runQuery(mysqli $conn, string $sql): mysqli_result
{
    $result = $conn->query($sql);

    if (!($result instanceof mysqli_result)) {
        throw new RuntimeException('Error en la consulta: ' . $conn->error);
    }

    return $result;
}

/**
 * Fetch one batch of rows with non-empty HTML, ordered by id.
 */
function fetchBatch(mysqli $conn, int $limit, int $offset): mysqli_result
{
    return runQuery($conn, sprintf(
        "SELECT id, page, html FROM datos_seo_pagina WHERE html IS NOT NULL AND html != '' ORDER BY id LIMIT %d OFFSET %d",
        $limit,
        $offset
    ));
}

/**
 * SCAN mode: report posts whose lists have invalid children. Nothing is modified.
 */
function runScan(mysqli $conn, int $total): void
{
    echo "MODO: ESCANEO (solo detección)\n";
    echo REPORT_SEPARATOR . "\n\n";

    $batch_size = 100;
    $affected = 0;
    $total_issues = 0;

    for ($offset = 0; $offset < $total;) {
        $result = fetchBatch($conn, $batch_size, $offset);

        while ($row = $result->fetch_assoc()) {
            $issues = detectIssues($row['html']);
            if (empty($issues)) {
                continue;
            }

            $affected++;
            $total_issues += count($issues);

            // Only the first 20 affected posts are listed in detail.
            if ($affected <= 20) {
                echo "[ID: {$row['id']}] " . count($issues) . " problema(s)\n";
                echo "URL: {$row['page']}\n";
                foreach (array_slice($issues, 0, 2) as $issue) {
                    echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
                }
                echo "\n";
            }
        }
        $result->free();

        $offset += $batch_size;
        if ($offset % 1000 == 0) {
            echo "Procesados: $offset/$total...\n";
        }
    }

    echo REPORT_SEPARATOR . "\n";
    echo "RESUMEN:\n";
    echo " Posts afectados: $affected\n";
    echo " Total incidencias: $total_issues\n";
}

/**
 * TEST mode: apply the fix in memory to the first affected post among the first
 * 100 rows and print a before/after report. Nothing is saved.
 */
function runTest(mysqli $conn): void
{
    echo "MODO: PRUEBA (sin guardar)\n";
    echo REPORT_SEPARATOR . "\n\n";

    $result = fetchBatch($conn, 100, 0);

    while ($row = $result->fetch_assoc()) {
        $issues = detectIssues($row['html']);
        if (empty($issues)) {
            continue;
        }

        echo "POST ID: {$row['id']}\n";
        echo "URL: {$row['page']}\n";
        echo "Problemas detectados: " . count($issues) . "\n\n";

        echo "ANTES (problemas):\n";
        foreach (array_slice($issues, 0, 3) as $issue) {
            echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
            echo " Contexto: " . htmlspecialchars(substr($issue['context'], 0, 80)) . "\n";
        }

        // Apply the fix in memory.
        $fixResult = fixMalformedLists($row['html']);

        echo "\nDESPUÉS (corrección):\n";
        echo " Cambios realizados: {$fixResult['changes']}\n";
        foreach ($fixResult['details'] as $detail) {
            echo " - $detail\n";
        }

        // Re-run detection on the repaired HTML to check what is left.
        $issuesAfter = detectIssues($fixResult['html']);
        echo "\nVERIFICACIÓN:\n";
        echo " Problemas antes: " . count($issues) . "\n";
        echo " Problemas después: " . count($issuesAfter) . "\n";

        if (count($issuesAfter) < count($issues)) {
            echo " ✓ Reducción de problemas\n";
        }

        // Show a fragment of the repaired HTML.
        if ($fixResult['fixed']) {
            echo "\nMUESTRA HTML CORREGIDO (primeros 500 chars):\n";
            echo REPORT_SEPARATOR . "\n";
            echo htmlspecialchars(substr($fixResult['html'], 0, 500)) . "...\n";
        }

        break;
    }

    $result->free();
}

/**
 * FIX mode: repair every affected post and save the corrected HTML.
 *
 * A failed UPDATE is counted and reported, and processing continues with the next row.
 *
 * @throws RuntimeException When the UPDATE statement cannot be prepared or a SELECT fails.
 */
function runFix(mysqli $conn, int $total): void
{
    echo "MODO: CORRECCIÓN (GUARDANDO CAMBIOS)\n";
    echo REPORT_SEPARATOR . "\n\n";

    $batch_size = 50;
    $fixed_count = 0;
    $error_count = 0;

    // Prepared once and reused for every row.
    $stmt = $conn->prepare("UPDATE datos_seo_pagina SET html = ? WHERE id = ?");
    if ($stmt === false) {
        throw new RuntimeException('Error al preparar la actualización: ' . $conn->error);
    }

    try {
        for ($offset = 0; $offset < $total;) {
            $result = fetchBatch($conn, $batch_size, $offset);

            while ($row = $result->fetch_assoc()) {
                if (empty(detectIssues($row['html']))) {
                    continue;
                }

                $fixResult = fixMalformedLists($row['html']);
                if (!$fixResult['fixed']) {
                    continue;
                }

                // Save the repaired HTML.
                $html = $fixResult['html'];
                $id = $row['id'];

                try {
                    $stmt->bind_param("si", $html, $id);
                    $saved = $stmt->execute();
                } catch (mysqli_sql_exception $e) {
                    // In exception mode a failed UPDATE throws; count it like a false return.
                    $saved = false;
                }

                if ($saved) {
                    $fixed_count++;
                    echo "[ID: {$row['id']}] ✓ Corregido ({$fixResult['changes']} cambios)\n";
                } else {
                    $error_count++;
                    echo "[ID: {$row['id']}] ✗ Error al guardar\n";
                }
            }
            $result->free();

            $offset += $batch_size;
            if ($offset % 500 == 0) {
                echo "Procesados: $offset/$total (corregidos: $fixed_count)\n";
            }
        }
    } finally {
        $stmt->close();
    }

    echo "\n" . REPORT_SEPARATOR . "\n";
    echo "RESUMEN:\n";
    echo " Posts corregidos: $fixed_count\n";
    echo " Errores: $error_count\n";
}

// ============================================
// MAIN EXECUTION
// ============================================

// Parse arguments: the last --mode= argument wins, the default is 'scan'.
$mode = 'scan';
foreach ($argv ?? [] as $arg) {
    if (strpos($arg, '--mode=') === 0) {
        $mode = substr($arg, 7);
    }
}

echo "==============================================\n";
echo " CORRECTOR DE LISTAS - DOMDocument\n";
echo " Modo: $mode\n";
echo " Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";

// Reject unknown modes instead of doing nothing and reporting success.
if (!in_array($mode, VALID_MODES, true)) {
    echo "Modo desconocido: $mode (use scan, test o fix)\n";
    exit(1);
}

$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}

echo "✓ Conexión establecida\n\n";

try {
    // Count the rows that have HTML to inspect.
    $countResult = runQuery(
        $conn,
        "SELECT COUNT(*) as total FROM datos_seo_pagina WHERE html IS NOT NULL AND html != ''"
    );
    $total = (int) $countResult->fetch_assoc()['total'];
    $countResult->free();

    echo "Total de registros: $total\n\n";

    if ($mode === 'scan') {
        runScan($conn, $total);
    } elseif ($mode === 'test') {
        runTest($conn);
    } else {
        runFix($conn, $total);
    }
} catch (RuntimeException $e) {
    // mysqli_sql_exception extends RuntimeException, so database errors land here too.
    echo "Error: " . $e->getMessage() . "\n";
    $conn->close();
    exit(1);
}

$conn->close();
echo "\n✓ Proceso completado.\n";