<?php
/**
 * Malformed HTML list fixer based on DOMDocument.
 *
 * Detects and repairs lists with an invalid structure:
 * - <ul>/<ol> containing non-<li> elements as direct children
 * - Nested lists that are siblings of <li> instead of children of <li>
 *
 * USAGE:
 * php fix-malformed-lists-dom.php --mode=scan # Scan only
 * php fix-malformed-lists-dom.php --mode=test # Try the fix (1 post)
 * php fix-malformed-lists-dom.php --mode=fix # Apply the fixes
 *
 * @package ROI_Theme
 * @since Phase 4.4 Accessibility
 */
// Runtime settings for a long-running CLI maintenance job.
error_reporting(E_ALL);
ini_set('display_errors', '1');
ini_set('memory_limit', '512M');
set_time_limit(600);

// Database connection settings.
// NOTE(review): these credentials are hard-coded in the source file; consider
// moving them to environment variables or an untracked config file and
// rotating the password.
$db_config = [
    'host' => 'localhost',
    'database' => 'preciosunitarios_seo',
    'username' => 'preciosunitarios_seo',
    'password' => 'ACl%EEFd=V-Yvb??',
    'charset' => 'utf8mb4'
];

// Parse CLI arguments. The last --mode=<value> wins; the default is 'scan'.
// $argv is only populated when running under the CLI with register_argc_argv.
$mode = 'scan';
$cliArgs = (isset($argv) && is_array($argv)) ? $argv : [];
foreach ($cliArgs as $arg) {
    if (strpos($arg, '--mode=') === 0) {
        $mode = substr($arg, 7);
    }
}

// Reject unknown modes up front instead of connecting, doing nothing and
// reporting success.
if (!in_array($mode, ['scan', 'test', 'fix'], true)) {
    echo "Modo inválido: '$mode'. Use --mode=scan, --mode=test o --mode=fix\n";
    exit(1);
}

echo "==============================================\n";
echo " CORRECTOR DE LISTAS - DOMDocument\n";
echo " Modo: $mode\n";
echo " Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";
/**
 * Open a mysqli connection using the given settings.
 *
 * Reads the 'host', 'username', 'password' and 'database' keys, plus an
 * optional 'charset' key (defaults to 'utf8mb4').
 *
 * Failures are reported on stdout and signalled with a null return, both when
 * mysqli reports errors through connect_error and when it throws
 * mysqli_sql_exception (the default report mode since PHP 8.1).
 *
 * @param array $config Connection settings.
 * @return mysqli|null Open connection, or null if connecting or selecting
 *                     the charset failed.
 */
function connectDatabase(array $config): ?mysqli {
    $charset = $config['charset'] ?? 'utf8mb4';

    try {
        $conn = new mysqli(
            $config['host'],
            $config['username'],
            $config['password'],
            $config['database']
        );

        if ($conn->connect_error) {
            echo "Error de conexión: " . $conn->connect_error . "\n";
            return null;
        }

        // A wrong connection charset would corrupt the HTML that is read and
        // written back, so treat a failure here as a failed connection.
        if (!$conn->set_charset($charset)) {
            echo "Error de conexión: " . $conn->error . "\n";
            $conn->close();
            return null;
        }
    } catch (mysqli_sql_exception $e) {
        echo "Error de conexión: " . $e->getMessage() . "\n";
        return null;
    }

    return $conn;
}
/**
 * Repair malformed lists in an HTML fragment using DOMDocument.
 *
 * The fragment is wrapped in a temporary <div id="temp-wrapper"> so that it
 * has a single root, parsed, every <ul>/<ol> is passed to fixListChildren(),
 * and the wrapper's inner HTML is serialized back when something changed.
 *
 * @param string $html HTML fragment to repair.
 * @return array {
 *     fixed:   bool     True when at least one change was applied and serialized.
 *     html:    string   Repaired HTML (the original input when nothing was fixed).
 *     changes: int      Number of nodes moved or wrapped.
 *     details: string[] Human-readable description of each change.
 * }
 */
function fixMalformedLists(string $html): array {
    $result = [
        'fixed' => false,
        'html' => $html,
        'changes' => 0,
        'details' => []
    ];

    // Collect parse warnings internally (malformed HTML is expected here) and
    // restore the caller's libxml error mode afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);

    $doc = new DOMDocument('1.0', 'UTF-8');

    // Wrap in a container so the fragment has one root element that can be
    // located again by id when extracting the result.
    $wrapped = '<div id="temp-wrapper">' . $html . '</div>';

    // The XML encoding prefix makes libxml read the input as UTF-8.
    $loaded = $doc->loadHTML(
        '<?xml encoding="UTF-8">' . $wrapped,
        LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
    );
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if (!$loaded) {
        return $result;
    }

    // Snapshot all lists first: getElementsByTagName() returns a live list and
    // the tree is modified while fixing.
    $lists = [];
    foreach (['ul', 'ol'] as $listTag) {
        foreach ($doc->getElementsByTagName($listTag) as $listNode) {
            $lists[] = $listNode;
        }
    }

    $changes = 0;
    foreach ($lists as $list) {
        $changes += fixListChildren($list, $result['details']);
    }

    if ($changes === 0) {
        return $result;
    }

    // Serialize only the wrapper's children, dropping the wrapper itself.
    $wrapper = $doc->getElementById('temp-wrapper');
    if ($wrapper) {
        $innerHTML = '';
        foreach ($wrapper->childNodes as $child) {
            $innerHTML .= $doc->saveHTML($child);
        }
        $result['html'] = $innerHTML;
        $result['fixed'] = true;
        $result['changes'] = $changes;
    }

    return $result;
}
/**
 * Fix the direct children of a list element.
 *
 * A list may only contain <li>, <script> and <template> elements as direct
 * children. Every other element child (including nested <ul>/<ol>) is handed
 * to wrapInLi(), which moves it into an <li>.
 *
 * @param DOMElement $list    The <ul> or <ol> element to repair.
 * @param array      $details Receives one description per change (by reference).
 * @return int Number of changes applied to this list.
 */
function fixListChildren(DOMElement $list, array &$details): int {
    $validChildren = ['li', 'script', 'template'];

    // Collect the offending nodes first; childNodes is live and the list is
    // restructured while they are processed.
    $nodesToProcess = [];
    foreach ($list->childNodes as $child) {
        if ($child->nodeType !== XML_ELEMENT_NODE) {
            continue;
        }
        if (!in_array(strtolower($child->nodeName), $validChildren, true)) {
            $nodesToProcess[] = $child;
        }
    }

    // Nested lists and any other invalid element get the same treatment:
    // they end up inside an <li>.
    $changes = 0;
    foreach ($nodesToProcess as $node) {
        $changes += wrapInLi($list, $node, $details);
    }

    return $changes;
}
/**
 * Move a node into an <li>: the nearest preceding <li> sibling when one
 * exists, otherwise a newly created <li> inserted at the node's position.
 *
 * @param DOMElement $list    The list that currently contains $node.
 * @param DOMNode    $node    The invalid direct child of $list.
 * @param array      $details Receives a description of the change (by reference).
 * @return int Always 1 (one change applied).
 */
function wrapInLi(DOMElement $list, DOMNode $node, array &$details): int {
    $doc = $list->ownerDocument;
    $tagName = strtolower($node->nodeName);

    // Look backwards for the closest <li> sibling, skipping text nodes and
    // other elements.
    $prevLi = null;
    for ($prev = $node->previousSibling; $prev; $prev = $prev->previousSibling) {
        if ($prev->nodeType === XML_ELEMENT_NODE && strtolower($prev->nodeName) === 'li') {
            $prevLi = $prev;
            break;
        }
    }

    if ($prevLi) {
        // Append the node at the end of the preceding <li>.
        $prevLi->appendChild($node);
        $details[] = "Movido <$tagName> dentro del <li> anterior";
        return 1;
    }

    // No preceding <li>: create one in place and move the node into it.
    $newLi = $doc->createElement('li');
    $list->insertBefore($newLi, $node);
    $newLi->appendChild($node);
    $details[] = "Envuelto <$tagName> en nuevo <li>";
    return 1;
}
/**
 * Detect malformed lists in an HTML fragment without modifying it.
 *
 * Parses the fragment the same way fixMalformedLists() does (temporary
 * wrapper element, UTF-8 hint) and reports every element that is a direct
 * child of a <ul> or <ol> but is not <li>, <script> or <template>.
 * All <ul> findings are listed before <ol> findings.
 *
 * @param string $html HTML fragment to inspect.
 * @return array List of issues, each with the keys 'list_type' ('ul'|'ol'),
 *               'invalid_child' (lower-case tag name) and 'context' (snippet
 *               produced by getNodeContext()).
 */
function detectIssues(string $html): array {
    $issues = [];

    // Collect parse warnings internally and restore the caller's libxml
    // error mode afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);

    $doc = new DOMDocument('1.0', 'UTF-8');
    $wrapped = '<div id="temp-wrapper">' . $html . '</div>';
    $loaded = $doc->loadHTML(
        '<?xml encoding="UTF-8">' . $wrapped,
        LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD
    );
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    if (!$loaded) {
        return $issues;
    }

    $validChildren = ['li', 'script', 'template'];

    foreach (['ul', 'ol'] as $listType) {
        foreach ($doc->getElementsByTagName($listType) as $list) {
            foreach ($list->childNodes as $child) {
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                $tagName = strtolower($child->nodeName);
                if (in_array($tagName, $validChildren, true)) {
                    continue;
                }
                $issues[] = [
                    'list_type' => $listType,
                    'invalid_child' => $tagName,
                    'context' => getNodeContext($child)
                ];
            }
        }
    }

    return $issues;
}
/**
 * Build a short HTML snippet of a node for debug output.
 *
 * Returns the first 100 characters of the node's serialized HTML, followed
 * by '...' when the serialization was longer. Truncation is done on UTF-8
 * character boundaries so the snippet stays valid UTF-8; if the serialized
 * HTML is not valid UTF-8 it falls back to a 100-byte cut.
 *
 * @param DOMNode $node Node to describe.
 * @return string Snippet, or an empty string if the node cannot be serialized.
 */
function getNodeContext(DOMNode $node): string {
    $doc = $node->ownerDocument;
    if ($doc === null) {
        return '';
    }

    $html = $doc->saveHTML($node);
    if ($html === false) {
        return '';
    }

    // The /u modifier makes "." match whole UTF-8 characters, so a multi-byte
    // character is never split. preg_match() fails on invalid UTF-8 input.
    if (preg_match('/^.{0,100}/su', $html, $match) === 1) {
        $snippet = $match[0];
    } else {
        $snippet = substr($html, 0, 100);
    }

    return $snippet . (strlen($html) > strlen($snippet) ? '...' : '');
}
// ============================================
// MAIN EXECUTION
// ============================================
$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}
echo "✓ Conexión establecida\n\n";

// Count the rows that carry HTML to inspect; this bounds the batch loops.
$result = $conn->query("SELECT COUNT(*) as total FROM datos_seo_pagina WHERE html IS NOT NULL AND html != ''");
if (!$result) {
    echo "Error en consulta: " . $conn->error . "\n";
    $conn->close();
    exit(1);
}
$total = (int) $result->fetch_assoc()['total'];
$result->free();
echo "Total de registros: $total\n\n";

if ($mode === 'scan') {
    // SCAN MODE: detect problems only, nothing is written.
    echo "MODO: ESCANEO (solo detección)\n";
    echo "─────────────────────────────────\n\n";
    $batch_size = 100;
    $offset = 0;
    $affected = 0;
    $total_issues = 0;

    while ($offset < $total) {
        $query = "SELECT id, page, html FROM datos_seo_pagina
            WHERE html IS NOT NULL AND html != ''
            ORDER BY id LIMIT $batch_size OFFSET $offset";
        $result = $conn->query($query);
        if (!$result) {
            echo "Error en consulta: " . $conn->error . "\n";
            break;
        }

        while ($row = $result->fetch_assoc()) {
            $issues = detectIssues($row['html']);
            if (empty($issues)) {
                continue;
            }
            $affected++;
            $total_issues += count($issues);

            // Only print details for the first 20 affected rows.
            if ($affected <= 20) {
                echo "[ID: {$row['id']}] " . count($issues) . " problema(s)\n";
                echo "URL: {$row['page']}\n";
                foreach (array_slice($issues, 0, 2) as $issue) {
                    echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
                }
                echo "\n";
            }
        }
        $result->free();

        $offset += $batch_size;
        if ($offset % 1000 === 0) {
            echo "Procesados: $offset/$total...\n";
        }
    }

    echo "─────────────────────────────────\n";
    echo "RESUMEN:\n";
    echo " Posts afectados: $affected\n";
    echo " Total incidencias: $total_issues\n";
} elseif ($mode === 'test') {
    // TEST MODE: run the fix on the first affected row without saving it.
    echo "MODO: PRUEBA (sin guardar)\n";
    echo "─────────────────────────────────\n\n";

    // Only the first 100 rows are searched for a candidate.
    $query = "SELECT id, page, html FROM datos_seo_pagina
        WHERE html IS NOT NULL AND html != ''
        ORDER BY id LIMIT 100";
    $result = $conn->query($query);
    if (!$result) {
        echo "Error en consulta: " . $conn->error . "\n";
        $conn->close();
        exit(1);
    }

    $found = false;
    while ($row = $result->fetch_assoc()) {
        $issues = detectIssues($row['html']);
        if (empty($issues)) {
            continue;
        }
        $found = true;

        echo "POST ID: {$row['id']}\n";
        echo "URL: {$row['page']}\n";
        echo "Problemas detectados: " . count($issues) . "\n\n";
        echo "ANTES (problemas):\n";
        foreach (array_slice($issues, 0, 3) as $issue) {
            echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
            echo " Contexto: " . htmlspecialchars(substr($issue['context'], 0, 80)) . "\n";
        }

        // Apply the fix in memory.
        $fixResult = fixMalformedLists($row['html']);
        echo "\nDESPUÉS (corrección):\n";
        echo " Cambios realizados: {$fixResult['changes']}\n";
        foreach ($fixResult['details'] as $detail) {
            echo " - $detail\n";
        }

        // Re-run detection on the output to confirm the issue count went down.
        $issuesAfter = detectIssues($fixResult['html']);
        echo "\nVERIFICACIÓN:\n";
        echo " Problemas antes: " . count($issues) . "\n";
        echo " Problemas después: " . count($issuesAfter) . "\n";
        if (count($issuesAfter) < count($issues)) {
            echo " ✓ Reducción de problemas\n";
        }

        // Show a sample of the repaired HTML.
        if ($fixResult['fixed']) {
            echo "\nMUESTRA HTML CORREGIDO (primeros 500 chars):\n";
            echo "─────────────────────────────────\n";
            echo htmlspecialchars(substr($fixResult['html'], 0, 500)) . "...\n";
        }
        break;
    }
    $result->free();

    if (!$found) {
        echo "No se encontraron problemas en los primeros 100 registros.\n";
    }
} elseif ($mode === 'fix') {
    // FIX MODE: repair every affected row and write it back.
    echo "MODO: CORRECCIÓN (GUARDANDO CAMBIOS)\n";
    echo "─────────────────────────────────\n\n";
    $batch_size = 50;
    $offset = 0;
    $fixed_count = 0;
    $error_count = 0;

    // Prepare the UPDATE once and reuse it for every row. bind_param() binds
    // by reference, so assigning the two variables below is enough before
    // each execute().
    $stmt = $conn->prepare("UPDATE datos_seo_pagina SET html = ? WHERE id = ?");
    if (!$stmt) {
        echo "Error al preparar UPDATE: " . $conn->error . "\n";
        $conn->close();
        exit(1);
    }
    $fixedHtml = '';
    $rowId = 0;
    $stmt->bind_param("si", $fixedHtml, $rowId);

    while ($offset < $total) {
        $query = "SELECT id, page, html FROM datos_seo_pagina
            WHERE html IS NOT NULL AND html != ''
            ORDER BY id LIMIT $batch_size OFFSET $offset";
        $result = $conn->query($query);
        if (!$result) {
            echo "Error en consulta: " . $conn->error . "\n";
            break;
        }

        while ($row = $result->fetch_assoc()) {
            if (empty(detectIssues($row['html']))) {
                continue;
            }
            $fixResult = fixMalformedLists($row['html']);
            if (!$fixResult['fixed']) {
                continue;
            }

            // Save the repaired HTML. A failed write is counted and the run
            // continues, whether mysqli reports it by return value or by
            // exception.
            $fixedHtml = $fixResult['html'];
            $rowId = (int) $row['id'];
            try {
                $saved = $stmt->execute();
            } catch (mysqli_sql_exception $e) {
                $saved = false;
            }

            if ($saved) {
                $fixed_count++;
                echo "[ID: {$row['id']}] ✓ Corregido ({$fixResult['changes']} cambios)\n";
            } else {
                $error_count++;
                echo "[ID: {$row['id']}] ✗ Error al guardar\n";
            }
        }
        $result->free();

        $offset += $batch_size;
        if ($offset % 500 === 0) {
            echo "Procesados: $offset/$total (corregidos: $fixed_count)\n";
        }
    }
    $stmt->close();

    echo "\n─────────────────────────────────\n";
    echo "RESUMEN:\n";
    echo " Posts corregidos: $fixed_count\n";
    echo " Errores: $error_count\n";
}

$conn->close();
echo "\n✓ Proceso completado.\n";