<?php

declare(strict_types=1);

/**
 * Fixer for malformed HTML lists stored in WordPress post content.
 *
 * Intended location: Shared/Infrastructure/Scripts/fix-malformed-lists-wp-posts.php
 *
 * Looks at `wp_posts.post_content` of published posts and pages for <ul>/<ol>
 * elements whose direct element children are something other than <li>,
 * <script> or <template>, and can rewrite the content so that each offending
 * child ends up inside an <li>.
 *
 * Usage (command line only):
 *   php fix-malformed-lists-wp-posts.php --mode=scan   Report affected posts (default).
 *   php fix-malformed-lists-wp-posts.php --mode=test   Dry-run the fix on up to 5 affected posts.
 *   php fix-malformed-lists-wp-posts.php --mode=fix    Rewrite post_content in the database.
 *
 * Connection settings are read from the environment so that no credential is
 * ever committed with the script:
 *   WP_DB_HOST      (default: localhost)
 *   WP_DB_NAME      (default: preciosunitarios_wp)
 *   WP_DB_USER      (default: preciosunitarios_wp)
 *   WP_DB_PASSWORD  (required)
 *   WP_DB_CHARSET   (default: utf8mb4)
 */

/** Element names that may legitimately be direct children of <ul>/<ol>. */
const VALID_LIST_CHILDREN = ['li', 'script', 'template'];

/** Values accepted by the --mode= argument. */
const VALID_MODES = ['scan', 'test', 'fix'];

/** id of the temporary <div> wrapped around each fragment while it is parsed. */
const WRAPPER_ID = 'temp-wrapper';

/** WHERE clause shared by every query: published posts/pages with content. */
const POSTS_FILTER = "post_status = 'publish'
              AND post_type IN ('post', 'page')
              AND post_content IS NOT NULL
              AND post_content != ''";

/** Horizontal rule used in the console report. */
const SEPARATOR = "─────────────────────────────────";

/**
 * Reads an environment variable, falling back to a default when it is unset or empty.
 */
function envOrDefault(string $name, string $default): string
{
    $value = getenv($name);

    return ($value === false || $value === '') ? $default : $value;
}

/**
 * Shortens a text to at most $length characters for console output.
 *
 * Uses mb_substr when available so a multibyte UTF-8 character is never cut in half.
 */
function truncateText(string $text, int $length): string
{
    return function_exists('mb_substr')
        ? mb_substr($text, 0, $length, 'UTF-8')
        : substr($text, 0, $length);
}

/**
 * Opens the MySQL connection described by $config.
 *
 * @param array{host: string, username: string, password: string, database: string, charset: string} $config
 *
 * @return mysqli|null The open connection, or null (after printing the reason) when it cannot be established.
 */
function connectDatabase(array $config): ?mysqli
{
    try {
        $conn = new mysqli($config['host'], $config['username'], $config['password'], $config['database']);

        // Only reachable when mysqli is running in non-exception mode.
        if ($conn->connect_error) {
            echo "Error de conexión: " . $conn->connect_error . "\n";
            return null;
        }

        $conn->set_charset($config['charset']);
    } catch (mysqli_sql_exception $e) {
        // Since PHP 8.1 mysqli throws by default instead of populating connect_error.
        echo "Error de conexión: " . $e->getMessage() . "\n";
        return null;
    }

    return $conn;
}

/**
 * Parses an HTML fragment into a DOMDocument, wrapped in a <div id="temp-wrapper">.
 *
 * The wrapper gives the fragment a single root element and a handle for
 * serialising only the original content back out. The libxml error-handling
 * flag is restored to whatever it was before the call.
 */
function loadFragment(string $html): DOMDocument
{
    $previous = libxml_use_internal_errors(true);

    $doc = new DOMDocument('1.0', 'UTF-8');
    $wrapped = '<div id="' . WRAPPER_ID . '">' . $html . '</div>';
    // The leading XML declaration is the usual hint that makes loadHTML read the input as UTF-8.
    $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    return $doc;
}

/**
 * Collects every direct element child of a <ul>/<ol> that is not allowed there.
 *
 * All <ul> lists are visited first, then all <ol> lists, each in document order.
 * The result is a snapshot, so callers may move the returned nodes around safely.
 *
 * @return list<array{list: DOMElement, list_type: string, child: DOMElement}>
 */
function findInvalidListChildren(DOMDocument $doc): array
{
    $found = [];

    foreach (['ul', 'ol'] as $listTag) {
        foreach ($doc->getElementsByTagName($listTag) as $list) {
            foreach ($list->childNodes as $child) {
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                if (!in_array(strtolower($child->nodeName), VALID_LIST_CHILDREN, true)) {
                    $found[] = ['list' => $list, 'list_type' => $listTag, 'child' => $child];
                }
            }
        }
    }

    return $found;
}

/**
 * Reports list elements that contain invalid direct children.
 *
 * @return list<array{list_type: string, invalid_child: string}> One entry per offending child; empty when the
 *                                                                fragment is blank or has no problems.
 */
function detectIssues(string $html): array
{
    if (trim($html) === '') {
        return [];
    }

    $issues = [];
    foreach (findInvalidListChildren(loadFragment($html)) as $entry) {
        $issues[] = [
            'list_type' => $entry['list_type'],
            'invalid_child' => strtolower($entry['child']->nodeName),
        ];
    }

    return $issues;
}

/**
 * Moves every invalid direct child of a <ul>/<ol> into an <li>.
 *
 * A child that follows an <li> is appended to the nearest preceding <li>;
 * otherwise a new <li> is created in its place to hold it.
 *
 * @return array{fixed: bool, html: string, changes: int, details: list<string>}
 *         `html` is the re-serialised fragment when `fixed` is true, the untouched input otherwise.
 */
function fixMalformedLists(string $html): array
{
    $result = ['fixed' => false, 'html' => $html, 'changes' => 0, 'details' => []];

    if (trim($html) === '') {
        return $result;
    }

    $doc = loadFragment($html);
    $changes = 0;

    foreach (findInvalidListChildren($doc) as $entry) {
        $node = $entry['child'];
        $tagName = strtolower($node->nodeName);

        // Walk backwards over the siblings to find the closest preceding <li>.
        $prevLi = null;
        for ($prev = $node->previousSibling; $prev !== null; $prev = $prev->previousSibling) {
            if ($prev->nodeType === XML_ELEMENT_NODE && strtolower($prev->nodeName) === 'li') {
                $prevLi = $prev;
                break;
            }
        }

        if ($prevLi !== null) {
            $prevLi->appendChild($node);
            $result['details'][] = "Movido <$tagName> dentro del <li> anterior";
        } else {
            $newLi = $doc->createElement('li');
            $entry['list']->insertBefore($newLi, $node);
            $newLi->appendChild($node);
            $result['details'][] = "Envuelto <$tagName> en nuevo <li>";
        }
        $changes++;
    }

    if ($changes === 0) {
        return $result;
    }

    $wrapper = $doc->getElementById(WRAPPER_ID);
    if ($wrapper !== null) {
        // Serialise only the wrapper's children so the temporary <div> never reaches the output.
        $innerHtml = '';
        foreach ($wrapper->childNodes as $child) {
            $innerHtml .= $doc->saveHTML($child);
        }
        $result['html'] = $innerHtml;
        $result['fixed'] = true;
        $result['changes'] = $changes;
    }

    return $result;
}

/**
 * Streams the matching posts in ascending ID order, fetching them in batches.
 *
 * Uses keyset pagination (ID > last seen ID) rather than OFFSET, so each batch
 * is an index range scan and rows are never skipped if the filtered set
 * changes while the script is running.
 *
 * @param string $columns Comma-separated column list; must include ID.
 *
 * @return Generator<int, array<string, string|null>>
 */
function iteratePosts(mysqli $conn, string $columns, int $batchSize): Generator
{
    $lastId = 0;

    do {
        $result = $conn->query(
            "SELECT $columns FROM wp_posts
              WHERE " . POSTS_FILTER . "
              AND ID > $lastId
              ORDER BY ID LIMIT $batchSize"
        );

        $fetched = 0;
        while ($row = $result->fetch_assoc()) {
            $fetched++;
            $lastId = (int) $row['ID'];
            yield $row;
        }
        $result->free();
    } while ($fetched === $batchSize);
}

// ─── MAIN EXECUTION ─────────────────────────────────────────────────────────

// The script rewrites database content; it must never be reachable over HTTP.
if (PHP_SAPI !== 'cli') {
    echo "Este script solo puede ejecutarse desde la línea de comandos.\n";
    exit(1);
}

$mode = 'scan';
foreach (array_slice($argv ?? [], 1) as $arg) {
    if (strpos($arg, '--mode=') === 0) {
        $mode = substr($arg, 7);
    }
}

if (!in_array($mode, VALID_MODES, true)) {
    echo "Modo no válido: $mode\n";
    echo "Modos disponibles: " . implode(', ', VALID_MODES) . "\n";
    exit(1);
}

$dbPassword = getenv('WP_DB_PASSWORD');
if ($dbPassword === false) {
    echo "Falta la variable de entorno WP_DB_PASSWORD.\n";
    exit(1);
}

$db_config = [
    'host' => envOrDefault('WP_DB_HOST', 'localhost'),
    'database' => envOrDefault('WP_DB_NAME', 'preciosunitarios_wp'),
    'username' => envOrDefault('WP_DB_USER', 'preciosunitarios_wp'),
    'password' => $dbPassword,
    'charset' => envOrDefault('WP_DB_CHARSET', 'utf8mb4'),
];

echo "==============================================\n";
echo "  CORRECTOR DE LISTAS - WordPress Posts\n";
echo "  Base de datos: {$db_config['database']}\n";
echo "  Tabla: wp_posts (post_content)\n";
echo "  Modo: $mode\n";
echo "  Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";

// Make every mysqli failure an exception regardless of the PHP version's default.
mysqli_report(MYSQLI_REPORT_ERROR | MYSQLI_REPORT_STRICT);

$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}

echo "✓ Conexión establecida\n\n";

try {
    // Only published posts/pages that actually have content.
    $countResult = $conn->query("SELECT COUNT(*) as total FROM wp_posts WHERE " . POSTS_FILTER);
    $total = (int) $countResult->fetch_assoc()['total'];
    $countResult->free();
    echo "Total de posts/páginas publicados: $total\n\n";

    if ($mode === 'scan') {
        echo "MODO: ESCANEO (solo detección)\n";
        echo SEPARATOR . "\n\n";

        $processed = 0;
        $affected = 0;
        $totalIssues = 0;

        foreach (iteratePosts($conn, 'ID, post_title, post_content, guid', 100) as $row) {
            $issues = detectIssues($row['post_content']);

            if (!empty($issues)) {
                $affected++;
                $totalIssues += count($issues);

                // Only the first 20 affected posts are listed in detail.
                if ($affected <= 20) {
                    echo "[ID: {$row['ID']}] " . count($issues) . " problema(s)\n";
                    echo "Título: " . truncateText($row['post_title'], 60) . "\n";
                    foreach (array_slice($issues, 0, 2) as $issue) {
                        echo "  - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
                    }
                    echo "\n";
                }
            }

            $processed++;
            if ($processed % 1000 === 0) {
                echo "Procesados: $processed/$total...\n";
            }
        }

        echo SEPARATOR . "\n";
        echo "RESUMEN:\n";
        echo "  Posts afectados: $affected\n";
        echo "  Total incidencias: $totalIssues\n";
    } elseif ($mode === 'test') {
        echo "MODO: PRUEBA (sin guardar)\n";
        echo SEPARATOR . "\n\n";

        $sample = $conn->query(
            "SELECT ID, post_title, post_content FROM wp_posts
              WHERE " . POSTS_FILTER . "
              ORDER BY ID LIMIT 200"
        );

        $tested = 0;
        while ($tested < 5 && ($row = $sample->fetch_assoc())) {
            $issues = detectIssues($row['post_content']);
            if (empty($issues)) {
                continue;
            }

            $tested++;
            echo "POST ID: {$row['ID']}\n";
            echo "Título: {$row['post_title']}\n";
            echo "Problemas detectados: " . count($issues) . "\n\n";

            $fixResult = fixMalformedLists($row['post_content']);
            $issuesAfter = detectIssues($fixResult['html']);

            echo "ANTES: " . count($issues) . " problemas\n";
            echo "DESPUÉS: " . count($issuesAfter) . " problemas\n";
            echo "Cambios: {$fixResult['changes']}\n";

            // Integrity check: compare how many list tags exist before and after the rewrite.
            $beforeUl = substr_count($row['post_content'], '<ul');
            $afterUl = substr_count($fixResult['html'], '<ul');
            $beforeLi = substr_count($row['post_content'], '<li');
            $afterLi = substr_count($fixResult['html'], '<li');

            echo "Tags <ul>: $beforeUl → $afterUl " . ($beforeUl === $afterUl ? "✓" : "⚠️") . "\n";
            echo "Tags <li>: $beforeLi → $afterLi " . ($beforeLi === $afterLi ? "✓" : "⚠️") . "\n";

            if (count($issuesAfter) === 0) {
                echo "✅ CORRECCIÓN EXITOSA\n";
            } else {
                echo "⚠️  REQUIERE REVISIÓN\n";
            }
            echo SEPARATOR . "\n\n";
        }
        $sample->free();
    } else {
        echo "MODO: CORRECCIÓN (GUARDANDO CAMBIOS)\n";
        echo SEPARATOR . "\n\n";

        $processed = 0;
        $fixedCount = 0;
        $errorCount = 0;

        // Prepared once; the bound variables are reassigned before each execute().
        $newContent = '';
        $postId = 0;
        $stmt = $conn->prepare("UPDATE wp_posts SET post_content = ? WHERE ID = ?");
        $stmt->bind_param('si', $newContent, $postId);

        foreach (iteratePosts($conn, 'ID, post_content', 50) as $row) {
            // fixMalformedLists() reports fixed=false when there is nothing to correct,
            // so a separate detection pass (and a second parse) is unnecessary.
            $fixResult = fixMalformedLists($row['post_content']);

            if ($fixResult['fixed']) {
                $newContent = $fixResult['html'];
                $postId = (int) $row['ID'];

                try {
                    $stmt->execute();
                    $fixedCount++;
                    echo "[ID: {$row['ID']}] ✓ Corregido ({$fixResult['changes']} cambios)\n";
                } catch (mysqli_sql_exception $e) {
                    // Best effort: record the failure and continue with the next post.
                    $errorCount++;
                    echo "[ID: {$row['ID']}] ✗ Error al guardar: " . $e->getMessage() . "\n";
                }
            }

            $processed++;
            if ($processed % 500 === 0) {
                echo "Procesados: $processed/$total (corregidos: $fixedCount)\n";
            }
        }
        $stmt->close();

        echo "\n" . SEPARATOR . "\n";
        echo "RESUMEN:\n";
        echo "  Posts corregidos: $fixedCount\n";
        echo "  Errores: $errorCount\n";
    }
} catch (mysqli_sql_exception $e) {
    echo "Error de base de datos: " . $e->getMessage() . "\n";
    $conn->close();
    exit(1);
}

$conn->close();
echo "\n✓ Proceso completado.\n";