Add WordPress posts malformed lists fixer for post_content field

This commit is contained in:
FrankZamora
2025-11-27 18:10:37 -06:00
parent a3fa5fe22e
commit 3279b7df2b

View File

@@ -0,0 +1,322 @@
<?php
/**
* Corrector de Listas HTML Mal Formadas - WordPress Posts
*
* BASE DE DATOS: preciosunitarios_wp
* TABLA: wp_posts
* CAMPO: post_content
*
* USO:
* php fix-malformed-lists-wp-posts.php --mode=scan
* php fix-malformed-lists-wp-posts.php --mode=test
* php fix-malformed-lists-wp-posts.php --mode=fix
*
* @package ROI_Theme
*/
// Runtime setup for this one-off CLI maintenance script: report every error,
// raise the memory ceiling and cap the run time at 600 seconds.
error_reporting(E_ALL);
ini_set('display_errors', '1');
ini_set('memory_limit', '512M');
set_time_limit(600);

// SECURITY NOTE(review): the literal fallbacks below (including the password) are
// stored in version control. They are kept only so existing invocations keep
// working; prefer supplying WP_DB_HOST / WP_DB_NAME / WP_DB_USER / WP_DB_PASSWORD
// through the environment, then rotate the credential and drop the literals.
$envOrDefault = static function (string $name, string $default): string {
    $value = getenv($name);
    return ($value === false || $value === '') ? $default : $value;
};
$db_config = [
    'host' => $envOrDefault('WP_DB_HOST', 'localhost'),
    'database' => $envOrDefault('WP_DB_NAME', 'preciosunitarios_wp'),
    'username' => $envOrDefault('WP_DB_USER', 'preciosunitarios_wp'),
    'password' => $envOrDefault('WP_DB_PASSWORD', 'Kq#Gk%yEt+PWpVe&HZ'),
    'charset' => 'utf8mb4',
];
unset($envOrDefault);

// Parse --mode=<scan|test|fix>; the last occurrence wins and 'scan' is the default.
// $argv may be undefined outside the global CLI scope, so fall back to $_SERVER['argv'].
$validModes = ['scan', 'test', 'fix'];
$mode = 'scan';
$cliArgs = $argv ?? ($_SERVER['argv'] ?? []);
foreach ((array) $cliArgs as $arg) {
    if (is_string($arg) && strncmp($arg, '--mode=', 7) === 0) {
        $mode = (string) substr($arg, 7);
    }
}

// Reject unknown modes up front instead of connecting and then silently doing nothing.
if (!in_array($mode, $validModes, true)) {
    echo "Modo no válido: '$mode'. Use --mode=scan, --mode=test o --mode=fix\n";
    exit(1);
}

// Banner describing the run.
echo "==============================================\n";
echo " CORRECTOR DE LISTAS - WordPress Posts\n";
echo " Base de datos: {$db_config['database']}\n";
echo " Tabla: wp_posts (post_content)\n";
echo " Modo: $mode\n";
echo " Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";
/**
 * Opens a mysqli connection and applies the configured character set.
 *
 * Failures are reported on stdout and signalled with null in both mysqli error
 * modes: the legacy one (connect_error is populated) and the exception mode
 * that is the default since PHP 8.1 (mysqli_sql_exception is thrown).
 *
 * @param array $config Keys read: host, username, password, database, charset.
 * @return mysqli|null Ready-to-use connection, or null when connecting or
 *                     selecting the charset failed.
 */
function connectDatabase(array $config): ?mysqli {
    try {
        $conn = new mysqli($config['host'], $config['username'], $config['password'], $config['database']);
    } catch (mysqli_sql_exception $e) {
        echo "Error de conexión: " . $e->getMessage() . "\n";
        return null;
    }
    if ($conn->connect_error) {
        echo "Error de conexión: " . $conn->connect_error . "\n";
        return null;
    }

    // A wrong connection charset would corrupt multi-byte content on write,
    // so a failed set_charset is treated as a connection failure.
    try {
        $charsetApplied = $conn->set_charset($config['charset']);
    } catch (mysqli_sql_exception $e) {
        $charsetApplied = false;
    }
    if (!$charsetApplied) {
        echo "Error de conexión: no se pudo establecer el charset {$config['charset']}\n";
        $conn->close();
        return null;
    }

    return $conn;
}
/**
 * Finds element nodes that sit directly inside a <ul>/<ol> but are not allowed there.
 *
 * The fragment is wrapped in a <div> and parsed with DOMDocument; every direct
 * element child of a list that is not <li>, <script> or <template> is reported.
 * Text and comment children are ignored.
 *
 * @param string $html HTML fragment to inspect.
 * @return array List of ['list_type' => 'ul'|'ol', 'invalid_child' => tag name],
 *               all <ul> findings first, then all <ol> findings. Empty when the
 *               input is blank, cannot be parsed, or has no offending children.
 */
function detectIssues(string $html): array {
    $issues = [];
    if (trim($html) === '') {
        return $issues;
    }

    // Silence parser warnings while loading, then restore the caller's libxml
    // error mode so this function does not leak global state.
    $previousErrorMode = libxml_use_internal_errors(true);
    $doc = new DOMDocument('1.0', 'UTF-8');
    $wrapped = '<div id="temp-wrapper">' . $html . '</div>';
    // The XML encoding prefix makes libxml read the fragment as UTF-8.
    $loaded = $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
    if ($loaded === false) {
        return $issues;
    }

    $validChildren = ['li', 'script', 'template'];
    foreach (['ul', 'ol'] as $listTag) {
        foreach ($doc->getElementsByTagName($listTag) as $list) {
            foreach ($list->childNodes as $child) {
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                $tagName = strtolower($child->nodeName);
                if (!in_array($tagName, $validChildren, true)) {
                    $issues[] = [
                        'list_type' => $listTag,
                        'invalid_child' => $tagName,
                    ];
                }
            }
        }
    }

    return $issues;
}
/**
 * Repairs <ul>/<ol> elements whose direct element children are not <li>, <script> or <template>.
 *
 * Each offending child is moved to the end of the closest preceding <li> sibling;
 * when no <li> precedes it, a new <li> is inserted at its position and the child
 * is moved inside. The whole fragment is re-serialized by DOMDocument only when
 * at least one change was made; otherwise the input is returned untouched.
 *
 * @param string $html HTML fragment to repair.
 * @return array ['fixed' => bool, 'html' => string, 'changes' => int, 'details' => string[]].
 *               'html' equals the input unless 'fixed' is true.
 */
function fixMalformedLists(string $html): array {
    $result = ['fixed' => false, 'html' => $html, 'changes' => 0, 'details' => []];
    if (trim($html) === '') {
        return $result;
    }

    // Silence parser warnings while loading, then restore the caller's libxml
    // error mode so this function does not leak global state.
    $previousErrorMode = libxml_use_internal_errors(true);
    $doc = new DOMDocument('1.0', 'UTF-8');
    $wrapped = '<div id="temp-wrapper">' . $html . '</div>';
    // The XML encoding prefix makes libxml read the fragment as UTF-8.
    $loaded = $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);
    if ($loaded === false) {
        return $result;
    }

    // Locate the wrapper before mutating anything. Fall back to the root element
    // in case the id lookup is unavailable for this document.
    $wrapper = $doc->getElementById('temp-wrapper') ?: $doc->documentElement;
    if (!$wrapper) {
        return $result;
    }
    // If the parser left nodes after the wrapper (presumably possible with an
    // unbalanced closing tag in the content), serializing the wrapper's children
    // would drop them, so leave the content untouched.
    if ($wrapper->nextSibling !== null) {
        return $result;
    }

    // getElementsByTagName() returns live lists; snapshot them before moving nodes.
    $lists = [];
    foreach ($doc->getElementsByTagName('ul') as $ul) {
        $lists[] = $ul;
    }
    foreach ($doc->getElementsByTagName('ol') as $ol) {
        $lists[] = $ol;
    }

    $changes = 0;
    $validChildren = ['li', 'script', 'template'];
    foreach ($lists as $list) {
        // Collect first, mutate afterwards: moving nodes while iterating
        // childNodes would skip siblings.
        $nodesToProcess = [];
        foreach ($list->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE
                && !in_array(strtolower($child->nodeName), $validChildren, true)) {
                $nodesToProcess[] = $child;
            }
        }

        foreach ($nodesToProcess as $node) {
            $tagName = strtolower($node->nodeName);

            // Walk backwards to the nearest preceding <li> sibling, if any.
            $prevLi = null;
            for ($prev = $node->previousSibling; $prev !== null; $prev = $prev->previousSibling) {
                if ($prev->nodeType === XML_ELEMENT_NODE && strtolower($prev->nodeName) === 'li') {
                    $prevLi = $prev;
                    break;
                }
            }

            if ($prevLi !== null) {
                $prevLi->appendChild($node);
                $result['details'][] = "Movido <$tagName> dentro del <li> anterior";
            } else {
                $newLi = $doc->createElement('li');
                $list->insertBefore($newLi, $node);
                $newLi->appendChild($node);
                $result['details'][] = "Envuelto <$tagName> en nuevo <li>";
            }
            $changes++;
        }
    }

    if ($changes > 0) {
        // Serialize only the wrapper's children so the temporary <div> is not emitted.
        $innerHTML = '';
        foreach ($wrapper->childNodes as $child) {
            $innerHTML .= $doc->saveHTML($child);
        }
        $result['html'] = $innerHTML;
        $result['fixed'] = true;
        $result['changes'] = $changes;
    }

    return $result;
}
// MAIN EXECUTION
// Connects to the database, counts the candidate rows and then runs one of:
//   scan - report posts whose lists contain invalid direct children (read-only)
//   test - dry-run the fix on up to 5 affected posts among the first 200 (read-only)
//   fix  - rewrite post_content of every affected post (only that column is updated)
// Take a backup of wp_posts before running in fix mode.
$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}
echo "✓ Conexión establecida\n\n";

// Shared filter: only published posts/pages with non-empty content.
$postFilter = "post_status = 'publish'
    AND post_type IN ('post', 'page')
    AND post_content IS NOT NULL
    AND post_content != ''";

// Runs a SELECT and aborts the script with a message on failure, whether mysqli
// reports errors by return value or by exception (the default since PHP 8.1).
$runSelect = static function (mysqli $db, string $sql): mysqli_result {
    $errorMessage = null;
    try {
        $res = $db->query($sql);
    } catch (mysqli_sql_exception $e) {
        $res = false;
        $errorMessage = $e->getMessage();
    }
    if (!($res instanceof mysqli_result)) {
        echo "Error en consulta: " . ($errorMessage ?? $db->error) . "\n";
        $db->close();
        exit(1);
    }
    return $res;
};

// Truncates a title by characters, not bytes, when mbstring is available, so
// multi-byte UTF-8 characters are not cut in half.
$shortTitle = static function (string $title): string {
    return function_exists('mb_substr')
        ? mb_substr($title, 0, 60, 'UTF-8')
        : substr($title, 0, 60);
};

$countResult = $runSelect($conn, "SELECT COUNT(*) as total FROM wp_posts WHERE $postFilter");
$total = (int) $countResult->fetch_assoc()['total'];
$countResult->free();
echo "Total de posts/páginas publicados: $total\n\n";

if ($mode === 'scan') {
    echo "MODO: ESCANEO (solo detección)\n";
    echo "─────────────────────────────────\n\n";
    $batch_size = 100;
    $offset = 0;
    $affected = 0;
    $total_issues = 0;
    while ($offset < $total) {
        $result = $runSelect(
            $conn,
            "SELECT ID, post_title, post_content FROM wp_posts
            WHERE $postFilter
            ORDER BY ID LIMIT $batch_size OFFSET $offset"
        );
        while ($row = $result->fetch_assoc()) {
            $issues = detectIssues($row['post_content']);
            if (empty($issues)) {
                continue;
            }
            $affected++;
            $total_issues += count($issues);
            // Only the first 20 affected posts are listed in detail.
            if ($affected <= 20) {
                echo "[ID: {$row['ID']}] " . count($issues) . " problema(s)\n";
                echo "Título: " . $shortTitle((string) $row['post_title']) . "\n";
                foreach (array_slice($issues, 0, 2) as $issue) {
                    echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
                }
                echo "\n";
            }
        }
        $result->free();
        $offset += $batch_size;
        if ($offset % 1000 === 0) {
            echo "Procesados: $offset/$total...\n";
        }
    }
    echo "─────────────────────────────────\n";
    echo "RESUMEN:\n";
    echo " Posts afectados: $affected\n";
    echo " Total incidencias: $total_issues\n";
} elseif ($mode === 'test') {
    echo "MODO: PRUEBA (sin guardar)\n";
    echo "─────────────────────────────────\n\n";
    $result = $runSelect(
        $conn,
        "SELECT ID, post_title, post_content FROM wp_posts
        WHERE $postFilter
        ORDER BY ID LIMIT 200"
    );
    $tested = 0;
    while ($row = $result->fetch_assoc()) {
        // Stop once 5 samples were shown instead of parsing the remaining rows for nothing.
        if ($tested >= 5) {
            break;
        }
        $issues = detectIssues($row['post_content']);
        if (empty($issues)) {
            continue;
        }
        $tested++;
        echo "POST ID: {$row['ID']}\n";
        echo "Título: {$row['post_title']}\n";
        echo "Problemas detectados: " . count($issues) . "\n\n";
        $fixResult = fixMalformedLists($row['post_content']);
        $issuesAfter = detectIssues($fixResult['html']);
        echo "ANTES: " . count($issues) . " problemas\n";
        echo "DESPUÉS: " . count($issuesAfter) . " problemas\n";
        echo "Cambios: {$fixResult['changes']}\n";
        // Integrity check: compare opening-tag counts before and after the fix.
        // A new wrapping <li> legitimately raises the <li> count and shows a warning.
        $before_ul = substr_count($row['post_content'], '<ul');
        $after_ul = substr_count($fixResult['html'], '<ul');
        $before_li = substr_count($row['post_content'], '<li');
        $after_li = substr_count($fixResult['html'], '<li');
        echo "Tags <ul>: $before_ul → $after_ul " . ($before_ul === $after_ul ? "✓" : "⚠️") . "\n";
        echo "Tags <li>: $before_li → $after_li " . ($before_li === $after_li ? "✓" : "⚠️") . "\n";
        if (count($issuesAfter) === 0) {
            echo "✅ CORRECCIÓN EXITOSA\n";
        } else {
            echo "⚠️ REQUIERE REVISIÓN\n";
        }
        echo "─────────────────────────────────\n\n";
    }
    $result->free();
} elseif ($mode === 'fix') {
    echo "MODO: CORRECCIÓN (GUARDANDO CAMBIOS)\n";
    echo "─────────────────────────────────\n\n";
    $batch_size = 50;
    $offset = 0;
    $fixed_count = 0;
    $error_count = 0;

    // Prepare the UPDATE once and rebind by reference instead of re-preparing per row.
    $prepareError = null;
    try {
        $stmt = $conn->prepare("UPDATE wp_posts SET post_content = ? WHERE ID = ?");
    } catch (mysqli_sql_exception $e) {
        $stmt = false;
        $prepareError = $e->getMessage();
    }
    if (!$stmt) {
        echo "Error al preparar la actualización: " . ($prepareError ?? $conn->error) . "\n";
        $conn->close();
        exit(1);
    }
    $newContent = '';
    $postId = 0;
    $stmt->bind_param("si", $newContent, $postId);

    // OFFSET paging stays consistent here because the UPDATE changes neither the
    // row's ID nor the columns used by the filter (content is never saved empty).
    while ($offset < $total) {
        $result = $runSelect(
            $conn,
            "SELECT ID, post_content FROM wp_posts
            WHERE $postFilter
            ORDER BY ID LIMIT $batch_size OFFSET $offset"
        );
        while ($row = $result->fetch_assoc()) {
            if (empty(detectIssues($row['post_content']))) {
                continue;
            }
            $fixResult = fixMalformedLists($row['post_content']);
            if (!$fixResult['fixed']) {
                continue;
            }
            // Never overwrite a post with empty content if serialization went wrong.
            if (trim($fixResult['html']) === '') {
                $error_count++;
                echo "[ID: {$row['ID']}] ✗ Error al guardar\n";
                continue;
            }
            $newContent = $fixResult['html'];
            $postId = (int) $row['ID'];
            try {
                $saved = $stmt->execute();
            } catch (mysqli_sql_exception $e) {
                $saved = false;
            }
            if ($saved) {
                $fixed_count++;
                echo "[ID: {$row['ID']}] ✓ Corregido ({$fixResult['changes']} cambios)\n";
            } else {
                $error_count++;
                echo "[ID: {$row['ID']}] ✗ Error al guardar\n";
            }
        }
        $result->free();
        $offset += $batch_size;
        if ($offset % 500 === 0) {
            echo "Procesados: $offset/$total (corregidos: $fixed_count)\n";
        }
    }
    $stmt->close();
    echo "\n─────────────────────────────────\n";
    echo "RESUMEN:\n";
    echo " Posts corregidos: $fixed_count\n";
    echo " Errores: $error_count\n";
} else {
    // Unknown mode: fail loudly instead of reporting a successful no-op.
    echo "Modo no válido: '$mode'. Use --mode=scan, --mode=test o --mode=fix\n";
    $conn->close();
    exit(1);
}
$conn->close();
echo "\n✓ Proceso completado.\n";