<?php
/**
 * Malformed HTML List Fixer - WordPress Posts
 *
 * DATABASE: preciosunitarios_wp
 * TABLE: wp_posts
 * FIELD: post_content
 *
 * USAGE:
 *   php fix-malformed-lists-wp-posts.php --mode=scan
 *   php fix-malformed-lists-wp-posts.php --mode=test
 *   php fix-malformed-lists-wp-posts.php --mode=fix
 *
 * @package ROI_Theme
 */
|
|
|
|
// Report everything and lift the default limits: this is a long-running,
// one-off maintenance script that parses every published post.
error_reporting(E_ALL);
ini_set('display_errors', 1);
ini_set('memory_limit', '512M');
set_time_limit(600);

// Connection settings. Each value may be overridden with an environment
// variable so credentials do not have to live in the source file.
// NOTE(review): the literal fallbacks are kept only for backward
// compatibility; rotate the password and drop the literal once the
// FIX_LISTS_DB_* variables are configured.
$db_config = [
    'host' => getenv('FIX_LISTS_DB_HOST') ?: 'localhost',
    'database' => getenv('FIX_LISTS_DB_NAME') ?: 'preciosunitarios_wp',
    'username' => getenv('FIX_LISTS_DB_USER') ?: 'preciosunitarios_wp',
    'password' => getenv('FIX_LISTS_DB_PASSWORD') ?: 'Kq#Gk%yEt+PWpVe&HZ',
    'charset' => 'utf8mb4',
];

// Read --mode=<value> from the command line; the last occurrence wins and
// "scan" is the default. $argv may be unset outside the CLI, hence the ??.
$mode = 'scan';
foreach (($argv ?? []) as $arg) {
    if (strncmp($arg, '--mode=', 7) === 0) {
        $mode = (string) substr($arg, 7);
    }
}

// Reject unknown modes up front; otherwise the script would connect, run no
// branch at all, and still report success.
if (!in_array($mode, ['scan', 'test', 'fix'], true)) {
    echo "Modo no válido: '$mode'. Use --mode=scan, --mode=test o --mode=fix\n";
    exit(1);
}

echo "==============================================\n";
echo " CORRECTOR DE LISTAS - WordPress Posts\n";
echo " Base de datos: {$db_config['database']}\n";
echo " Tabla: wp_posts (post_content)\n";
echo " Modo: $mode\n";
echo " Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";
|
|
|
|
/**
 * Opens a mysqli connection and selects the configured character set.
 *
 * Works under both mysqli error-reporting styles: the exception mode that is
 * the default since PHP 8.1 and the older "check connect_error" mode. In
 * either case a failure is printed and null is returned, so the caller's
 * null check is the single error path.
 *
 * @param array $config Reads the keys 'host', 'username', 'password',
 *                      'database' and 'charset'.
 * @return mysqli|null Open connection, or null when connecting or setting
 *                     the charset failed.
 */
function connectDatabase(array $config): ?mysqli {
    try {
        $conn = new mysqli($config['host'], $config['username'], $config['password'], $config['database']);
    } catch (mysqli_sql_exception $e) {
        echo "Error de conexión: " . $e->getMessage() . "\n";
        return null;
    }

    // Reached only when mysqli is not in exception mode.
    if ($conn->connect_error) {
        echo "Error de conexión: " . $conn->connect_error . "\n";
        return null;
    }

    // The script rewrites post_content, so running with the wrong connection
    // charset could corrupt text; treat a charset failure as fatal.
    try {
        $charsetOk = $conn->set_charset($config['charset']);
        $charsetError = $charsetOk ? '' : $conn->error;
    } catch (mysqli_sql_exception $e) {
        $charsetOk = false;
        $charsetError = $e->getMessage();
    }

    if (!$charsetOk) {
        echo "Error de conexión: " . $charsetError . "\n";
        $conn->close();
        return null;
    }

    return $conn;
}
|
|
|
|
/**
 * Reports list elements that contain children which are not allowed there.
 *
 * The fragment is wrapped in a temporary <div>, parsed with DOMDocument, and
 * every <ul> and <ol> is inspected: each direct element child other than
 * <li>, <script> or <template> yields one entry. Text and comment nodes are
 * ignored.
 *
 * @param string $html HTML fragment to inspect.
 * @return array<int, array{list_type: string, invalid_child: string}>
 *         All <ul> findings first, then all <ol> findings; empty when the
 *         fragment is blank or has no offending children.
 */
function detectIssues(string $html): array {
    $issues = [];
    if (trim($html) === '') {
        return $issues;
    }

    // Silence parser warnings only while parsing, then hand the previous
    // libxml error mode back so the caller's setting is not changed.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument('1.0', 'UTF-8');
        $wrapped = '<div id="temp-wrapper">' . $html . '</div>';
        // The XML declaration prefix makes the HTML parser read the input as UTF-8.
        $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    $validChildren = ['li', 'script', 'template'];

    foreach (['ul', 'ol'] as $listTag) {
        foreach ($doc->getElementsByTagName($listTag) as $list) {
            foreach ($list->childNodes as $child) {
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }

                $tagName = strtolower($child->nodeName);
                if (!in_array($tagName, $validChildren, true)) {
                    $issues[] = [
                        'list_type' => $listTag,
                        'invalid_child' => $tagName,
                    ];
                }
            }
        }
    }

    return $issues;
}
|
|
|
|
/**
 * Moves invalid direct children of <ul>/<ol> elements into <li> elements.
 *
 * For every direct element child of a list that is not <li>, <script> or
 * <template>:
 *  - if an <li> sibling precedes it, the element is appended to the nearest
 *    preceding <li>;
 *  - otherwise a new <li> is inserted at its position and the element is
 *    moved inside it.
 *
 * The fragment is parsed inside a temporary wrapper <div>. When at least one
 * change was made, the wrapper's children are re-serialized, so the returned
 * markup is the parser's serialization of the whole fragment rather than a
 * minimal textual patch.
 *
 * @param string $html HTML fragment to repair.
 * @return array{fixed: bool, html: string, changes: int, details: string[]}
 *         'html' is the input unchanged unless 'fixed' is true; 'details'
 *         holds one human-readable line per moved element.
 */
function fixMalformedLists(string $html): array {
    $result = ['fixed' => false, 'html' => $html, 'changes' => 0, 'details' => []];

    if (trim($html) === '') {
        return $result;
    }

    // Silence parser warnings only while parsing, then hand the previous
    // libxml error mode back so the caller's setting is not changed.
    $previousErrorMode = libxml_use_internal_errors(true);
    try {
        $doc = new DOMDocument('1.0', 'UTF-8');
        $wrapped = '<div id="temp-wrapper">' . $html . '</div>';
        // The XML declaration prefix makes the HTML parser read the input as UTF-8.
        $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
    }

    // Snapshot the live node lists before the tree is modified.
    $lists = array_merge(
        iterator_to_array($doc->getElementsByTagName('ul'), false),
        iterator_to_array($doc->getElementsByTagName('ol'), false)
    );

    $changes = 0;
    $validChildren = ['li', 'script', 'template'];

    foreach ($lists as $list) {
        // Collect the offenders first; moving nodes while walking childNodes
        // would disturb the iteration.
        $nodesToProcess = [];
        foreach ($list->childNodes as $child) {
            if ($child->nodeType === XML_ELEMENT_NODE
                && !in_array(strtolower($child->nodeName), $validChildren, true)) {
                $nodesToProcess[] = $child;
            }
        }

        foreach ($nodesToProcess as $node) {
            $tagName = strtolower($node->nodeName);

            // Look backwards for the closest <li> sibling.
            $prevLi = null;
            for ($prev = $node->previousSibling; $prev; $prev = $prev->previousSibling) {
                if ($prev->nodeType === XML_ELEMENT_NODE && strtolower($prev->nodeName) === 'li') {
                    $prevLi = $prev;
                    break;
                }
            }

            if ($prevLi) {
                $prevLi->appendChild($node);
                $result['details'][] = "Movido <$tagName> dentro del <li> anterior";
            } else {
                // No <li> before it: wrap the element in a fresh <li> at the same position.
                $newLi = $doc->createElement('li');
                $list->insertBefore($newLi, $node);
                $newLi->appendChild($node);
                $result['details'][] = "Envuelto <$tagName> en nuevo <li>";
            }
            $changes++;
        }
    }

    if ($changes > 0) {
        $wrapper = $doc->getElementById('temp-wrapper');
        if ($wrapper) {
            // Serialize the wrapper's children only, so the temporary <div> is dropped.
            $innerHTML = '';
            foreach ($wrapper->childNodes as $child) {
                $innerHTML .= $doc->saveHTML($child);
            }
            $result['html'] = $innerHTML;
            $result['fixed'] = true;
            $result['changes'] = $changes;
        }
    }

    return $result;
}
|
|
|
|
// MAIN EXECUTION
$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}

echo "✓ Conexión establecida\n\n";

// Only published posts/pages that actually have content are considered.
$countQuery = "SELECT COUNT(*) as total FROM wp_posts
    WHERE post_status = 'publish'
    AND post_type IN ('post', 'page')
    AND post_content IS NOT NULL
    AND post_content != ''";

// query() either returns false or throws, depending on the mysqli report
// mode; handle both so a failed count ends the script with a clear message.
$countError = null;
try {
    $result = $conn->query($countQuery);
    if ($result === false) {
        $countError = $conn->error;
    }
} catch (mysqli_sql_exception $e) {
    $countError = $e->getMessage();
}

if ($countError !== null) {
    echo "Error en la consulta: " . $countError . "\n";
    $conn->close();
    exit(1);
}

$total = (int) $result->fetch_assoc()['total'];
$result->free();
echo "Total de posts/páginas publicados: $total\n\n";
|
|
|
|
// Filter shared by every query below: published posts/pages with content.
$postFilter = "post_status = 'publish'
    AND post_type IN ('post', 'page')
    AND post_content IS NOT NULL
    AND post_content != ''";

// Runs a SELECT and returns its result set, or null after printing the error.
// Covers both mysqli report modes (false return value and thrown exception).
$runQuery = static function (mysqli $db, string $sql) {
    try {
        $res = $db->query($sql);
    } catch (mysqli_sql_exception $e) {
        echo "Error en la consulta: " . $e->getMessage() . "\n";
        return null;
    }
    if ($res === false) {
        echo "Error en la consulta: " . $db->error . "\n";
        return null;
    }
    return $res;
};

// Cuts a title to $length characters; falls back to byte-wise substr when
// the mbstring extension is not available.
$truncate = static function (string $text, int $length): string {
    return function_exists('mb_substr')
        ? mb_substr($text, 0, $length, 'UTF-8')
        : substr($text, 0, $length);
};

// Set when a database error stops processing early; decides the exit code.
$aborted = false;

if ($mode === 'scan') {
    echo "MODO: ESCANEO (solo detección)\n";
    echo "─────────────────────────────────\n\n";

    $batch_size = 100;
    $offset = 0;
    $affected = 0;
    $total_issues = 0;

    while ($offset < $total) {
        $result = $runQuery(
            $conn,
            "SELECT ID, post_title, post_content FROM wp_posts
            WHERE $postFilter
            ORDER BY ID LIMIT $batch_size OFFSET $offset"
        );
        if ($result === null) {
            $aborted = true;
            break;
        }

        while ($row = $result->fetch_assoc()) {
            $issues = detectIssues($row['post_content']);
            if (empty($issues)) {
                continue;
            }

            $affected++;
            $total_issues += count($issues);

            // Print details for the first 20 affected posts only.
            if ($affected <= 20) {
                echo "[ID: {$row['ID']}] " . count($issues) . " problema(s)\n";
                echo "Título: " . $truncate($row['post_title'], 60) . "\n";
                foreach (array_slice($issues, 0, 2) as $issue) {
                    echo " - <{$issue['list_type']}> contiene <{$issue['invalid_child']}>\n";
                }
                echo "\n";
            }
        }
        $result->free();

        $offset += $batch_size;

        if ($offset % 1000 === 0) {
            echo "Procesados: $offset/$total...\n";
        }
    }

    echo "─────────────────────────────────\n";
    echo "RESUMEN:\n";
    echo " Posts afectados: $affected\n";
    echo " Total incidencias: $total_issues\n";

} elseif ($mode === 'test') {
    echo "MODO: PRUEBA (sin guardar)\n";
    echo "─────────────────────────────────\n\n";

    $result = $runQuery(
        $conn,
        "SELECT ID, post_title, post_content FROM wp_posts
        WHERE $postFilter
        ORDER BY ID LIMIT 200"
    );

    if ($result === null) {
        $aborted = true;
    } else {
        // Stop reading as soon as five affected posts have been shown.
        $tested = 0;
        while ($tested < 5 && ($row = $result->fetch_assoc())) {
            $issues = detectIssues($row['post_content']);
            if (empty($issues)) {
                continue;
            }

            $tested++;
            echo "POST ID: {$row['ID']}\n";
            echo "Título: {$row['post_title']}\n";
            echo "Problemas detectados: " . count($issues) . "\n\n";

            $fixResult = fixMalformedLists($row['post_content']);
            $issuesAfter = detectIssues($fixResult['html']);

            echo "ANTES: " . count($issues) . " problemas\n";
            echo "DESPUÉS: " . count($issuesAfter) . " problemas\n";
            echo "Cambios: {$fixResult['changes']}\n";

            // Integrity check: compare raw tag counts before and after.
            $before_ul = substr_count($row['post_content'], '<ul');
            $after_ul = substr_count($fixResult['html'], '<ul');
            $before_li = substr_count($row['post_content'], '<li');
            $after_li = substr_count($fixResult['html'], '<li');

            echo "Tags <ul>: $before_ul → $after_ul " . ($before_ul === $after_ul ? "✓" : "⚠️") . "\n";
            echo "Tags <li>: $before_li → $after_li " . ($before_li === $after_li ? "✓" : "⚠️") . "\n";

            if (count($issuesAfter) === 0) {
                echo "✅ CORRECCIÓN EXITOSA\n";
            } else {
                echo "⚠️ REQUIERE REVISIÓN\n";
            }
            echo "─────────────────────────────────\n\n";
        }
        $result->free();
    }

} elseif ($mode === 'fix') {
    echo "MODO: CORRECCIÓN (GUARDANDO CAMBIOS)\n";
    echo "─────────────────────────────────\n\n";

    $batch_size = 50;
    $offset = 0;
    $fixed_count = 0;
    $error_count = 0;

    // Prepare the UPDATE once and reuse it for every row. bind_param binds
    // by reference, so assigning the two variables before each execute()
    // is enough.
    $newContent = '';
    $postId = 0;
    try {
        $stmt = $conn->prepare("UPDATE wp_posts SET post_content = ? WHERE ID = ?");
    } catch (mysqli_sql_exception $e) {
        $stmt = false;
    }

    if (!$stmt) {
        echo "Error en la consulta: " . $conn->error . "\n";
        $aborted = true;
    } else {
        $stmt->bind_param("si", $newContent, $postId);

        while ($offset < $total) {
            $result = $runQuery(
                $conn,
                "SELECT ID, post_content FROM wp_posts
                WHERE $postFilter
                ORDER BY ID LIMIT $batch_size OFFSET $offset"
            );
            if ($result === null) {
                $aborted = true;
                break;
            }

            while ($row = $result->fetch_assoc()) {
                if (empty(detectIssues($row['post_content']))) {
                    continue;
                }

                $fixResult = fixMalformedLists($row['post_content']);
                if (!$fixResult['fixed']) {
                    continue;
                }

                $newContent = $fixResult['html'];
                $postId = (int) $row['ID'];

                try {
                    $saved = $stmt->execute();
                } catch (mysqli_sql_exception $e) {
                    $saved = false;
                }

                if ($saved) {
                    $fixed_count++;
                    echo "[ID: {$row['ID']}] ✓ Corregido ({$fixResult['changes']} cambios)\n";
                } else {
                    $error_count++;
                    echo "[ID: {$row['ID']}] ✗ Error al guardar\n";
                }
            }
            $result->free();

            $offset += $batch_size;

            if ($offset % 500 === 0) {
                echo "Procesados: $offset/$total (corregidos: $fixed_count)\n";
            }
        }

        $stmt->close();
    }

    echo "\n─────────────────────────────────\n";
    echo "RESUMEN:\n";
    echo " Posts corregidos: $fixed_count\n";
    echo " Errores: $error_count\n";

} else {
    // Unknown mode: nothing was processed, so do not report success.
    echo "Modo no válido: '$mode'. Use --mode=scan, --mode=test o --mode=fix\n";
    $aborted = true;
}

$conn->close();

if ($aborted) {
    echo "\n✗ Proceso interrumpido por errores.\n";
    exit(1);
}

echo "\n✓ Proceso completado.\n";
|