Add diagnostic script for malformed HTML lists

Phase 4.4 Accessibility: script that scans the database for posts
with invalid list structures (a <ul>/<ol> placed directly inside another
<ul>/<ol> instead of inside an <li>).
Read-only analysis; no data is modified.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
FrankZamora
2025-11-27 17:20:00 -06:00
parent 371995d151
commit f4bd013271

View File

@@ -0,0 +1,307 @@
<?php
/**
* Script de Diagnóstico: Listas HTML Mal Formadas
*
* PROPÓSITO: Identificar posts con estructura de listas inválida
* - <ul> conteniendo <ul> como hijo directo (en lugar de dentro de <li>)
* - <ol> conteniendo <ol> como hijo directo
*
* BASE DE DATOS: preciosunitarios_seo
* TABLA: datos_seo_pagina
* CAMPO: html
*
* IMPORTANTE: Este script SOLO LEE, no modifica ningún dato.
*
* @package ROI_Theme
* @since Phase 4.4 Accessibility
*/
// Diagnostic script: surface every PHP notice/warning directly in the output.
error_reporting(E_ALL);
ini_set('display_errors', 1);
ini_set('memory_limit', '512M');
set_time_limit(300); // 5 minutes maximum

// Database credentials. Each value may be overridden through an environment
// variable so real credentials never have to be written into this file; the
// fallbacks are the original local-development defaults.
$db_config = [
    'host'     => getenv('LISTS_DIAG_DB_HOST') ?: 'localhost',
    'database' => getenv('LISTS_DIAG_DB_NAME') ?: 'preciosunitarios_seo',
    'username' => getenv('LISTS_DIAG_DB_USER') ?: 'root',
    'password' => getenv('LISTS_DIAG_DB_PASSWORD') ?: '',
    'charset'  => 'utf8mb4',
];

// Regex patterns used to detect malformed lists.
//
// Two details matter in the "*_direct_*" patterns:
//  - The <li>…</li> group is repeated possessively (*+). With a plain "*", the
//    lazy ".*?" inside the group can be stretched over any number of following
//    items, so a well-formed list of N items is retried in ~2^N ways before the
//    match fails; PCRE then hits pcre.backtrack_limit and the whole check
//    returns an error instead of a result.
//  - "\b" after the tag name keeps "<li" from matching "<link ...>".
// Any occurrence the possessive form no longer reaches through an item that
// itself contains a nested list still ends in "</li><ul" / "</li><ol", which
// the "li_sibling_*" patterns report.
$malformed_patterns = [
    // <ul>, optionally some complete <li> items, then another <ul> as a direct child
    'ul_direct_ul' => '/<ul\b[^>]*>\s*(?:<li\b[^>]*>.*?<\/li>\s*)*+<ul\b/is',
    // </li> immediately followed by <ul> (sibling of the item instead of nested in it)
    'li_sibling_ul' => '/<\/li>\s*<ul\b[^>]*>/is',
    // <ol>, optionally some complete <li> items, then another <ol> as a direct child
    'ol_direct_ol' => '/<ol\b[^>]*>\s*(?:<li\b[^>]*>.*?<\/li>\s*)*+<ol\b/is',
    // </li> immediately followed by <ol> (sibling)
    'li_sibling_ol' => '/<\/li>\s*<ol\b[^>]*>/is',
];
/**
 * Open a mysqli connection with the given settings.
 *
 * A failed connection is reported on standard output and signalled with a
 * null return, regardless of whether mysqli is configured to throw
 * exceptions (the default since PHP 8.1) or to return error state.
 *
 * @param array $config Connection settings; the keys 'host', 'username',
 *                      'password', 'database' and 'charset' are read.
 * @return mysqli|null Connected handle, or null when connecting failed.
 */
function connectDatabase(array $config): ?mysqli {
    try {
        $conn = new mysqli(
            $config['host'],
            $config['username'],
            $config['password'],
            $config['database']
        );
    } catch (mysqli_sql_exception $e) {
        // Exception mode: the constructor throws before connect_error can be inspected.
        echo "Error de conexión: " . $e->getMessage() . "\n";
        return null;
    }

    // Non-exception mode: the failure is exposed through connect_error.
    if ($conn->connect_error) {
        echo "Error de conexión: " . $conn->connect_error . "\n";
        return null;
    }

    // A charset that cannot be applied is reported but does not abort the
    // run, matching the original best-effort behaviour.
    try {
        $charset_applied = $conn->set_charset($config['charset']);
    } catch (mysqli_sql_exception $e) {
        $charset_applied = false;
    }
    if (!$charset_applied) {
        echo "Advertencia: no se pudo establecer el charset {$config['charset']}\n";
    }

    return $conn;
}
/**
 * Run every supplied pattern against an HTML string and collect the matches.
 *
 * A pattern that PCRE cannot evaluate (for example because the backtrack
 * limit was reached) raises an E_USER_WARNING and is skipped, so a failed
 * check is never mistaken for "no problems found".
 *
 * @param string $html     HTML to inspect.
 * @param array  $patterns Map of issue name => PCRE pattern.
 * @return array List of issues. Each entry holds 'type' (the pattern's key),
 *               'position' (byte offset of the match within $html) and
 *               'context' (excerpt produced by getContextAroundPosition()).
 */
function analyzeMalformedLists(string $html, array $patterns): array {
    $issues = [];

    foreach ($patterns as $pattern_name => $pattern) {
        $match_count = preg_match_all($pattern, $html, $matches, PREG_OFFSET_CAPTURE);

        if ($match_count === false) {
            // false means the regex engine failed, not that nothing matched.
            trigger_error(
                "Patrón '{$pattern_name}' no pudo evaluarse (código de error PCRE: " . preg_last_error() . ')',
                E_USER_WARNING
            );
            continue;
        }

        // With PREG_OFFSET_CAPTURE each match is [matched text, byte offset].
        foreach ($matches[0] as $match) {
            $position = $match[1];
            $issues[] = [
                'type' => $pattern_name,
                'position' => $position,
                'context' => getContextAroundPosition($html, $position, 100),
            ];
        }
    }

    return $issues;
}
/**
 * Build a short, display-safe excerpt of the HTML around a byte offset.
 *
 * The window spans up to $length bytes on each side of $position. Its edges
 * are widened to the nearest UTF-8 character boundary so a multi-byte
 * character is never cut in half. Whitespace runs are collapsed to a single
 * space, the text is HTML-escaped, and '...' is added on each side that was
 * truncated.
 *
 * @param string $html     Full HTML source.
 * @param int    $position Byte offset of the point of interest.
 * @param int    $length   Number of bytes of context to keep on each side.
 * @return string Escaped excerpt.
 */
function getContextAroundPosition(string $html, int $position, int $length = 100): string {
    $total_bytes = strlen($html);
    $start = max(0, $position - $length);
    $end = min($total_bytes, $position + $length);

    // UTF-8 continuation bytes look like 10xxxxxx. If a window edge lands on
    // one, move outward until it sits on the first byte of a character.
    while ($start > 0 && $start < $total_bytes && (ord($html[$start]) & 0xC0) === 0x80) {
        $start--;
    }
    while ($end > 0 && $end < $total_bytes && (ord($html[$end]) & 0xC0) === 0x80) {
        $end++;
    }

    $context = substr($html, $start, $end - $start);

    // Clean up for display.
    $context = preg_replace('/\s+/', ' ', $context);
    // Explicit flags/encoding so the result does not depend on the PHP
    // version's defaults (these are the defaults from PHP 8.1 onwards).
    $context = htmlspecialchars($context, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

    if ($start > 0) {
        $context = '...' . $context;
    }
    if ($end < $total_bytes) {
        $context .= '...';
    }

    return $context;
}
/**
 * Count the opening <ul>, <ol> and <li> tags in an HTML string.
 *
 * Tag names are matched case-insensitively and as whole words, so tags that
 * merely start with the same letters (e.g. <link>) are not counted.
 *
 * @param string $html HTML to inspect.
 * @return array Counts keyed by 'ul', 'ol' and 'li'; always integers.
 */
function countListElements(string $html): array {
    $count_opening_tags = static function (string $tag) use ($html): int {
        // \b ends the tag name; the cast turns a PCRE failure (false) into 0.
        return (int) preg_match_all('/<' . $tag . '\b[^>]*>/i', $html);
    };

    return [
        'ul' => $count_opening_tags('ul'),
        'ol' => $count_opening_tags('ol'),
        'li' => $count_opening_tags('li'),
    ];
}
// ============================================
// MAIN EXECUTION
// ============================================
echo "==============================================\n";
echo " DIAGNÓSTICO: Listas HTML Mal Formadas\n";
echo " Base de datos: {$db_config['database']}\n";
echo " Tabla: datos_seo_pagina\n";
echo " Fecha: " . date('Y-m-d H:i:s') . "\n";
echo "==============================================\n\n";

// This script checks mysqli return values instead of catching exceptions.
// Since PHP 8.1 mysqli throws by default, which would bypass every one of
// those checks, so switch reporting back to return-value mode.
mysqli_report(MYSQLI_REPORT_OFF);

// Connect
$conn = connectDatabase($db_config);
if (!$conn) {
    exit(1);
}
echo "✓ Conexión establecida\n\n";

// Show the table structure (informational only; a failure here is not fatal)
echo "Verificando estructura de tabla...\n";
$result = $conn->query("DESCRIBE datos_seo_pagina");
if ($result) {
    echo "Columnas encontradas:\n";
    while ($row = $result->fetch_assoc()) {
        echo " - {$row['Field']} ({$row['Type']})\n";
    }
    echo "\n";
    $result->free();
}

// Count the rows that will be analysed. Without this number the batch loop
// cannot run, so a failed query ends the script with an explicit message.
$result = $conn->query("SELECT COUNT(*) as total FROM datos_seo_pagina WHERE html IS NOT NULL AND html != ''");
if (!$result) {
    echo "Error en consulta: " . $conn->error . "\n";
    $conn->close();
    exit(1);
}
$total = (int) $result->fetch_assoc()['total'];
$result->free();
echo "Total de registros con HTML: {$total}\n\n";
// Walk the table in fixed-size pages, analysing each row's HTML.
$batch_size = 100;
$offset = 0;
$affected_posts = [];
$total_issues = 0;
$processed = 0;

echo "Iniciando análisis...\n";
echo "─────────────────────────────────────────────\n";

for (; $offset < $total; $offset += $batch_size) {
    $query = "SELECT id, url, html FROM datos_seo_pagina\n"
        . "WHERE html IS NOT NULL AND html != ''\n"
        . "ORDER BY id\n"
        . "LIMIT {$batch_size} OFFSET {$offset}";

    $result = $conn->query($query);
    if (!$result) {
        // Stop paging on the first failed query; rows seen so far are still summarised.
        echo "Error en consulta: " . $conn->error . "\n";
        break;
    }

    foreach ($result as $row) {
        $processed++;

        $issues = analyzeMalformedLists($row['html'], $malformed_patterns);
        $issue_count = count($issues);

        if ($issue_count > 0) {
            $id = $row['id'];
            $url = $row['url'] ?? 'N/A';
            $list_counts = countListElements($row['html']);

            $affected_posts[] = [
                'id' => $id,
                'url' => $url,
                'issues' => $issues,
                'list_counts' => $list_counts
            ];
            $total_issues += $issue_count;

            // Report each affected post as soon as it is found
            echo "\n[ID: {$id}] " . $issue_count . " problema(s) encontrado(s)\n";
            echo "URL: {$url}\n";
            echo "Listas: UL={$list_counts['ul']}, OL={$list_counts['ol']}, LI={$list_counts['li']}\n";
            foreach ($issues as $idx => $issue) {
                $number = $idx + 1;
                echo " Problema {$number}: {$issue['type']} (pos: {$issue['position']})\n";
            }
        }

        // Progress line every 500 rows
        if ($processed % 500 === 0) {
            echo "\rProcesados: {$processed}/{$total}...";
        }
    }
}
echo "\n\n";
echo "==============================================\n";
echo " RESUMEN DEL ANÁLISIS\n";
echo "==============================================\n\n";
echo "Registros analizados: {$processed}\n";
echo "Posts con problemas: " . count($affected_posts) . "\n";
echo "Total de incidencias: {$total_issues}\n\n";

if (count($affected_posts) > 0) {
    echo "─────────────────────────────────────────────\n";
    echo "DETALLE DE POSTS AFECTADOS\n";
    echo "─────────────────────────────────────────────\n\n";

    // Group post ids by issue type (a post appears once per matching issue;
    // duplicates are removed when the groups are reported below)
    $by_type = [];
    foreach ($affected_posts as $post) {
        foreach ($post['issues'] as $issue) {
            $by_type[$issue['type']][] = $post['id'];
        }
    }

    echo "Por tipo de problema:\n";
    foreach ($by_type as $type => $ids) {
        $unique_ids = array_unique($ids);
        echo " - {$type}: " . count($unique_ids) . " posts\n";
    }

    echo "\n─────────────────────────────────────────────\n";
    echo "LISTA DE IDs AFECTADOS (para revisión manual)\n";
    echo "─────────────────────────────────────────────\n\n";
    $ids_list = array_column($affected_posts, 'id');
    echo "IDs: " . implode(', ', $ids_list) . "\n";

    // Write the JSON report next to this script
    $report_file = __DIR__ . '/malformed-lists-report-' . date('Ymd-His') . '.json';
    $report_data = [
        'generated_at' => date('Y-m-d H:i:s'),
        'database' => $db_config['database'],
        'table' => 'datos_seo_pagina',
        'total_analyzed' => $processed,
        'total_affected' => count($affected_posts),
        'total_issues' => $total_issues,
        'by_type' => array_map(function ($ids) {
            return array_values(array_unique($ids));
        }, $by_type),
        'affected_posts' => $affected_posts
    ];

    // Encode and write as separate steps so each failure gets its own
    // message. JSON_INVALID_UTF8_SUBSTITUTE keeps one badly encoded value
    // from discarding the whole report.
    $report_json = json_encode(
        $report_data,
        JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE
    );
    if ($report_json === false) {
        echo "\n✗ No se pudo generar el reporte JSON: " . json_last_error_msg() . "\n";
    } elseif (file_put_contents($report_file, $report_json) === false) {
        echo "\n✗ No se pudo escribir el reporte en:\n {$report_file}\n";
    } else {
        echo "\n✓ Reporte JSON guardado en:\n {$report_file}\n";
    }

    // Context sample for manual inspection
    echo "\n─────────────────────────────────────────────\n";
    echo "MUESTRA DE CONTEXTO (primeros 3 posts)\n";
    echo "─────────────────────────────────────────────\n\n";
    $sample = array_slice($affected_posts, 0, 3);
    foreach ($sample as $post) {
        echo "POST ID: {$post['id']}\n";
        echo "URL: {$post['url']}\n";
        foreach ($post['issues'] as $issue) {
            echo " [{$issue['type']}]\n";
            echo " Contexto: {$issue['context']}\n\n";
        }
        echo "───────────────────────\n";
    }
} else {
    echo "✓ No se encontraron listas mal formadas.\n";
}

$conn->close();
echo "\n✓ Análisis completado.\n";