Files
roi-theme/Shared/Infrastructure/Scripts/find-varied-cases.php
2025-11-27 17:45:55 -06:00

95 lines
3.0 KiB
PHP

<?php
/**
* Busca casos variados de problemas de listas para validación exhaustiva
*/
// Database connection used by the rest of the script.
//
// The password is read from the SEO_DB_PASSWORD environment variable instead of
// being hard-coded, so that no secret is stored in the repository. Host, user
// and database name keep their previous values as defaults and can be
// overridden with SEO_DB_HOST, SEO_DB_USER and SEO_DB_NAME.
// NOTE(review): the password that used to be committed on this line should be
// rotated, since it remains visible in the version-control history.
$dbHost = getenv("SEO_DB_HOST") ?: "localhost";
$dbUser = getenv("SEO_DB_USER") ?: "preciosunitarios_seo";
$dbName = getenv("SEO_DB_NAME") ?: "preciosunitarios_seo";
$dbPassword = getenv("SEO_DB_PASSWORD");
if ($dbPassword === false || $dbPassword === "") {
    die("Error: falta la variable de entorno SEO_DB_PASSWORD\n");
}

// Depending on the mysqli error-reporting mode a failed connection either
// throws mysqli_sql_exception or only sets connect_error; handle both so the
// script never continues with an unusable connection.
try {
    $conn = new mysqli($dbHost, $dbUser, $dbPassword, $dbName);
} catch (mysqli_sql_exception $e) {
    die("Error de conexión: " . $e->getMessage() . "\n");
}
if ($conn->connect_error) {
    die("Error de conexión: " . $conn->connect_error . "\n");
}

// The stored HTML is parsed as UTF-8 later on, so the connection charset must match.
if (!$conn->set_charset("utf8mb4")) {
    die("Error al establecer el charset: " . $conn->error . "\n");
}
/**
 * Finds element children of <ul>/<ol> lists that are not allowed there.
 *
 * The fragment is wrapped in a single <div> and parsed with DOMDocument. Every
 * <ul> and <ol> in the fragment (nested ones included) is inspected, and each
 * direct element child other than <li>, <script> or <template> is reported.
 * Text and comment nodes are ignored. All <ul> findings come first, followed
 * by all <ol> findings.
 *
 * The libxml "internal errors" setting is restored to its previous value
 * before returning, so calling this function does not change global state.
 *
 * @param string $html HTML fragment to inspect.
 * @return array List of findings, each one ["parent" => "ul"|"ol", "child" => tag name].
 */
function detectIssues($html) {
    $issues = [];

    // Collect parser warnings internally instead of emitting them; remember the
    // previous mode so it can be restored once parsing is done.
    $previousErrorMode = libxml_use_internal_errors(true);

    $doc = new DOMDocument("1.0", "UTF-8");
    // A single wrapper element gives the fragment one root node.
    $wrapped = '<div id="wrapper">' . $html . '</div>';
    // The XML-declaration prefix is an encoding hint so the input is read as
    // UTF-8; the flags keep libxml from adding <html>/<body> or a doctype.
    $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $validChildren = ["li", "script", "template"];

    foreach (["ul", "ol"] as $tag) {
        foreach ($doc->getElementsByTagName($tag) as $list) {
            foreach ($list->childNodes as $child) {
                // Only element nodes count; whitespace/text/comments are skipped.
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }
                $childTag = strtolower($child->nodeName);
                if (!in_array($childTag, $validChildren, true)) {
                    $issues[] = ["parent" => $tag, "child" => $childTag];
                }
            }
        }
    }

    return $issues;
}
echo "BUSCANDO CASOS VARIADOS...\n\n";

// Load every page that has stored HTML, in ascending id order.
$query = "SELECT id, page, html FROM datos_seo_pagina WHERE html IS NOT NULL AND html != '' ORDER BY id";
$result = $conn->query($query);
if (!$result) {
    die("Error en query: " . $conn->error);
}

// Maximum number of sample pages kept per category.
$maxPerCategory = 3;

// Sample buckets; the key order here is also the order of the final report.
$cases = [
    "many_issues" => [],
    "ol_issues" => [],
    "mixed_issues" => [],
    "few_issues" => []
];

// Free slots left across all buckets; when it reaches zero nothing more can be
// collected, so the remaining rows do not need to be parsed.
$remainingSlots = count($cases) * $maxPerCategory;

while ($row = $result->fetch_assoc()) {
    $issues = detectIssues($row["html"]);
    if (empty($issues)) {
        continue;
    }

    $count = count($issues);

    // Which kinds of list have at least one invalid child on this page.
    $hasOl = false;
    $hasUl = false;
    foreach ($issues as $issue) {
        if ($issue["parent"] === "ol") {
            $hasOl = true;
        }
        if ($issue["parent"] === "ul") {
            $hasUl = true;
        }
    }

    // A page may qualify for several categories at once.
    $criteria = [
        "many_issues" => $count > 10,
        "ol_issues" => $hasOl && !$hasUl,
        "mixed_issues" => $hasOl && $hasUl,
        "few_issues" => $count <= 2
    ];

    $entry = ["id" => $row["id"], "url" => $row["page"], "count" => $count, "issues" => $issues];

    foreach ($criteria as $category => $qualifies) {
        if ($qualifies && count($cases[$category]) < $maxPerCategory) {
            $cases[$category][] = $entry;
            $remainingSlots--;
        }
    }

    if ($remainingSlots === 0) {
        break;
    }
}

// Release the buffered result set; only $cases is needed from here on.
$result->free();
// Print one section per category, in the order the categories were declared.
foreach ($cases as $category => $entries) {
    echo "=== " . strtoupper($category) . " ===\n";

    // Nothing collected for this category: say so and move on.
    if (empty($entries)) {
        echo " (ninguno encontrado)\n\n";
        continue;
    }

    foreach ($entries as $entry) {
        // Describe each finding as "<parent> contiene <child>".
        $labels = [];
        foreach ($entry["issues"] as $finding) {
            $labels[] = "<" . $finding["parent"] . "> contiene <" . $finding["child"] . ">";
        }

        // Assemble the three report lines for this page; repeated
        // descriptions are collapsed so each kind is listed once.
        $block = "ID: " . $entry["id"] . " - " . $entry["count"] . " problemas\n";
        $block .= "URL: " . $entry["url"] . "\n";
        $block .= "Tipos: " . implode(", ", array_unique($labels)) . "\n\n";

        echo $block;
    }
}

// Done with the database.
$conn->close();