95 lines
3.0 KiB
PHP
95 lines
3.0 KiB
PHP
<?php
|
|
/**
 * Finds a varied set of list-markup problem cases for exhaustive validation.
 */
|
|
|
|
// Open the MySQL connection used by the rest of the script.
// NOTE(review): the credentials are hard-coded in source; consider loading them
// from the environment or an untracked config file and rotating this password.
$conn = new mysqli("localhost", "preciosunitarios_seo", "ACl%EEFd=V-Yvb??", "preciosunitarios_seo");

// When mysqli is not in exception mode a failed connect is only reported through
// connect_error, so stop here instead of running queries on a dead handle.
if ($conn->connect_error) {
    die("Error de conexión: " . $conn->connect_error);
}

// Use utf8mb4 for the session so the stored HTML is exchanged as full UTF-8.
$conn->set_charset("utf8mb4");
|
|
|
|
/**
 * Reports elements that appear as direct children of <ul>/<ol> lists but are
 * not allowed there.
 *
 * The fragment is wrapped in a <div id="wrapper"> and parsed with DOMDocument.
 * Every element node found directly inside a <ul> or <ol> whose tag is not
 * li, script or template produces one entry. Non-element nodes (text,
 * comments) are ignored.
 *
 * @param string $html HTML fragment to inspect.
 * @return array List of ["parent" => "ul"|"ol", "child" => <tag name>] entries;
 *               all <ul> findings come first, followed by all <ol> findings.
 */
function detectIssues($html) {
    $issues = [];

    // Collect parser warnings internally instead of emitting them, and remember
    // the caller's setting so this process-wide flag can be restored afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);

    $doc = new DOMDocument("1.0", "UTF-8");
    $wrapped = '<div id="wrapper">' . $html . '</div>';
    // The XML-declaration prefix is the usual hint for libxml to read the input as UTF-8.
    $doc->loadHTML('<?xml encoding="UTF-8">' . $wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $validChildren = ["li", "script", "template"];

    foreach (["ul", "ol"] as $tag) {
        foreach ($doc->getElementsByTagName($tag) as $list) {
            foreach ($list->childNodes as $child) {
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }

                $childTag = strtolower($child->nodeName);
                if (!in_array($childTag, $validChildren, true)) {
                    $issues[] = ["parent" => $tag, "child" => $childTag];
                }
            }
        }
    }

    return $issues;
}
|
|
|
|
// Announce the scan, then load every page row that has non-empty HTML, ordered by id.
echo "BUSCANDO CASOS VARIADOS...\n\n";

$result = $conn->query(
    "SELECT id, page, html FROM datos_seo_pagina WHERE html IS NOT NULL AND html != '' ORDER BY id"
);

// query() yields false on failure; abort with the driver's error message.
if ($result === false) {
    die("Error en query: " . $conn->error);
}
|
|
|
|
// Example pages grouped by problem profile. A page may land in several buckets;
// each bucket keeps only the first $maxPerCase matching pages (in id order).
$cases = [
    "many_issues" => [],
    "ol_issues" => [],
    "mixed_issues" => [],
    "few_issues" => []
];
$maxPerCase = 3;

while ($row = $result->fetch_assoc()) {
    $issues = detectIssues($row["html"]);
    if (empty($issues)) {
        continue;
    }

    $count = count($issues);

    // Which list types have at least one invalid child on this page.
    $parents = array_column($issues, "parent");
    $hasOl = in_array("ol", $parents, true);
    $hasUl = in_array("ul", $parents, true);

    $entry = ["id" => $row["id"], "url" => $row["page"], "count" => $count, "issues" => $issues];

    // Membership rule for each bucket, in the same order as $cases.
    $matches = [
        "many_issues" => $count > 10,
        "ol_issues" => $hasOl && !$hasUl,
        "mixed_issues" => $hasOl && $hasUl,
        "few_issues" => $count <= 2,
    ];

    foreach ($matches as $bucketName => $applies) {
        if ($applies && count($cases[$bucketName]) < $maxPerCase) {
            $cases[$bucketName][] = $entry;
        }
    }

    // Once every bucket is full no later row can change the result, so stop
    // fetching and parsing the remaining pages.
    $allFull = true;
    foreach ($cases as $bucket) {
        if (count($bucket) < $maxPerCase) {
            $allFull = false;
            break;
        }
    }
    if ($allFull) {
        break;
    }
}
|
|
|
|
// Print one section per bucket: a header, then for each collected page its id,
// issue count, URL and the distinct "<parent> contiene <child>" combinations.
foreach ($cases as $type => $posts) {
    echo "=== " . strtoupper($type) . " ===\n";

    if (!$posts) {
        echo " (ninguno encontrado)\n\n";
        continue;
    }

    foreach ($posts as $post) {
        // Array keys act as an ordered set: first occurrence wins, duplicates collapse.
        $labels = [];
        foreach ($post["issues"] as $found) {
            $label = "<" . $found["parent"] . "> contiene <" . $found["child"] . ">";
            $labels[$label] = true;
        }

        echo "ID: " . $post["id"] . " - " . $post["count"] . " problemas\n";
        echo "URL: " . $post["url"] . "\n";
        echo "Tipos: " . implode(", ", array_keys($labels)) . "\n\n";
    }
}

// Release the database connection now that the report is complete.
$conn->close();
|