set_charset("utf8mb4");
// NOTE(review): the line above appears to be the tail of a statement that
// starts before this section (the receiver of the call is not visible here).
// It is kept untouched; confirm against the full file.

/**
 * Finds element children of <ul>/<ol> lists that are not allowed there.
 *
 * The HTML fragment is parsed with DOMDocument and every <ul> and <ol> in it
 * is inspected. Each direct child element whose tag is not "li", "script" or
 * "template" yields one entry in the result.
 *
 * @param string $html HTML fragment to inspect.
 *
 * @return array List of ["parent" => "ul"|"ol", "child" => tag name] entries,
 *               one per offending child element; empty when none are found.
 */
function detectIssues($html)
{
    $issues = [];

    // Collect libxml parse warnings internally instead of emitting them, and
    // remember the previous mode so it can be restored afterwards.
    $previousErrorMode = libxml_use_internal_errors(true);

    $doc = new DOMDocument("1.0", "UTF-8");

    // LIBXML_HTML_NOIMPLIED needs a single root element. Without one, libxml
    // promotes the first top-level element to root and re-parents the
    // following siblings under it, which makes a leading <ul>/<ol> look like
    // it contains unrelated elements. A neutral <div> container avoids that.
    $wrapped = '<div>' . $html . '</div>';
    $doc->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);

    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $validChildren = ["li", "script", "template"];

    foreach (["ul", "ol"] as $tag) {
        foreach ($doc->getElementsByTagName($tag) as $list) {
            foreach ($list->childNodes as $child) {
                // Text nodes, comments, etc. are not relevant here.
                if ($child->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }

                $childTag = strtolower($child->nodeName);
                if (!in_array($childTag, $validChildren, true)) {
                    $issues[] = ["parent" => $tag, "child" => $childTag];
                }
            }
        }
    }

    return $issues;
}

echo "BUSCANDO CASOS VARIADOS...\n\n";

$query = "SELECT id, page, html FROM datos_seo_pagina WHERE html IS NOT NULL AND html != '' ORDER BY id";
$result = $conn->query($query);
if (!$result) {
    die("Error en query: " . $conn->error);
}

// Maximum number of sample rows kept per category.
$maxPerCategory = 3;

// Sample rows grouped by the kind of problem they show. A row may be stored
// in more than one category.
$cases = [
    "many_issues" => [],
    "ol_issues" => [],
    "mixed_issues" => [],
    "few_issues" => []
];

while ($row = $result->fetch_assoc()) {
    $issues = detectIssues($row["html"]);
    if (empty($issues)) {
        continue;
    }

    $count = count($issues);

    // Which list types have at least one invalid child in this row.
    $hasOl = false;
    $hasUl = false;
    foreach ($issues as $issue) {
        if ($issue["parent"] === "ol") {
            $hasOl = true;
        }
        if ($issue["parent"] === "ul") {
            $hasUl = true;
        }
    }

    // The same entry is stored in every category the row qualifies for.
    $entry = [
        "id" => $row["id"],
        "url" => $row["page"],
        "count" => $count,
        "issues" => $issues
    ];

    // More than 10 invalid children in a single row.
    if ($count > 10 && count($cases["many_issues"]) < $maxPerCategory) {
        $cases["many_issues"][] = $entry;
    }
    // Problems only inside <ol> lists.
    if ($hasOl && !$hasUl && count($cases["ol_issues"]) < $maxPerCategory) {
        $cases["ol_issues"][] = $entry;
    }
    // Problems inside both <ol> and <ul> lists.
    if ($hasOl && $hasUl && count($cases["mixed_issues"]) < $maxPerCategory) {
        $cases["mixed_issues"][] = $entry;
    }
    // One or two invalid children in total.
    if ($count <= 2 && count($cases["few_issues"]) < $maxPerCategory) {
        $cases["few_issues"][] = $entry;
    }
}

// All rows have been consumed; release the result set.
$result->free();

// Print one section per category with the collected samples.
foreach ($cases as $type => $posts) {
    echo "=== " . strtoupper($type) . " ===\n";

    if (empty($posts)) {
        echo " (ninguno encontrado)\n\n";
        continue;
    }

    foreach ($posts as $post) {
        echo "ID: {$post["id"]} - {$post["count"]} problemas\n";
        echo "URL: {$post["url"]}\n";
        echo "Tipos: ";

        // Describe each parent/child combination once per row.
        $types = [];
        foreach ($post["issues"] as $i) {
            $types[] = "<{$i["parent"]}> contiene <{$i["child"]}>";
        }
        echo implode(", ", array_unique($types)) . "\n\n";
    }
}

$conn->close();