- Add SpamDetectionService to detect gibberish/random text
- Detect excessive consonants, low vowel ratio, mixed case patterns
- Detect repeated characters and extremely long words
- Validate that names look realistic (start with a letter, contain vowels)
- Cross-validate multiple suspicious fields
- Integrate with ContactFormAjaxHandler and NewsletterAjaxHandler
- Log blocked attempts to debug.log

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
404 lines
12 KiB
PHP
404 lines
12 KiB
PHP
<?php
|
|
declare(strict_types=1);
|
|
|
|
namespace ROITheme\Shared\Application\Services;
|
|
|
|
/**
 * SpamDetectionService - detects spam / gibberish text submitted through forms.
 *
 * RESPONSIBILITY: analyse form content in order to detect:
 * - Random, meaningless text (gibberish)
 * - Common spam patterns
 * - Suspicious characters
 *
 * Text is inspected per character (PCRE `u` modifier) rather than per byte, so
 * accented and non-Latin input is measured correctly. When a value is not
 * valid UTF-8 the checks fall back to byte-wise matching instead of failing.
 *
 * @package ROITheme\Shared\Application\Services
 */
final class SpamDetectionService
{
    /**
     * Default configuration; any key can be overridden through the constructor.
     */
    private const DEFAULT_CONFIG = [
        'min_vowel_ratio' => 0.15,     // At least 15% of the letters must be vowels
        'max_consonant_sequence' => 6, // At most 6 consecutive consonants
        'max_uppercase_ratio' => 0.5,  // At most 50% uppercase letters
        'min_word_length_avg' => 2,    // Minimum average word length (not read by any check in this class)
        'max_word_length' => 25,       // Maximum length of a single word, in characters
        'max_repeated_chars' => 4,     // At most 4 consecutive repetitions of the same character
        'log_blocked' => true,         // Write blocked attempts to the PHP error log
    ];

    /** Vowels recognised by the vowel checks (regex character-class body). */
    private const VOWELS = 'aeiouáéíóúüAEIOUÁÉÍÓÚÜ';

    /** Letters treated as "Latin" (Spanish/English) text (regex character-class body). */
    private const LETTERS = 'a-zA-ZáéíóúüñÁÉÍÓÚÜÑ';

    /** ASCII consonants used to detect long consonant runs (regex character-class body). */
    private const CONSONANTS = 'bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ';

    /** Share of non-space characters that must be Latin letters for a text to count as Latin. */
    private const LATIN_RATIO_THRESHOLD = 0.8;

    /** Share of upper/lower case switches above which mixed case is considered suspicious. */
    private const CASE_SWITCH_RATIO_THRESHOLD = 0.4;

    /** Reason returned when a field is not a plain scalar value. */
    private const REASON_BAD_FORMAT = 'Formato de texto invalido';

    /** Maximum number of characters of a blocked value written to the log. */
    private const LOG_EXCERPT_LENGTH = 100;

    private array $config;

    /**
     * @param array $config Overrides for the keys of DEFAULT_CONFIG.
     */
    public function __construct(array $config = [])
    {
        $this->config = array_merge(self::DEFAULT_CONFIG, $config);
    }

    /**
     * Validate a contact form submission.
     *
     * Checks fullName, company, whatsapp and message (in that order) and stops
     * at the first field that fails.
     *
     * @param array $data Form data [fullName, company, whatsapp, email, message]
     * @return array ['valid' => bool, 'reason' => string]
     */
    public function validateContactForm(array $data): array
    {
        $fields = $this->extractFields($data, ['fullName', 'company', 'whatsapp', 'message']);

        $failure = $this->firstFailure('contact-form', $fields);
        if ($failure !== null) {
            return $failure;
        }

        // Cross validation: reject when several fields look random.
        // NOTE(review): every signal scored by looksRandom() is also a hard
        // failure in analyzeText(), so with the current thresholds this branch
        // cannot trigger once the per-field pass above has succeeded. It is
        // kept as defence in depth; softer thresholds would be needed to make
        // it effective.
        $suspiciousCount = 0;
        foreach ($fields as $value) {
            if ($value !== '' && $this->looksRandom((string) $value)) {
                $suspiciousCount++;
            }
        }

        if ($suspiciousCount >= 2) {
            $reason = 'Multiples campos con contenido sospechoso';
            $this->logBlocked('contact-form', 'multiple', implode(' | ', $fields), $reason);

            return ['valid' => false, 'reason' => $reason];
        }

        return ['valid' => true, 'reason' => ''];
    }

    /**
     * Validate a newsletter form submission.
     *
     * Checks name and whatsapp (in that order) and stops at the first field
     * that fails.
     *
     * @param array $data Form data [name, whatsapp]
     * @return array ['valid' => bool, 'reason' => string]
     */
    public function validateNewsletterForm(array $data): array
    {
        $fields = $this->extractFields($data, ['name', 'whatsapp']);

        $failure = $this->firstFailure('newsletter', $fields);
        if ($failure !== null) {
            return $failure;
        }

        return ['valid' => true, 'reason' => ''];
    }

    /**
     * Pick the requested fields out of the raw form data.
     *
     * Scalars are cast to string and trimmed; missing fields become ''.
     * Non-scalar values (for example an array sent as `name[]=x`) become null
     * so the caller can reject them instead of hitting a TypeError.
     *
     * @param array    $data  Raw form data.
     * @param string[] $names Field names to extract, in checking order.
     * @return array<string, string|null>
     */
    private function extractFields(array $data, array $names): array
    {
        $fields = [];
        foreach ($names as $name) {
            $raw = $data[$name] ?? '';
            $fields[$name] = is_scalar($raw) ? trim((string) $raw) : null;
        }

        return $fields;
    }

    /**
     * Analyse the fields in order and return the first failing result.
     *
     * Empty fields are skipped. A blocked field is logged via logBlocked().
     *
     * @param string                     $source Form identifier used in the log line.
     * @param array<string, string|null> $fields Output of extractFields().
     * @return array|null ['valid' => false, 'reason' => string], or null when every field passes.
     */
    private function firstFailure(string $source, array $fields): ?array
    {
        foreach ($fields as $fieldName => $value) {
            $fieldName = (string) $fieldName;

            if ($value === null) {
                $this->logBlocked($source, $fieldName, '(non-scalar value)', self::REASON_BAD_FORMAT);

                return ['valid' => false, 'reason' => self::REASON_BAD_FORMAT];
            }

            if ($value === '') {
                continue;
            }

            $result = $this->analyzeText($value, $fieldName);
            if (!$result['valid']) {
                $this->logBlocked($source, $fieldName, $value, $result['reason']);

                return $result;
            }
        }

        return null;
    }

    /**
     * Analyse a single text value.
     *
     * The checks run in a fixed order and the first one that fires decides
     * the returned reason.
     *
     * @param string $text      Value to analyse; surrounding whitespace is ignored.
     * @param string $fieldName Field the value came from; 'fullName' and 'name' get the extra name check.
     * @return array ['valid' => bool, 'reason' => string]
     */
    private function analyzeText(string $text, string $fieldName = ''): array
    {
        $text = trim($text);

        if ($text === '') {
            return ['valid' => true, 'reason' => ''];
        }

        // 1. Too many consecutive consonants (gibberish)
        if ($this->hasExcessiveConsonants($text)) {
            return ['valid' => false, 'reason' => 'Texto con patron de caracteres invalido'];
        }

        // 2. Very low vowel ratio (only meaningful for Latin text)
        if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) {
            return ['valid' => false, 'reason' => 'Texto no parece ser legible'];
        }

        // 3. Too many uppercase letters mixed with lowercase ones
        if ($this->hasExcessiveMixedCase($text)) {
            return ['valid' => false, 'reason' => 'Formato de texto invalido'];
        }

        // 4. The same character repeated too many times in a row
        if ($this->hasRepeatedChars($text)) {
            return ['valid' => false, 'reason' => 'Texto con caracteres repetidos invalidos'];
        }

        // 5. Extremely long words without spaces
        if ($this->hasExtremelyLongWords($text)) {
            return ['valid' => false, 'reason' => 'Texto con formato invalido'];
        }

        // 6. Name fields must additionally look like a real name
        if (in_array($fieldName, ['fullName', 'name'], true) && !$this->looksLikeName($text)) {
            return ['valid' => false, 'reason' => 'El nombre no tiene un formato valido'];
        }

        return ['valid' => true, 'reason' => ''];
    }

    /**
     * Detect a run of more than `max_consonant_sequence` consecutive ASCII consonants.
     */
    private function hasExcessiveConsonants(string $text): bool
    {
        // A run "longer than N" is a run of at least N + 1 consonants.
        $minRun = max(0, (int) $this->config['max_consonant_sequence']) + 1;

        // The class is ASCII only, so byte-wise matching is exact here.
        return preg_match('/[' . self::CONSONANTS . ']{' . $minRun . ',}/', $text) === 1;
    }

    /**
     * Detect a vowel ratio below `min_vowel_ratio` among the Latin letters of the text.
     *
     * Texts with fewer than 4 such letters are too short to judge and never match.
     */
    private function hasLowVowelRatio(string $text): bool
    {
        $letterCount = $this->countMatches('/[' . self::LETTERS . ']/', $text);

        if ($letterCount < 4) {
            return false; // Too short to analyse
        }

        $vowelCount = $this->countMatches('/[' . self::VOWELS . ']/', $text);

        return ($vowelCount / $letterCount) < $this->config['min_vowel_ratio'];
    }

    /**
     * Check whether the text is Latin (Spanish/English): more than 80% of its
     * non-whitespace characters are Latin letters.
     */
    private function isLatinText(string $text): bool
    {
        $totalChars = $this->countMatches('/\S/', $text);

        if ($totalChars === 0) {
            return false;
        }

        $latinChars = $this->countMatches('/[' . self::LETTERS . ']/', $text);

        return ($latinChars / $totalChars) > self::LATIN_RATIO_THRESHOLD;
    }

    /**
     * Detect excessive mixed case (e.g. "MDhFfVCCKZYU").
     *
     * Only ASCII letters are considered. The text matches when it has at least
     * 5 of them, contains both cases, exceeds `max_uppercase_ratio`, and
     * switches case between neighbouring letters in more than 40% of positions.
     */
    private function hasExcessiveMixedCase(string $text): bool
    {
        $letters = (string) preg_replace('/[^a-zA-Z]/', '', $text);
        $total = strlen($letters);

        if ($total < 5) {
            return false;
        }

        $uppercase = (int) preg_match_all('/[A-Z]/', $letters);
        $lowercase = $total - $uppercase;

        // A single-case text cannot be "mixed".
        if ($uppercase === 0 || $lowercase === 0) {
            return false;
        }

        if (($uppercase / $total) <= $this->config['max_uppercase_ratio']) {
            return false;
        }

        // Count case changes between neighbouring letters (MdHfF-like pattern).
        $switches = 0;
        for ($i = 1; $i < $total; $i++) {
            if (ctype_upper($letters[$i - 1]) !== ctype_upper($letters[$i])) {
                $switches++;
            }
        }

        return ($switches / $total) > self::CASE_SWITCH_RATIO_THRESHOLD;
    }

    /**
     * Detect a character followed by at least `max_repeated_chars` copies of
     * itself (e.g. "aaaaa" with the default of 4).
     */
    private function hasRepeatedChars(string $text): bool
    {
        $minRepeats = max(0, (int) $this->config['max_repeated_chars']);

        return $this->hasMatch('/(.)\1{' . $minRepeats . ',}/', $text);
    }

    /**
     * Detect a whitespace-delimited word longer than `max_word_length` characters.
     */
    private function hasExtremelyLongWords(string $text): bool
    {
        $minLength = max(0, (int) $this->config['max_word_length']) + 1;

        return $this->hasMatch('/\S{' . $minLength . ',}/', $text);
    }

    /**
     * Check whether the text looks like a real name.
     *
     * A name must:
     * 1. Have at least 2 characters
     * 2. Start with a letter (of any script)
     * 3. Contain at most 2 digits
     * 4. Contain a vowel when it has more than 3 Latin letters
     */
    private function looksLikeName(string $text): bool
    {
        if ($this->charLength($text) < 2) {
            return false;
        }

        // Must start with a letter
        if (!$this->hasMatch('/^\p{L}/', $text)) {
            return false;
        }

        // No more than 2 digits in a name
        if ((int) preg_match_all('/[0-9]/', $text) > 2) {
            return false;
        }

        // A real Latin-script name has vowels; names in other scripts have
        // no Latin letters and therefore skip this rule.
        $vowels = $this->countMatches('/[' . self::VOWELS . ']/', $text);
        $letters = $this->countMatches('/[' . self::LETTERS . ']/', $text);

        if ($letters > 3 && $vowels === 0) {
            return false;
        }

        return true;
    }

    /**
     * Check whether the text looks random (used for cross-field validation).
     *
     * Each signal adds to a score; a score of 3 or more means "random".
     * Texts shorter than 5 characters are never considered random.
     */
    private function looksRandom(string $text): bool
    {
        if ($this->charLength($text) < 5) {
            return false;
        }

        $score = 0;

        // Excessive consonants
        if ($this->hasExcessiveConsonants($text)) {
            $score += 2;
        }

        // Low vowel ratio
        if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) {
            $score += 2;
        }

        // Mixed case
        if ($this->hasExcessiveMixedCase($text)) {
            $score += 2;
        }

        // Long words
        if ($this->hasExtremelyLongWords($text)) {
            $score += 1;
        }

        return $score >= 3;
    }

    /**
     * Count the matches of a pattern, per character when the text is valid UTF-8.
     *
     * @param string $pattern Delimited PCRE pattern WITHOUT modifiers.
     */
    private function countMatches(string $pattern, string $text): int
    {
        $count = preg_match_all($pattern . 'u', $text);
        if ($count === false) {
            // Typically invalid UTF-8: fall back to byte-wise matching.
            $count = preg_match_all($pattern, $text);
        }

        return (int) $count;
    }

    /**
     * Tell whether a pattern matches, per character when the text is valid UTF-8.
     *
     * @param string $pattern Delimited PCRE pattern WITHOUT modifiers.
     */
    private function hasMatch(string $pattern, string $text): bool
    {
        $found = preg_match($pattern . 'u', $text);
        if ($found === false) {
            // Typically invalid UTF-8: fall back to byte-wise matching.
            $found = preg_match($pattern, $text);
        }

        return $found === 1;
    }

    /**
     * Length of the text in characters (bytes when the text is not valid UTF-8).
     */
    private function charLength(string $text): int
    {
        return $this->countMatches('/./s', $text);
    }

    /**
     * Write a blocked attempt to the PHP error log (when `log_blocked` is enabled).
     *
     * @param string $source Form identifier (e.g. 'contact-form').
     * @param string $field  Name of the offending field.
     * @param string $value  Submitted value; only a sanitised excerpt is logged.
     * @param string $reason Reason the value was blocked.
     */
    private function logBlocked(string $source, string $field, string $value, string $reason): void
    {
        if (!$this->config['log_blocked']) {
            return;
        }

        // The value is attacker-controlled: collapse control characters so it
        // cannot inject extra lines into the log.
        $printable = (string) preg_replace('/[\x00-\x1F\x7F]+/', ' ', $value);

        // Truncate on a character boundary; fall back to bytes for invalid UTF-8.
        $excerpt = preg_match('/^.{0,' . self::LOG_EXCERPT_LENGTH . '}/su', $printable, $head) === 1
            ? $head[0]
            : substr($printable, 0, self::LOG_EXCERPT_LENGTH);

        $logMessage = sprintf(
            'ROI Theme Spam Blocked [%s] Field: %s | Reason: %s | Value: %s | IP: %s',
            $source,
            $field,
            $reason,
            $excerpt,
            $this->getClientIP()
        );

        error_log($logMessage);
    }

    /**
     * Get the client IP for the log line.
     *
     * Prefers the first entry of X-Forwarded-For, then REMOTE_ADDR. Both are
     * accepted only if they are syntactically valid IP addresses; otherwise
     * 'unknown' is returned. X-Forwarded-For is a client-supplied header and
     * can be spoofed; within this class the result is only used for logging.
     */
    private function getClientIP(): string
    {
        $candidates = [];

        $forwarded = $_SERVER['HTTP_X_FORWARDED_FOR'] ?? '';
        if (is_string($forwarded) && $forwarded !== '') {
            $candidates[] = explode(',', $forwarded)[0];
        }

        $remote = $_SERVER['REMOTE_ADDR'] ?? '';
        if (is_string($remote)) {
            $candidates[] = $remote;
        }

        foreach ($candidates as $candidate) {
            $ip = filter_var(trim($candidate), FILTER_VALIDATE_IP);
            if ($ip !== false) {
                return $ip;
            }
        }

        return 'unknown';
    }
}
|