From eb50c8029798d9eb4123eea33eeaa7f87c50c4c2 Mon Sep 17 00:00:00 2001 From: FrankZamora Date: Fri, 9 Jan 2026 09:46:47 -0600 Subject: [PATCH] feat(api): add spam content detection for forms - Add SpamDetectionService to detect gibberish/random text - Detect excessive consonants, low vowel ratio, mixed case patterns - Detect repeated characters and extremely long words - Validate names look realistic (start with letter, have vowels) - Cross-validate multiple suspicious fields - Integrate with ContactFormAjaxHandler and NewsletterAjaxHandler - Log blocked attempts to debug.log Co-Authored-By: Claude Opus 4.5 --- .../Api/WordPress/ContactFormAjaxHandler.php | 31 +- .../Api/WordPress/NewsletterAjaxHandler.php | 31 +- .../Services/SpamDetectionService.php | 403 ++++++++++++++++++ functions.php | 9 +- 4 files changed, 468 insertions(+), 6 deletions(-) create mode 100644 Shared/Application/Services/SpamDetectionService.php diff --git a/Public/ContactForm/Infrastructure/Api/WordPress/ContactFormAjaxHandler.php b/Public/ContactForm/Infrastructure/Api/WordPress/ContactFormAjaxHandler.php index d278f2ff..8f6ab743 100644 --- a/Public/ContactForm/Infrastructure/Api/WordPress/ContactFormAjaxHandler.php +++ b/Public/ContactForm/Infrastructure/Api/WordPress/ContactFormAjaxHandler.php @@ -5,6 +5,7 @@ namespace ROITheme\Public\ContactForm\Infrastructure\Api\WordPress; use ROITheme\Shared\Domain\Contracts\ComponentSettingsRepositoryInterface; use ROITheme\Shared\Application\Services\RecaptchaValidationService; +use ROITheme\Shared\Application\Services\SpamDetectionService; /** * ContactFormAjaxHandler - Procesa envios del formulario de contacto @@ -28,7 +29,8 @@ final class ContactFormAjaxHandler public function __construct( private ComponentSettingsRepositoryInterface $settingsRepository, - private ?RecaptchaValidationService $recaptchaService = null + private ?RecaptchaValidationService $recaptchaService = null, + private ?SpamDetectionService $spamDetectionService = null ) {} /** @@ -83,7 +85,15 @@ final class ContactFormAjaxHandler return; } - // 5. Obtener configuracion del componente (incluye webhook URL) + // 5. Validar contenido anti-spam (detectar gibberish/texto basura) + if (!$this->validateSpamContent($formData)) { + wp_send_json_error([ + 'message' => __('El contenido del formulario no es valido. Por favor ingresa informacion real.', 'roi-theme') + ], 422); + return; + } + + // 6. Obtener configuracion del componente (incluye webhook URL) $settings = $this->settingsRepository->getComponentSettings(self::COMPONENT_NAME); if (empty($settings)) { @@ -338,4 +348,21 @@ final class ContactFormAjaxHandler // Validar con el servicio return $this->recaptchaService->validateSubmission($token, $action); } + + /** + * Validar contenido anti-spam + * + * @param array $formData Datos del formulario sanitizados + * @return bool True si el contenido es valido (no spam) + */ + private function validateSpamContent(array $formData): bool + { + // Si el servicio no esta inyectado, permitir + if ($this->spamDetectionService === null) { + return true; + } + + $result = $this->spamDetectionService->validateContactForm($formData); + return $result['valid']; + } } diff --git a/Public/Footer/Infrastructure/Api/WordPress/NewsletterAjaxHandler.php b/Public/Footer/Infrastructure/Api/WordPress/NewsletterAjaxHandler.php index 939f1a65..abf28c4e 100644 --- a/Public/Footer/Infrastructure/Api/WordPress/NewsletterAjaxHandler.php +++ b/Public/Footer/Infrastructure/Api/WordPress/NewsletterAjaxHandler.php @@ -5,6 +5,7 @@ namespace ROITheme\Public\Footer\Infrastructure\Api\WordPress; use ROITheme\Shared\Domain\Contracts\ComponentSettingsRepositoryInterface; use ROITheme\Shared\Application\Services\RecaptchaValidationService; +use ROITheme\Shared\Application\Services\SpamDetectionService; /** * NewsletterAjaxHandler - Procesa suscripciones al newsletter @@ -27,7 +28,8 @@ final class NewsletterAjaxHandler public function __construct( private ComponentSettingsRepositoryInterface $settingsRepository, - private ?RecaptchaValidationService $recaptchaService = null + private ?RecaptchaValidationService $recaptchaService = null, + private ?SpamDetectionService $spamDetectionService = null ) {} /** @@ -81,7 +83,15 @@ final class NewsletterAjaxHandler return; } - // 5. Obtener configuracion del componente + // 5. Validar contenido anti-spam (detectar gibberish/texto basura) + if (!$this->validateSpamContent(['name' => $name, 'whatsapp' => $whatsapp])) { + wp_send_json_error([ + 'message' => __('El contenido del formulario no es valido. Por favor ingresa informacion real.', 'roi-theme') + ], 422); + return; + } + + // 6. Obtener configuracion del componente $settings = $this->settingsRepository->getComponentSettings(self::COMPONENT_NAME); if (empty($settings)) { @@ -238,4 +248,21 @@ final class NewsletterAjaxHandler // Validar con el servicio return $this->recaptchaService->validateSubmission($token, $action); } + + /** + * Validar contenido anti-spam + * + * @param array $formData Datos del formulario sanitizados + * @return bool True si el contenido es valido (no spam) + */ + private function validateSpamContent(array $formData): bool + { + // Si el servicio no esta inyectado, permitir + if ($this->spamDetectionService === null) { + return true; + } + + $result = $this->spamDetectionService->validateNewsletterForm($formData); + return $result['valid']; + } } diff --git a/Shared/Application/Services/SpamDetectionService.php b/Shared/Application/Services/SpamDetectionService.php new file mode 100644 index 00000000..a28223b8 --- /dev/null +++ b/Shared/Application/Services/SpamDetectionService.php @@ -0,0 +1,403 @@ + 0.15, // Minimo 15% vocales en texto + 'max_consonant_sequence' => 6, // Maximo 6 consonantes seguidas + 'max_uppercase_ratio' => 0.5, // Maximo 50% mayusculas + 'min_word_length_avg' => 2, // Promedio minimo de longitud de palabra + 'max_word_length' => 25, // Longitud maxima de palabra + 'max_repeated_chars' => 4, // Maximo 4 caracteres repetidos seguidos + 'log_blocked' => true, // Loguear intentos bloqueados + ]; + + private array $config; + + public function __construct(array $config = []) + { + $this->config = array_merge(self::DEFAULT_CONFIG, $config); + } + + /** + * Validar formulario de contacto + * + * @param array $data Datos del formulario [fullName, company, whatsapp, email, message] + * @return array ['valid' => bool, 'reason' => string] + */ + public function validateContactForm(array $data): array + { + $fieldsToCheck = [ + 'fullName' => $data['fullName'] ?? '', + 'company' => $data['company'] ?? '', + 'whatsapp' => $data['whatsapp'] ?? '', + 'message' => $data['message'] ?? '', + ]; + + foreach ($fieldsToCheck as $fieldName => $value) { + if (empty($value)) { + continue; + } + + $result = $this->analyzeText($value, $fieldName); + if (!$result['valid']) { + $this->logBlocked('contact-form', $fieldName, $value, $result['reason']); + return $result; + } + } + + // Validacion cruzada: si multiples campos parecen aleatorios + $suspiciousCount = 0; + foreach ($fieldsToCheck as $fieldName => $value) { + if (!empty($value) && $this->looksRandom($value)) { + $suspiciousCount++; + } + } + + if ($suspiciousCount >= 2) { + $reason = 'Multiples campos con contenido sospechoso'; + $this->logBlocked('contact-form', 'multiple', implode(' | ', $fieldsToCheck), $reason); + return ['valid' => false, 'reason' => $reason]; + } + + return ['valid' => true, 'reason' => '']; + } + + /** + * Validar formulario de newsletter + * + * @param array $data Datos del formulario [name, whatsapp] + * @return array ['valid' => bool, 'reason' => string] + */ + public function validateNewsletterForm(array $data): array + { + $fieldsToCheck = [ + 'name' => $data['name'] ?? '', + 'whatsapp' => $data['whatsapp'] ?? '', + ]; + + foreach ($fieldsToCheck as $fieldName => $value) { + if (empty($value)) { + continue; + } + + $result = $this->analyzeText($value, $fieldName); + if (!$result['valid']) { + $this->logBlocked('newsletter', $fieldName, $value, $result['reason']); + return $result; + } + } + + return ['valid' => true, 'reason' => '']; + } + + /** + * Analizar texto individual + */ + private function analyzeText(string $text, string $fieldName = ''): array + { + $text = trim($text); + + if (empty($text)) { + return ['valid' => true, 'reason' => '']; + } + + // 1. Detectar exceso de consonantes seguidas (gibberish) + if ($this->hasExcessiveConsonants($text)) { + return [ + 'valid' => false, + 'reason' => 'Texto con patron de caracteres invalido' + ]; + } + + // 2. Detectar ratio de vocales muy bajo (para textos latinos) + if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) { + return [ + 'valid' => false, + 'reason' => 'Texto no parece ser legible' + ]; + } + + // 3. Detectar exceso de mayusculas mezcladas + if ($this->hasExcessiveMixedCase($text)) { + return [ + 'valid' => false, + 'reason' => 'Formato de texto invalido' + ]; + } + + // 4. Detectar caracteres repetidos + if ($this->hasRepeatedChars($text)) { + return [ + 'valid' => false, + 'reason' => 'Texto con caracteres repetidos invalidos' + ]; + } + + // 5. Detectar palabras extremadamente largas sin espacios + if ($this->hasExtremelyLongWords($text)) { + return [ + 'valid' => false, + 'reason' => 'Texto con formato invalido' + ]; + } + + // 6. Para campos de nombre, validar que parezca un nombre real + if (in_array($fieldName, ['fullName', 'name']) && !$this->looksLikeName($text)) { + return [ + 'valid' => false, + 'reason' => 'El nombre no tiene un formato valido' + ]; + } + + return ['valid' => true, 'reason' => '']; + } + + /** + * Detectar exceso de consonantes seguidas + */ + private function hasExcessiveConsonants(string $text): bool + { + $consonants = 'bcdfghjklmnpqrstvwxyzBCDFGHJKLMNPQRSTVWXYZ'; + $maxSequence = $this->config['max_consonant_sequence']; + + $count = 0; + for ($i = 0; $i < strlen($text); $i++) { + if (strpos($consonants, $text[$i]) !== false) { + $count++; + if ($count > $maxSequence) { + return true; + } + } else { + $count = 0; + } + } + + return false; + } + + /** + * Detectar ratio de vocales muy bajo + */ + private function hasLowVowelRatio(string $text): bool + { + $vowels = 'aeiouAEIOUáéíóúÁÉÍÓÚ'; + $letters = preg_replace('/[^a-zA-ZáéíóúÁÉÍÓÚ]/', '', $text); + + if (strlen($letters) < 4) { + return false; // Texto muy corto, no analizar + } + + $vowelCount = 0; + for ($i = 0; $i < strlen($letters); $i++) { + if (strpos($vowels, $letters[$i]) !== false) { + $vowelCount++; + } + } + + $ratio = $vowelCount / strlen($letters); + return $ratio < $this->config['min_vowel_ratio']; + } + + /** + * Verificar si es texto latino (español/ingles) + */ + private function isLatinText(string $text): bool + { + // Si mas del 80% son caracteres latinos, considerarlo latino + $latinChars = preg_match_all('/[a-zA-ZáéíóúÁÉÍÓÚñÑ]/', $text); + $totalChars = preg_match_all('/\S/', $text); + + if ($totalChars === 0) { + return false; + } + + return ($latinChars / $totalChars) > 0.8; + } + + /** + * Detectar exceso de mayusculas mezcladas (ej: "MDhFfVCCKZYU") + */ + private function hasExcessiveMixedCase(string $text): bool + { + $letters = preg_replace('/[^a-zA-Z]/', '', $text); + + if (strlen($letters) < 5) { + return false; + } + + $uppercase = preg_match_all('/[A-Z]/', $letters); + $lowercase = preg_match_all('/[a-z]/', $letters); + + // Si hay mas de 50% mayusculas Y hay alternancia frecuente + if ($uppercase > 0 && $lowercase > 0) { + $ratio = $uppercase / strlen($letters); + if ($ratio > $this->config['max_uppercase_ratio']) { + // Verificar alternancia (MdHfF tipo patron) + $switches = 0; + for ($i = 1; $i < strlen($letters); $i++) { + $prevUpper = ctype_upper($letters[$i - 1]); + $currUpper = ctype_upper($letters[$i]); + if ($prevUpper !== $currUpper) { + $switches++; + } + } + + // Si hay muchos cambios de caso, es sospechoso + $switchRatio = $switches / strlen($letters); + if ($switchRatio > 0.4) { + return true; + } + } + } + + return false; + } + + /** + * Detectar caracteres repetidos (ej: "aaaa" o "xxxx") + */ + private function hasRepeatedChars(string $text): bool + { + $maxRepeated = $this->config['max_repeated_chars']; + $pattern = '/(.)\1{' . $maxRepeated . ',}/'; + + return (bool) preg_match($pattern, $text); + } + + /** + * Detectar palabras extremadamente largas + */ + private function hasExtremelyLongWords(string $text): bool + { + $words = preg_split('/\s+/', $text); + $maxLength = $this->config['max_word_length']; + + foreach ($words as $word) { + if (strlen($word) > $maxLength) { + return true; + } + } + + return false; + } + + /** + * Verificar si parece un nombre real + */ + private function looksLikeName(string $text): bool + { + // Un nombre debe tener: + // 1. Al menos 2 caracteres + // 2. Empezar con letra + // 3. No tener numeros excesivos + // 4. Tener un ratio razonable de vocales + + if (strlen($text) < 2) { + return false; + } + + // Verificar que empiece con letra + if (!preg_match('/^[a-zA-ZáéíóúÁÉÍÓÚñÑ]/', $text)) { + return false; + } + + // No mas de 2 numeros en un nombre + $numberCount = preg_match_all('/[0-9]/', $text); + if ($numberCount > 2) { + return false; + } + + // Verificar vocales (un nombre real tiene vocales) + $vowels = preg_match_all('/[aeiouáéíóúAEIOUÁÉÍÓÚ]/', $text); + $letters = preg_match_all('/[a-zA-ZáéíóúÁÉÍÓÚñÑ]/', $text); + + if ($letters > 3 && $vowels === 0) { + return false; + } + + return true; + } + + /** + * Verificar si el texto parece aleatorio (para validacion cruzada) + */ + private function looksRandom(string $text): bool + { + if (strlen($text) < 5) { + return false; + } + + $score = 0; + + // Consonantes excesivas + if ($this->hasExcessiveConsonants($text)) { + $score += 2; + } + + // Vocales bajas + if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) { + $score += 2; + } + + // Mayusculas mezcladas + if ($this->hasExcessiveMixedCase($text)) { + $score += 2; + } + + // Palabras largas + if ($this->hasExtremelyLongWords($text)) { + $score += 1; + } + + return $score >= 3; + } + + /** + * Loguear intento bloqueado + */ + private function logBlocked(string $source, string $field, string $value, string $reason): void + { + if (!$this->config['log_blocked']) { + return; + } + + $logMessage = sprintf( + 'ROI Theme Spam Blocked [%s] Field: %s | Reason: %s | Value: %s | IP: %s', + $source, + $field, + $reason, + substr($value, 0, 100), + $this->getClientIP() + ); + + error_log($logMessage); + } + + /** + * Obtener IP del cliente + */ + private function getClientIP(): string + { + if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) { + return sanitize_text_field(explode(',', $_SERVER['HTTP_X_FORWARDED_FOR'])[0]); + } + return sanitize_text_field($_SERVER['REMOTE_ADDR'] ?? 'unknown'); + } +} diff --git a/functions.php b/functions.php index de802c8c..e8a2a34e 100644 --- a/functions.php +++ b/functions.php @@ -146,17 +146,22 @@ try { // Obtener servicio de validacion reCAPTCHA para inyectar en handlers $recaptchaService = $container->getRecaptchaValidationService(); + // Crear servicio de deteccion de spam para validar contenido + $spamDetectionService = new \ROITheme\Shared\Application\Services\SpamDetectionService(); + // Crear y registrar el handler AJAX para el Contact Form (público) $contactFormAjaxHandler = new \ROITheme\Public\ContactForm\Infrastructure\Api\WordPress\ContactFormAjaxHandler( $container->getComponentSettingsRepository(), - $recaptchaService + $recaptchaService, + $spamDetectionService ); $contactFormAjaxHandler->register(); // Crear y registrar el handler AJAX para Newsletter (público) $newsletterAjaxHandler = new \ROITheme\Public\Footer\Infrastructure\Api\WordPress\NewsletterAjaxHandler( $container->getComponentSettingsRepository(), - $recaptchaService + $recaptchaService, + $spamDetectionService ); $newsletterAjaxHandler->register();