feat(api): add spam content detection for forms

- Add SpamDetectionService to detect gibberish/random text
- Detect excessive consonants, low vowel ratio, mixed case patterns
- Detect repeated characters and extremely long words
- Validate names look realistic (start with letter, have vowels)
- Cross-validate multiple suspicious fields
- Integrate with ContactFormAjaxHandler and NewsletterAjaxHandler
- Log blocked attempts to debug.log

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
FrankZamora
2026-01-09 09:46:47 -06:00
parent a93e54e1b8
commit eb50c80297
4 changed files with 468 additions and 6 deletions

View File

@@ -5,6 +5,7 @@ namespace ROITheme\Public\ContactForm\Infrastructure\Api\WordPress;
use ROITheme\Shared\Domain\Contracts\ComponentSettingsRepositoryInterface; use ROITheme\Shared\Domain\Contracts\ComponentSettingsRepositoryInterface;
use ROITheme\Shared\Application\Services\RecaptchaValidationService; use ROITheme\Shared\Application\Services\RecaptchaValidationService;
use ROITheme\Shared\Application\Services\SpamDetectionService;
/** /**
* ContactFormAjaxHandler - Procesa envios del formulario de contacto * ContactFormAjaxHandler - Procesa envios del formulario de contacto
@@ -28,7 +29,8 @@ final class ContactFormAjaxHandler
public function __construct( public function __construct(
private ComponentSettingsRepositoryInterface $settingsRepository, private ComponentSettingsRepositoryInterface $settingsRepository,
private ?RecaptchaValidationService $recaptchaService = null private ?RecaptchaValidationService $recaptchaService = null,
private ?SpamDetectionService $spamDetectionService = null
) {} ) {}
/** /**
@@ -83,7 +85,15 @@ final class ContactFormAjaxHandler
return; return;
} }
// 5. Obtener configuracion del componente (incluye webhook URL) // 5. Validar contenido anti-spam (detectar gibberish/texto basura)
if (!$this->validateSpamContent($formData)) {
wp_send_json_error([
'message' => __('El contenido del formulario no es valido. Por favor ingresa informacion real.', 'roi-theme')
], 422);
return;
}
// 6. Obtener configuracion del componente (incluye webhook URL)
$settings = $this->settingsRepository->getComponentSettings(self::COMPONENT_NAME); $settings = $this->settingsRepository->getComponentSettings(self::COMPONENT_NAME);
if (empty($settings)) { if (empty($settings)) {
@@ -338,4 +348,21 @@ final class ContactFormAjaxHandler
// Validar con el servicio // Validar con el servicio
return $this->recaptchaService->validateSubmission($token, $action); return $this->recaptchaService->validateSubmission($token, $action);
} }
/**
 * Run the anti-spam content check on a contact form submission.
 *
 * @param array $formData Sanitized form data
 * @return bool True when the content is acceptable (not spam)
 */
private function validateSpamContent(array $formData): bool
{
    $detector = $this->spamDetectionService;

    // Without an injected detector there is nothing to check: let it through.
    return $detector === null
        ? true
        : $detector->validateContactForm($formData)['valid'];
}
} }

View File

@@ -5,6 +5,7 @@ namespace ROITheme\Public\Footer\Infrastructure\Api\WordPress;
use ROITheme\Shared\Domain\Contracts\ComponentSettingsRepositoryInterface; use ROITheme\Shared\Domain\Contracts\ComponentSettingsRepositoryInterface;
use ROITheme\Shared\Application\Services\RecaptchaValidationService; use ROITheme\Shared\Application\Services\RecaptchaValidationService;
use ROITheme\Shared\Application\Services\SpamDetectionService;
/** /**
* NewsletterAjaxHandler - Procesa suscripciones al newsletter * NewsletterAjaxHandler - Procesa suscripciones al newsletter
@@ -27,7 +28,8 @@ final class NewsletterAjaxHandler
public function __construct( public function __construct(
private ComponentSettingsRepositoryInterface $settingsRepository, private ComponentSettingsRepositoryInterface $settingsRepository,
private ?RecaptchaValidationService $recaptchaService = null private ?RecaptchaValidationService $recaptchaService = null,
private ?SpamDetectionService $spamDetectionService = null
) {} ) {}
/** /**
@@ -81,7 +83,15 @@ final class NewsletterAjaxHandler
return; return;
} }
// 5. Obtener configuracion del componente // 5. Validar contenido anti-spam (detectar gibberish/texto basura)
if (!$this->validateSpamContent(['name' => $name, 'whatsapp' => $whatsapp])) {
wp_send_json_error([
'message' => __('El contenido del formulario no es valido. Por favor ingresa informacion real.', 'roi-theme')
], 422);
return;
}
// 6. Obtener configuracion del componente
$settings = $this->settingsRepository->getComponentSettings(self::COMPONENT_NAME); $settings = $this->settingsRepository->getComponentSettings(self::COMPONENT_NAME);
if (empty($settings)) { if (empty($settings)) {
@@ -238,4 +248,21 @@ final class NewsletterAjaxHandler
// Validar con el servicio // Validar con el servicio
return $this->recaptchaService->validateSubmission($token, $action); return $this->recaptchaService->validateSubmission($token, $action);
} }
/**
 * Run the anti-spam content check on a newsletter subscription.
 *
 * @param array $formData Sanitized form data
 * @return bool True when the content is acceptable (not spam)
 */
private function validateSpamContent(array $formData): bool
{
    $detector = $this->spamDetectionService;

    // Without an injected detector there is nothing to check: let it through.
    return $detector === null
        ? true
        : $detector->validateNewsletterForm($formData)['valid'];
}
} }

View File

@@ -0,0 +1,403 @@
<?php
declare(strict_types=1);
namespace ROITheme\Shared\Application\Services;
/**
 * SpamDetectionService - flags spam/gibberish text in form submissions.
 *
 * Every submitted field is run through a fixed sequence of heuristics
 * (consonant runs, vowel ratio, mixed case, repeated characters, over-long
 * words and, for name fields, a basic "looks like a name" check). The first
 * heuristic that fails rejects the submission.
 *
 * Text is handled as UTF-8: multibyte character classes use the `u` modifier
 * and lengths are measured in characters, so an accented vowel, "ñ" or "ü"
 * counts as exactly one letter.
 *
 * @package ROITheme\Shared\Application\Services
 */
final class SpamDetectionService
{
    /**
     * Default thresholds; any key can be overridden through the constructor.
     */
    private const DEFAULT_CONFIG = [
        'min_vowel_ratio' => 0.15,       // At least 15% of the letters must be vowels
        'max_consonant_sequence' => 6,   // At most 6 consecutive consonants
        'max_uppercase_ratio' => 0.5,    // At most 50% uppercase letters
        'min_word_length_avg' => 2,      // Minimum average word length (not read by any check in this class)
        'max_word_length' => 25,         // Maximum length of a single word, in characters
        'max_repeated_chars' => 4,       // At most 4 identical characters in a row
        'log_blocked' => true,           // Log blocked attempts
    ];

    /**
     * Regex character-class body for the letters the heuristics recognise
     * (ASCII plus the Spanish accented vowels, "ü" and "ñ"). Must be used
     * with the `u` modifier.
     */
    private const LETTER_CLASS = 'a-zA-ZáéíóúüÁÉÍÓÚÜñÑ';

    /**
     * Regex character-class body for vowels. Must be used with the `u` modifier.
     */
    private const VOWEL_CLASS = 'aeiouAEIOUáéíóúüÁÉÍÓÚÜ';

    private array $config;

    /**
     * @param array $config Overrides for DEFAULT_CONFIG, keyed by option name
     */
    public function __construct(array $config = [])
    {
        $this->config = array_merge(self::DEFAULT_CONFIG, $config);
    }

    /**
     * Validate a contact form submission.
     *
     * @param array $data Form data [fullName, company, whatsapp, email, message]
     * @return array ['valid' => bool, 'reason' => string]
     */
    public function validateContactForm(array $data): array
    {
        // The email field is intentionally not inspected here.
        $fields = [
            'fullName' => $data['fullName'] ?? '',
            'company' => $data['company'] ?? '',
            'whatsapp' => $data['whatsapp'] ?? '',
            'message' => $data['message'] ?? '',
        ];

        $failure = $this->findFirstFailure('contact-form', $fields);
        if ($failure !== null) {
            return $failure;
        }

        // Cross-field validation: reject when several fields look random.
        // NOTE(review): looksRandom() only scores signals that analyzeText()
        // already treats as hard failures, so once every field has passed
        // the loop above this count stays at 0 with the current thresholds.
        // Softer thresholds would be needed for this pass to have an effect.
        $suspiciousCount = 0;
        foreach ($fields as $value) {
            if (!empty($value) && $this->looksRandom($value)) {
                $suspiciousCount++;
            }
        }

        if ($suspiciousCount >= 2) {
            $reason = 'Multiples campos con contenido sospechoso';
            $this->logBlocked('contact-form', 'multiple', implode(' | ', $fields), $reason);

            return ['valid' => false, 'reason' => $reason];
        }

        return ['valid' => true, 'reason' => ''];
    }

    /**
     * Validate a newsletter subscription.
     *
     * @param array $data Form data [name, whatsapp]
     * @return array ['valid' => bool, 'reason' => string]
     */
    public function validateNewsletterForm(array $data): array
    {
        $fields = [
            'name' => $data['name'] ?? '',
            'whatsapp' => $data['whatsapp'] ?? '',
        ];

        return $this->findFirstFailure('newsletter', $fields)
            ?? ['valid' => true, 'reason' => ''];
    }

    /**
     * Analyze each non-empty field in order and return the first failure.
     *
     * The failing field is logged (subject to the `log_blocked` option).
     *
     * @param string $source Form identifier used in the log line
     * @param array  $fields Field name => submitted value
     * @return array|null The failing ['valid' => false, 'reason' => string], or null when all pass
     */
    private function findFirstFailure(string $source, array $fields): ?array
    {
        foreach ($fields as $fieldName => $value) {
            if (empty($value)) {
                continue;
            }

            $result = $this->analyzeText($value, $fieldName);
            if (!$result['valid']) {
                $this->logBlocked($source, $fieldName, $value, $result['reason']);

                return $result;
            }
        }

        return null;
    }

    /**
     * Run every heuristic against a single field value.
     *
     * @param string $text      Submitted value
     * @param string $fieldName Field key; 'fullName' and 'name' get the extra name check
     * @return array ['valid' => bool, 'reason' => string]
     */
    private function analyzeText(string $text, string $fieldName = ''): array
    {
        $text = trim($this->ensureUtf8($text));
        if (empty($text)) {
            return ['valid' => true, 'reason' => ''];
        }

        // match(true) evaluates the arms top to bottom and stops at the first
        // one that holds, so the order below is the order of the checks.
        $reason = match (true) {
            // 1. Too many consecutive consonants (gibberish)
            $this->hasExcessiveConsonants($text)
                => 'Texto con patron de caracteres invalido',
            // 2. Very low vowel ratio (only meaningful for Latin-script text)
            $this->hasLowVowelRatio($text) && $this->isLatinText($text)
                => 'Texto no parece ser legible',
            // 3. Mostly uppercase with frequent case switching
            $this->hasExcessiveMixedCase($text)
                => 'Formato de texto invalido',
            // 4. Long runs of the same character
            $this->hasRepeatedChars($text)
                => 'Texto con caracteres repetidos invalidos',
            // 5. Extremely long words without spaces
            $this->hasExtremelyLongWords($text)
                => 'Texto con formato invalido',
            // 6. Name fields must look like a real name
            in_array($fieldName, ['fullName', 'name'], true) && !$this->looksLikeName($text)
                => 'El nombre no tiene un formato valido',
            default => null,
        };

        return $reason === null
            ? ['valid' => true, 'reason' => '']
            : ['valid' => false, 'reason' => $reason];
    }

    /**
     * Return the text unchanged when it is valid UTF-8; otherwise replace
     * every non-ASCII byte with "?" so the `u`-modifier patterns used by the
     * heuristics cannot fail on malformed input.
     */
    private function ensureUtf8(string $text): string
    {
        if (preg_match('//u', $text) === 1) {
            return $text;
        }

        return (string) preg_replace('/[\x80-\xFF]/', '?', $text);
    }

    /**
     * Count the matches of a pattern, treating a PCRE failure as zero matches.
     */
    private function countMatches(string $pattern, string $text): int
    {
        $count = preg_match_all($pattern, $text);

        return $count === false ? 0 : $count;
    }

    /**
     * Detect a run of more than `max_consonant_sequence` consecutive consonants.
     *
     * Only ASCII consonants are considered ("y" included); any other
     * character, including "ñ", ends the current run.
     */
    private function hasExcessiveConsonants(string $text): bool
    {
        // "More than N in a row" is the same as "a run of at least N + 1".
        $minRun = max((int) $this->config['max_consonant_sequence'], 0) + 1;

        return preg_match('/[b-df-hj-np-tv-z]{' . $minRun . ',}/i', $text) === 1;
    }

    /**
     * Detect a vowel ratio below `min_vowel_ratio`.
     *
     * Texts with fewer than 4 recognised letters are too short to judge and
     * are never flagged.
     */
    private function hasLowVowelRatio(string $text): bool
    {
        $letterCount = $this->countMatches('/[' . self::LETTER_CLASS . ']/u', $text);
        if ($letterCount < 4) {
            return false;
        }

        $vowelCount = $this->countMatches('/[' . self::VOWEL_CLASS . ']/u', $text);

        return ($vowelCount / $letterCount) < $this->config['min_vowel_ratio'];
    }

    /**
     * Check whether the text is Latin-script (Spanish/English): more than
     * 80% of its non-whitespace characters are recognised letters.
     */
    private function isLatinText(string $text): bool
    {
        $totalChars = $this->countMatches('/\S/u', $text);
        if ($totalChars === 0) {
            return false;
        }

        $latinChars = $this->countMatches('/[' . self::LETTER_CLASS . ']/u', $text);

        return ($latinChars / $totalChars) > 0.8;
    }

    /**
     * Detect text that is mostly uppercase AND switches case frequently.
     *
     * Only ASCII letters are inspected. The text is flagged when it has at
     * least 5 letters, contains both cases, more than `max_uppercase_ratio`
     * of the letters are uppercase, and more than 40% of the letters differ
     * in case from the letter before them.
     */
    private function hasExcessiveMixedCase(string $text): bool
    {
        // ASCII-only from here on, so byte-wise functions are safe.
        $letters = (string) preg_replace('/[^a-zA-Z]/', '', $text);
        $length = strlen($letters);
        if ($length < 5) {
            return false;
        }

        $uppercase = $this->countMatches('/[A-Z]/', $letters);
        $lowercase = $length - $uppercase;
        if ($uppercase === 0 || $lowercase === 0) {
            return false;
        }

        if (($uppercase / $length) <= $this->config['max_uppercase_ratio']) {
            return false;
        }

        // Count positions where the case differs from the previous letter.
        $switches = 0;
        for ($i = 1; $i < $length; $i++) {
            if (ctype_upper($letters[$i - 1]) !== ctype_upper($letters[$i])) {
                $switches++;
            }
        }

        return ($switches / $length) > 0.4;
    }

    /**
     * Detect a character followed by at least `max_repeated_chars` copies of
     * itself (with the default of 4: five or more identical characters).
     *
     * NOTE(review): this also applies to digits, so a phone number that
     * contains five identical digits in a row is rejected - confirm that is
     * intended for the whatsapp field.
     */
    private function hasRepeatedChars(string $text): bool
    {
        $maxRepeated = (int) $this->config['max_repeated_chars'];

        // The `u` modifier makes "." match a whole character, not one byte.
        return preg_match('/(.)\1{' . $maxRepeated . ',}/u', $text) === 1;
    }

    /**
     * Detect any whitespace-separated word longer than `max_word_length`
     * characters.
     */
    private function hasExtremelyLongWords(string $text): bool
    {
        $maxLength = $this->config['max_word_length'];
        $words = preg_split('/\s+/u', $text) ?: [];

        foreach ($words as $word) {
            if (mb_strlen($word, 'UTF-8') > $maxLength) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check whether the text plausibly is a real name.
     *
     * A name must: have at least 2 characters, start with a recognised
     * letter, contain at most 2 digits, and - when it has more than 3
     * letters - contain at least one vowel.
     */
    private function looksLikeName(string $text): bool
    {
        if (mb_strlen($text, 'UTF-8') < 2) {
            return false;
        }

        // Must start with a letter
        if (preg_match('/^[' . self::LETTER_CLASS . ']/u', $text) !== 1) {
            return false;
        }

        // No more than 2 digits in a name
        if ($this->countMatches('/[0-9]/', $text) > 2) {
            return false;
        }

        // A real name has vowels
        $vowels = $this->countMatches('/[' . self::VOWEL_CLASS . ']/u', $text);
        $letters = $this->countMatches('/[' . self::LETTER_CLASS . ']/u', $text);

        return !($letters > 3 && $vowels === 0);
    }

    /**
     * Score how random a text looks (used by the cross-field validation).
     *
     * Consonant runs, low vowel ratio and mixed case score 2 points each,
     * over-long words 1 point; 3 or more points means "looks random".
     * Texts shorter than 5 characters are never considered random.
     */
    private function looksRandom(string $text): bool
    {
        $text = $this->ensureUtf8($text);
        if (mb_strlen($text, 'UTF-8') < 5) {
            return false;
        }

        $score = 0;

        if ($this->hasExcessiveConsonants($text)) {
            $score += 2;
        }

        if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) {
            $score += 2;
        }

        if ($this->hasExcessiveMixedCase($text)) {
            $score += 2;
        }

        if ($this->hasExtremelyLongWords($text)) {
            $score += 1;
        }

        return $score >= 3;
    }

    /**
     * Write a blocked attempt to the PHP error log, unless `log_blocked` is off.
     *
     * @param string $source Form identifier
     * @param string $field  Name of the offending field
     * @param string $value  Submitted value (untrusted)
     * @param string $reason Why it was blocked
     */
    private function logBlocked(string $source, string $field, string $value, string $reason): void
    {
        if (!$this->config['log_blocked']) {
            return;
        }

        // The value is untrusted: collapse control characters (newlines
        // included) so it cannot forge extra log lines, and truncate by
        // character so a multibyte sequence is never cut in half.
        $safeValue = (string) preg_replace('/[\x00-\x1F\x7F]+/', ' ', $value);

        error_log(sprintf(
            'ROI Theme Spam Blocked [%s] Field: %s | Reason: %s | Value: %s | IP: %s',
            $source,
            $field,
            $reason,
            mb_substr($safeValue, 0, 100, 'UTF-8'),
            $this->getClientIP()
        ));
    }

    /**
     * Get the client IP for the log line.
     *
     * SECURITY NOTE: X-Forwarded-For is supplied by the client and can be
     * spoofed. The value is used for logging only; do not rely on it for
     * access decisions.
     */
    private function getClientIP(): string
    {
        if (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
            // The header may list several hops; the first entry is taken.
            return sanitize_text_field(explode(',', $_SERVER['HTTP_X_FORWARDED_FOR'])[0]);
        }

        return sanitize_text_field($_SERVER['REMOTE_ADDR'] ?? 'unknown');
    }
}

View File

@@ -146,17 +146,22 @@ try {
// Obtener servicio de validacion reCAPTCHA para inyectar en handlers // Obtener servicio de validacion reCAPTCHA para inyectar en handlers
$recaptchaService = $container->getRecaptchaValidationService(); $recaptchaService = $container->getRecaptchaValidationService();
// Crear servicio de deteccion de spam para validar contenido
$spamDetectionService = new \ROITheme\Shared\Application\Services\SpamDetectionService();
// Crear y registrar el handler AJAX para el Contact Form (público) // Crear y registrar el handler AJAX para el Contact Form (público)
$contactFormAjaxHandler = new \ROITheme\Public\ContactForm\Infrastructure\Api\WordPress\ContactFormAjaxHandler( $contactFormAjaxHandler = new \ROITheme\Public\ContactForm\Infrastructure\Api\WordPress\ContactFormAjaxHandler(
$container->getComponentSettingsRepository(), $container->getComponentSettingsRepository(),
$recaptchaService $recaptchaService,
$spamDetectionService
); );
$contactFormAjaxHandler->register(); $contactFormAjaxHandler->register();
// Crear y registrar el handler AJAX para Newsletter (público) // Crear y registrar el handler AJAX para Newsletter (público)
$newsletterAjaxHandler = new \ROITheme\Public\Footer\Infrastructure\Api\WordPress\NewsletterAjaxHandler( $newsletterAjaxHandler = new \ROITheme\Public\Footer\Infrastructure\Api\WordPress\NewsletterAjaxHandler(
$container->getComponentSettingsRepository(), $container->getComponentSettingsRepository(),
$recaptchaService $recaptchaService,
$spamDetectionService
); );
$newsletterAjaxHandler->register(); $newsletterAjaxHandler->register();