Files
roi-theme/Shared/Application/Services/SpamDetectionService.php
FrankZamora eb50c80297 feat(api): add spam content detection for forms
- Add SpamDetectionService to detect gibberish/random text
- Detect excessive consonants, low vowel ratio, mixed case patterns
- Detect repeated characters and extremely long words
- Validate names look realistic (start with letter, have vowels)
- Cross-validate multiple suspicious fields
- Integrate with ContactFormAjaxHandler and NewsletterAjaxHandler
- Log blocked attempts to debug.log

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-09 09:46:47 -06:00

404 lines
12 KiB
PHP

<?php
declare(strict_types=1);
namespace ROITheme\Shared\Application\Services;
/**
 * SpamDetectionService - detects spam/gibberish text in form submissions.
 *
 * RESPONSIBILITY: analyze form content to detect:
 * - Random, meaningless text (gibberish)
 * - Common spam patterns
 * - Suspicious characters
 *
 * Text is treated as UTF-8 and measured in characters rather than bytes, so
 * accented letters (á, ñ, ü, ...) count exactly once in every heuristic.
 *
 * @package ROITheme\Shared\Application\Services
 */
final class SpamDetectionService
{
    /**
     * Default configuration; any key can be overridden through the constructor.
     */
    private const DEFAULT_CONFIG = [
        'min_vowel_ratio' => 0.15,     // At least 15% of the letters must be vowels
        'max_consonant_sequence' => 6, // At most 6 consonants in a row
        'max_uppercase_ratio' => 0.5,  // At most 50% uppercase letters
        'min_word_length_avg' => 2,    // Minimum average word length (not read by any check in this class)
        'max_word_length' => 25,       // Maximum length of a single word
        'max_repeated_chars' => 4,     // At most 4 repetitions following the same character
        'log_blocked' => true,         // Log blocked attempts
    ];

    /**
     * Regex character-class body: ASCII letters plus the Latin-1 accented letters
     * (U+00C0-U+00FF without the multiplication and division signs).
     */
    private const LATIN_LETTERS = 'a-zA-ZÀ-ÖØ-öø-ÿ';

    /**
     * Regex character-class body: plain and accented vowels.
     */
    private const VOWELS = 'aeiouAEIOUàáâãäåèéêëìíîïòóôõöùúûüÀÁÂÃÄÅÈÉÊËÌÍÎÏÒÓÔÕÖÙÚÛÜ';

    private array $config;

    /**
     * @param array $config Overrides for DEFAULT_CONFIG, keyed by option name.
     */
    public function __construct(array $config = [])
    {
        $this->config = array_merge(self::DEFAULT_CONFIG, $config);
    }

    /**
     * Validate a contact form submission.
     *
     * Checks fullName, company, whatsapp and message. Missing, empty and
     * non-scalar values are skipped; type validation stays with the caller.
     *
     * @param array $data Form data [fullName, company, whatsapp, email, message]
     * @return array ['valid' => bool, 'reason' => string]
     */
    public function validateContactForm(array $data): array
    {
        $fields = $this->extractFields($data, ['fullName', 'company', 'whatsapp', 'message']);

        $failure = $this->firstFailure('contact-form', $fields);
        if ($failure !== null) {
            return $failure;
        }

        // Cross-validation: block when several fields look random.
        // NOTE(review): every signal scored by looksRandom() is already a hard
        // failure in analyzeText(), so with the current checks this count cannot
        // reach 2 for fields that got this far. Kept as a safety net; it needs
        // softer per-field thresholds to become effective.
        $suspiciousCount = 0;
        foreach ($fields as $value) {
            if ($value !== '' && $this->looksRandom($value)) {
                $suspiciousCount++;
            }
        }

        if ($suspiciousCount >= 2) {
            $reason = 'Multiples campos con contenido sospechoso';
            $this->logBlocked('contact-form', 'multiple', implode(' | ', $fields), $reason);

            return ['valid' => false, 'reason' => $reason];
        }

        return ['valid' => true, 'reason' => ''];
    }

    /**
     * Validate a newsletter form submission.
     *
     * Checks name and whatsapp. Missing, empty and non-scalar values are skipped.
     *
     * @param array $data Form data [name, whatsapp]
     * @return array ['valid' => bool, 'reason' => string]
     */
    public function validateNewsletterForm(array $data): array
    {
        $fields = $this->extractFields($data, ['name', 'whatsapp']);

        return $this->firstFailure('newsletter', $fields) ?? ['valid' => true, 'reason' => ''];
    }

    /**
     * Pick the named fields out of the submission as valid UTF-8 strings.
     *
     * Scalars are cast to string; anything else (arrays, objects, null) becomes
     * '' so that a malformed request cannot raise a TypeError further down.
     *
     * @param array $data  Raw form data.
     * @param array $names Field names to extract.
     * @return array Map of field name => string value.
     */
    private function extractFields(array $data, array $names): array
    {
        $fields = [];
        foreach ($names as $name) {
            $raw = $data[$name] ?? '';
            $fields[$name] = is_scalar($raw) ? $this->normalizeEncoding((string) $raw) : '';
        }

        return $fields;
    }

    /**
     * Make sure the text is valid UTF-8.
     *
     * The `u`-flag patterns used by the checks return false on malformed UTF-8,
     * which would silently disable them. If the text is not valid UTF-8, every
     * non-ASCII byte is replaced by "?" so the checks still run.
     */
    private function normalizeEncoding(string $text): string
    {
        if (preg_match('//u', $text) === 1) {
            return $text;
        }

        return preg_replace('/[\x80-\xFF]/', '?', $text) ?? '';
    }

    /**
     * Run the per-field analysis and return the first failing result.
     *
     * The failing value is logged through logBlocked() before returning.
     *
     * @param string $source Form identifier used in the log line.
     * @param array  $fields Map of field name => string value.
     * @return array|null The failing result, or null when every field passes.
     */
    private function firstFailure(string $source, array $fields): ?array
    {
        foreach ($fields as $fieldName => $value) {
            if ($value === '') {
                continue;
            }

            $result = $this->analyzeText($value, (string) $fieldName);
            if (!$result['valid']) {
                $this->logBlocked($source, (string) $fieldName, $value, $result['reason']);

                return $result;
            }
        }

        return null;
    }

    /**
     * Analyze a single text value.
     *
     * Checks run in a fixed order and the first one that fires decides the
     * reason. Name fields ('fullName', 'name') get an extra plausibility check.
     *
     * @return array ['valid' => bool, 'reason' => string]
     */
    private function analyzeText(string $text, string $fieldName = ''): array
    {
        $text = trim($text);
        if ($text === '') {
            return ['valid' => true, 'reason' => ''];
        }

        // 1. Too many consonants in a row (gibberish)
        if ($this->hasExcessiveConsonants($text)) {
            return ['valid' => false, 'reason' => 'Texto con patron de caracteres invalido'];
        }

        // 2. Very low vowel ratio (only meaningful for Latin-script text)
        if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) {
            return ['valid' => false, 'reason' => 'Texto no parece ser legible'];
        }

        // 3. Erratic mix of upper and lower case
        if ($this->hasExcessiveMixedCase($text)) {
            return ['valid' => false, 'reason' => 'Formato de texto invalido'];
        }

        // 4. Long runs of one repeated character
        if ($this->hasRepeatedChars($text)) {
            return ['valid' => false, 'reason' => 'Texto con caracteres repetidos invalidos'];
        }

        // 5. Extremely long words without spaces
        if ($this->hasExtremelyLongWords($text)) {
            return ['valid' => false, 'reason' => 'Texto con formato invalido'];
        }

        // 6. Name fields must look like a real name
        if (in_array($fieldName, ['fullName', 'name'], true) && !$this->looksLikeName($text)) {
            return ['valid' => false, 'reason' => 'El nombre no tiene un formato valido'];
        }

        return ['valid' => true, 'reason' => ''];
    }

    /**
     * Detect a run of ASCII consonants longer than 'max_consonant_sequence'.
     *
     * "y" counts as a consonant; any other character (vowel, digit, space,
     * accented letter) ends the run.
     */
    private function hasExcessiveConsonants(string $text): bool
    {
        $limit = max(0, (int) $this->config['max_consonant_sequence']);

        // "More than $limit in a row" means a run of at least $limit + 1.
        $pattern = '/[b-df-hj-np-tv-z]{' . ($limit + 1) . ',}/i';

        return preg_match($pattern, $text) === 1;
    }

    /**
     * Detect a vowel share below 'min_vowel_ratio' among the Latin letters.
     *
     * Texts with fewer than 4 letters are too short to judge and never match.
     */
    private function hasLowVowelRatio(string $text): bool
    {
        $letterCount = $this->countMatches('/[' . self::LATIN_LETTERS . ']/u', $text);
        if ($letterCount < 4) {
            return false;
        }

        $vowelCount = $this->countMatches('/[' . self::VOWELS . ']/u', $text);

        return ($vowelCount / $letterCount) < $this->config['min_vowel_ratio'];
    }

    /**
     * Check whether the text is Latin-script (Spanish/English style).
     *
     * True when more than 80% of the non-whitespace characters are Latin letters.
     */
    private function isLatinText(string $text): bool
    {
        $visibleCount = $this->countMatches('/\S/u', $text);
        if ($visibleCount === 0) {
            return false;
        }

        $latinCount = $this->countMatches('/[' . self::LATIN_LETTERS . ']/u', $text);

        return ($latinCount / $visibleCount) > 0.8;
    }

    /**
     * Detect erratic upper/lower-case mixing (e.g. "MDhFfVCCKZYU").
     *
     * Only ASCII letters are considered. Fires when both cases are present, the
     * uppercase share exceeds 'max_uppercase_ratio', and the case flips between
     * neighbouring letters in more than 40% of the letters.
     */
    private function hasExcessiveMixedCase(string $text): bool
    {
        $letters = preg_replace('/[^a-zA-Z]/', '', $text) ?? '';
        $total = strlen($letters); // ASCII only at this point, so bytes == characters
        if ($total < 5) {
            return false;
        }

        $upperCount = $this->countMatches('/[A-Z]/', $letters);
        if ($upperCount === 0 || $upperCount === $total) {
            return false; // single-case text is never "mixed"
        }

        if (($upperCount / $total) <= $this->config['max_uppercase_ratio']) {
            return false;
        }

        // Count positions where the case differs from the previous letter.
        $switches = 0;
        for ($i = 1; $i < $total; $i++) {
            if (ctype_upper($letters[$i - 1]) !== ctype_upper($letters[$i])) {
                $switches++;
            }
        }

        return ($switches / $total) > 0.4;
    }

    /**
     * Detect one character repeated too often in a row (e.g. "aaaaa", "xxxxx").
     *
     * Fires when a character is followed by at least 'max_repeated_chars'
     * copies of itself. Any character counts, including digits and punctuation.
     */
    private function hasRepeatedChars(string $text): bool
    {
        $limit = max(0, (int) $this->config['max_repeated_chars']);
        $pattern = '/(.)\1{' . $limit . ',}/u';

        return preg_match($pattern, $text) === 1;
    }

    /**
     * Detect a whitespace-delimited word longer than 'max_word_length' characters.
     */
    private function hasExtremelyLongWords(string $text): bool
    {
        $limit = max(0, (int) $this->config['max_word_length']);

        // A word longer than $limit is a run of at least $limit + 1 non-space characters.
        $pattern = '/\S{' . ($limit + 1) . ',}/u';

        return preg_match($pattern, $text) === 1;
    }

    /**
     * Check whether the text plausibly is a person's name.
     *
     * A name must:
     * 1. Have at least 2 characters
     * 2. Start with a Latin letter
     * 3. Contain at most 2 digits
     * 4. Contain at least one vowel once it has more than 3 letters
     */
    private function looksLikeName(string $text): bool
    {
        if ($this->charLength($text) < 2) {
            return false;
        }

        if (preg_match('/^[' . self::LATIN_LETTERS . ']/u', $text) !== 1) {
            return false;
        }

        if ($this->countMatches('/[0-9]/', $text) > 2) {
            return false;
        }

        $letterCount = $this->countMatches('/[' . self::LATIN_LETTERS . ']/u', $text);
        $vowelCount = $this->countMatches('/[' . self::VOWELS . ']/u', $text);

        return !($letterCount > 3 && $vowelCount === 0);
    }

    /**
     * Score how random the text looks (used for cross-field validation).
     *
     * Excess consonants, low vowel ratio (Latin text only) and mixed case score
     * 2 points each, an extremely long word scores 1; 3 or more points count as
     * random. Texts shorter than 5 characters never count.
     */
    private function looksRandom(string $text): bool
    {
        if ($this->charLength($text) < 5) {
            return false;
        }

        $score = 0;
        if ($this->hasExcessiveConsonants($text)) {
            $score += 2;
        }
        if ($this->hasLowVowelRatio($text) && $this->isLatinText($text)) {
            $score += 2;
        }
        if ($this->hasExcessiveMixedCase($text)) {
            $score += 2;
        }
        if ($this->hasExtremelyLongWords($text)) {
            $score += 1;
        }

        return $score >= 3;
    }

    /**
     * Count pattern matches, treating a PCRE failure as zero matches.
     */
    private function countMatches(string $pattern, string $text): int
    {
        return (int) preg_match_all($pattern, $text);
    }

    /**
     * Length of the text in UTF-8 characters rather than bytes.
     */
    private function charLength(string $text): int
    {
        return (int) preg_match_all('/./us', $text);
    }

    /**
     * Write a blocked attempt to the PHP error log (when 'log_blocked' is on).
     *
     * The submitted value is user-controlled: control characters are collapsed
     * to spaces so a multi-line value cannot forge extra log lines, and only
     * the first 100 characters are written.
     */
    private function logBlocked(string $source, string $field, string $value, string $reason): void
    {
        if (!$this->config['log_blocked']) {
            return;
        }

        $printable = preg_replace('/[\x00-\x1F\x7F]+/', ' ', $value) ?? '';

        // Cut on a character boundary; fall back to bytes if the text is not valid UTF-8.
        $excerpt = preg_match('/^.{0,100}/us', $printable, $match) === 1
            ? $match[0]
            : substr($printable, 0, 100);

        error_log(sprintf(
            'ROI Theme Spam Blocked [%s] Field: %s | Reason: %s | Value: %s | IP: %s',
            $source,
            $field,
            $reason,
            $excerpt,
            $this->getClientIP()
        ));
    }

    /**
     * Best-effort client IP for the log line.
     *
     * Prefers the first X-Forwarded-For entry, then REMOTE_ADDR. The header is
     * client-supplied and can be spoofed, so the result is only suitable for
     * logging. Values that are not syntactically valid IP addresses are ignored.
     *
     * @return string A valid IPv4/IPv6 address, or 'unknown'.
     */
    private function getClientIP(): string
    {
        $forwarded = $_SERVER['HTTP_X_FORWARDED_FOR'] ?? '';
        if (is_string($forwarded) && $forwarded !== '') {
            $first = trim(explode(',', $forwarded)[0]);
            if (filter_var($first, FILTER_VALIDATE_IP) !== false) {
                return $first;
            }
        }

        $remote = $_SERVER['REMOTE_ADDR'] ?? '';
        if (is_string($remote) && filter_var($remote, FILTER_VALIDATE_IP) !== false) {
            return $remote;
        }

        return 'unknown';
    }
}