Commit inicial - WordPress Análisis de Precios Unitarios

- WordPress core y plugins
- Tema Twenty Twenty-Four configurado
- Plugin allow-unfiltered-html.php simplificado
- .gitignore configurado para excluir wp-config.php y uploads

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
root
2025-11-03 21:04:30 -06:00
commit a22573bf0b
24068 changed files with 4993111 additions and 0 deletions

View File

@@ -0,0 +1,96 @@
<?php
namespace WPDRMS\ASP\Index;
defined('ABSPATH') or die("You can't access this file directly.");
if ( !class_exists(__NAMESPACE__ . '\Content') ) {
    /**
     * Static text-normalisation helpers that are applied to strings before they
     * are written to (or matched against) the search index.
     */
    class Content {
        /**
         * Replaces the "œ" ligature with the two letters "oe".
         *
         * @param string|string[] $str Text to fix (str_replace() accepts arrays as well)
         * @return string|string[] The text with every "œ" replaced by "oe"
         */
        public static function mySQLFixes( $str ) {
            return str_replace(
                array(
                    // SELECT œ LIKE oe => FALSE but SELECT œ = oe => TRUE
                    // When doing INSERT with "oe" or "œ" in keyword, it is ignored as it is considered already existing
                    'œ',
                ),
                array(
                    'oe',
                ),
                $str
            );
        }

        /**
         * Removes every code point in the ranges U+0600-U+061F, U+063B-U+063F,
         * U+064B-U+065E and U+066A-U+06FF from the input.
         *
         * Arrays are processed element by element (recursively).
         *
         * @param string|array $str Text, or (nested) array of texts
         * @return string|array Same shape as the input with the code points removed. If PCRE
         *                      cannot process a string (preg_replace() returns null, e.g. for
         *                      malformed UTF-8) that string is returned unchanged.
         */
        public static function arabicRemoveDiacritics( $str ) {
            if ( is_array($str) ) {
                foreach ( $str as $key => $value ) {
                    $str[ $key ] = self::arabicRemoveDiacritics($value);
                }
                return $str;
            }
            $characters = array(
                "~[\x{0600}-\x{061F}]~u",
                "~[\x{063B}-\x{063F}]~u",
                "~[\x{064B}-\x{065E}]~u",
                "~[\x{066A}-\x{06FF}]~u",
            );
            $stripped = preg_replace($characters, '', $str);
            // preg_replace() signals an error with null; do not let that wipe the text
            return $stripped === null ? $str : $stripped;
        }

        /**
         * Strips combining marks from text containing Hebrew and maps Hebrew
         * presentation forms / ligatures to their base letters.
         *
         * The work is only done when the string contains at least one code point in
         * U+0591-U+05F4; any other string is returned untouched. Note that for a
         * matching string ALL non-spacing marks (\p{Mn}) are removed, not only Hebrew ones,
         * and the result is trimmed.
         *
         * Arrays are processed element by element (recursively).
         *
         * @param string|array $str Text, or (nested) array of texts
         * @return string|array Same shape as the input, unvocalized
         */
        public static function hebrewUnvocalize( $str ) {
            if ( is_array($str) ) {
                foreach ( $str as $key => $value ) {
                    $str[ $key ] = self::hebrewUnvocalize($value);
                }
                return $str;
            }
            if ( !preg_match("/[\x{0591}-\x{05F4}]/u", $str) ) {
                return $str;
            }
            $hebrew_common_ligatures = array(
                'ײַ' => 'ײ',
                'ﬠ' => 'ע',
                'ﬡ' => 'א',
                'ﬢ' => 'ד',
                'ﬣ' => 'ה',
                'ﬤ' => 'כ',
                'ﬥ' => 'ל',
                'ﬦ' => 'ם',
                'ﬧ' => 'ר',
                'ﬨ' => 'ת',
                'שׁ' => 'ש',
                'שׂ' => 'ש',
                'שּׁ' => 'ש',
                'שּׂ' => 'ש',
                'אַ' => 'א',
                'אָ' => 'א',
                'אּ' => 'א',
                'בּ' => 'ב',
                'גּ' => 'ג',
                'דּ' => 'ד',
                'הּ' => 'ה',
                'וּ' => 'ו',
                'זּ' => 'ז',
                'טּ' => 'ט',
                'יּ' => 'י',
                'ךּ' => 'ך',
                'כּ' => 'כ',
                'לּ' => 'ל',
                'מּ' => 'מ',
                'נּ' => 'נ',
                'סּ' => 'ס',
                'ףּ' => 'ף',
                'פּ' => 'פ',
                'צּ' => 'צ',
                'קּ' => 'ק',
                'רּ' => 'ר',
                'שּ' => 'ש',
                'תּ' => 'ת',
                'וֹ' => 'ו',
                'בֿ' => 'ב',
                'כֿ' => 'כ',
                'פֿ' => 'פ',
                'ﭏ' => 'אל'
            );
            // First drop the non-spacing marks, then fold the remaining presentation forms
            $str = trim(preg_replace('/\p{Mn}/u', '', $str));
            foreach ( $hebrew_common_ligatures as $word1 => $word2 ) {
                $str = trim(str_replace($word1, $word2, $str));
            }
            return $str;
        }
    }
}

View File

@@ -0,0 +1,563 @@
<?php
namespace WPDRMS\ASP\Index;
use WPDRMS\ASP\Utils\Plugin;
use WPDRMS\ASP\Utils\Str;
defined('ABSPATH') or die("You can't access this file directly.");
if ( !class_exists(__NAMESPACE__ . '\Database') ) {
class Database {
/**
 * @var string Name of the index table as returned by wd_asp()->db->table('index')
 */
private $table_name;

/**
 * Resolves the index table name and pulls in wp-admin/includes/upgrade.php
 * (presumably for dbDelta(), which create() calls).
 */
public function __construct() {
    $this->table_name = wd_asp()->db->table('index');
    require_once ABSPATH . 'wp-admin/includes/upgrade.php';
}
/**
 * Creates the index table (through dbDelta) and adds the secondary indexes
 * that do not exist yet.
 *
 * @return array The SQL statements that were issued, in execution order
 */
function create(): array {
    global $wpdb;

    $executed = array();

    // Charset / collation suffix of the CREATE TABLE statement
    $charset_collate = '';
    if ( ! empty( $wpdb->charset ) ) {
        $charset_collate = "DEFAULT CHARACTER SET $wpdb->charset";
    }
    if ( strpos( $wpdb->collate, '_' ) > 0 ) {
        $charset_collate .= " COLLATE $wpdb->collate";
    }

    $create_table = "
CREATE TABLE IF NOT EXISTS {$this->table_name} (
doc bigint(20) UNSIGNED NOT NULL DEFAULT '0',
term varchar(150) NOT NULL DEFAULT '0',
term_reverse varchar(150) NOT NULL DEFAULT '0',
blogid mediumint(9) UNSIGNED NOT NULL DEFAULT '0',
content smallint(9) UNSIGNED NOT NULL DEFAULT '0',
title tinyint(3) UNSIGNED NOT NULL DEFAULT '0',
comment tinyint(3) UNSIGNED NOT NULL DEFAULT '0',
tag tinyint(3) UNSIGNED NOT NULL DEFAULT '0',
link tinyint(3) UNSIGNED NOT NULL DEFAULT '0',
author tinyint(3) UNSIGNED NOT NULL DEFAULT '0',
excerpt tinyint(3) UNSIGNED NOT NULL DEFAULT '0',
customfield smallint(9) UNSIGNED NOT NULL DEFAULT '0',
post_type varchar(50) NOT NULL DEFAULT 'post',
lang varchar(20) NOT NULL DEFAULT '0',
PRIMARY KEY doctermitem (doc, term, blogid)) $charset_collate";
    dbDelta( $create_table );
    $executed[] = $create_table;

    // Collect the names of the indexes the table already has
    $present = array();
    foreach ( $wpdb->get_results( "SHOW INDEX FROM $this->table_name" ) as $row ) {
        if ( isset( $row->Key_name ) ) {
            $present[] = $row->Key_name;
        }
    }

    // Worst case scenario optimal indexes
    $wanted = array(
        'term_ptype_bid_lang'  => "CREATE INDEX term_ptype_bid_lang ON $this->table_name (term(20), post_type(20), blogid, lang(10))",
        'rterm_ptype_bid_lang' => "CREATE INDEX rterm_ptype_bid_lang ON $this->table_name (term_reverse(20), post_type(20), blogid, lang(10))",
        'doc'                  => "CREATE INDEX `doc` ON $this->table_name (`doc`)",
    );
    foreach ( $wanted as $index_name => $statement ) {
        if ( in_array( $index_name, $present ) ) {
            continue;
        }
        $wpdb->query( $statement );
        $executed[] = $statement;
    }

    return $executed;
}
/**
 * Schema migration for the index table.
 *
 * Runs only when Plugin::previousVersion('4.20.2') is truthy and the table still has a
 * `taxonomy` column: drops the `taxonomy`, `category` and `item` columns, redefines the
 * counter columns and finally optimizes the table.
 *
 * @return void
 */
public function scheduled() {
    global $wpdb;

    // 4.20.3
    if ( !Plugin::previousVersion('4.20.2') ) {
        return;
    }
    $has_legacy_column = $wpdb->get_var( "SHOW COLUMNS FROM `$this->table_name` LIKE 'taxonomy';" );
    if ( !$has_legacy_column ) {
        return;
    }

    $statements = array(
        "ALTER TABLE `$this->table_name`
DROP COLUMN `taxonomy`,
DROP COLUMN `category`,
DROP COLUMN `item`",
        "ALTER TABLE `$this->table_name`
MODIFY COLUMN `content` smallint(9) UNSIGNED,
MODIFY COLUMN `title` tinyint(3) UNSIGNED,
MODIFY COLUMN `comment` tinyint(3) UNSIGNED,
MODIFY COLUMN `tag` tinyint(3) UNSIGNED,
MODIFY COLUMN `link` tinyint(3) UNSIGNED,
MODIFY COLUMN `author` tinyint(3) UNSIGNED,
MODIFY COLUMN `excerpt` tinyint(3) UNSIGNED,
MODIFY COLUMN `customfield` smallint(9) UNSIGNED",
        "OPTIMIZE TABLE `$this->table_name`",
    );
    foreach ( $statements as $statement ) {
        $wpdb->query( $statement );
    }
}
/**
 * Issues an OPTIMIZE TABLE statement for the index table.
 *
 * @return bool|false|int Whatever $wpdb->query() returns for the statement
 */
public function optimize() {
    global $wpdb;
    // In innoDB this is mapped to "ALTER TABLE .. FORCE", aka. defragmenting
    // OPTIMIZE only needs SELECT and INSERT privileges
    $statement = "OPTIMIZE TABLE {$this->table_name}";
    return $wpdb->query( $statement );
}
/**
 * Empties the index table with a TRUNCATE TABLE statement.
 *
 * @return void
 */
public function truncate() {
    global $wpdb;
    $statement = "TRUNCATE TABLE {$this->table_name}";
    $wpdb->query( $statement );
}
/**
 * Deletes every index row that belongs to the given document(s).
 *
 * @param int|array $post_id A single post ID, or an array of post IDs
 * @return void
 */
function removeDocument( $post_id ) {
    global $wpdb;

    if ( is_array($post_id) ) {
        // Force integers so nothing but numbers can reach the IN() list
        $ids = array_map('intval', $post_id);
        if ( count($ids) === 0 ) {
            // "IN ()" is not valid SQL, and there is nothing to delete anyway
            return;
        }
        $post_ids = implode(', ', $ids);
        $wpdb->query( "DELETE FROM $this->table_name WHERE doc IN ($post_ids)" );
    } else {
        $wpdb->query(
            $wpdb->prepare(
                "DELETE FROM $this->table_name WHERE doc = %d",
                $post_id
            )
        );
    }
    /*
    DO NOT call finishOperation() here, it would switch back the blog too early.
    Calling this function from an action hooks does not require switching the blog,
    as the correct one is in use there.
    */
}
/**
 * Builds the value tuples for the given tokens and writes them to the index
 * table with INSERT IGNORE, 200 rows per statement.
 *
 * Each token is an array holding '_keyword' and the per-field counters
 * ('content', 'title', 'comment', 'tag', 'link', 'author', 'excerpt', 'customfield').
 * When a token carries '_no_reverse' === true an empty string is stored as the
 * reversed term, otherwise the database computes it with REVERSE().
 *
 * @param object $the_post Post object; ->ID and ->post_type are read
 * @param array  $tokens   Tokens to store
 * @param int    $blog_id  Blog ID stored with every row
 * @param string $lang     Language code stored with every row
 * @return int|false Number of tokens received, or false when there were none
 */
function insertTokensToDB( $the_post, $tokens, $blog_id, $lang ) {
    global $wpdb;

    if ( count( $tokens ) <= 0 ) {
        return false;
    }

    $table = $this->table_name;
    // Sends one batch of already prepared value tuples to the database
    $flush = function ( array $rows ) use ( $wpdb, $table ) {
        $values = implode( ', ', $rows );
        $query = "INSERT IGNORE INTO $table
(`doc`, `term`, `term_reverse`, `blogid`, `content`, `title`, `comment`, `tag`, `link`, `author`,
`excerpt`, `customfield`, `post_type`, `lang`)
VALUES $values";
        $wpdb->query( $query );
    };

    $batch = array();
    foreach ( $tokens as $token ) {
        // If it's numeric, delete the leading space
        $term         = trim( $token['_keyword'] );
        $skip_reverse = isset($token['_no_reverse']) && $token['_no_reverse'] === true;

        $batch[] = $wpdb->prepare(
            $skip_reverse
                ? '(%d, %s, %s, %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s)'
                : '(%d, %s, REVERSE(%s), %d, %d, %d, %d, %d, %d, %d, %d, %d, %s, %s)',
            $the_post->ID,
            $term,
            $skip_reverse ? '' : $term,
            $blog_id,
            $token['content'],
            $token['title'],
            $token['comment'],
            $token['tag'],
            $token['link'],
            $token['author'],
            $token['excerpt'],
            $token['customfield'],
            $the_post->post_type,
            $lang
        );

        // Split INSERT at every 200 records
        if ( count( $batch ) > 199 ) {
            $flush( $batch );
            $batch = array();
        }
    }

    // Add the remaining as well
    if ( count( $batch ) > 0 ) {
        $flush( $batch );
    }

    return count( $tokens );
}
/**
 * Selects the next batch of posts that should be indexed.
 *
 * Keys read from $args here: 'post_statuses' (comma separated), 'post_types' (array),
 * 'attachment_mime_types', 'post_password_protected', 'blog_id', 'limit' (capped at 1000)
 * and 'extend'. When 'extend' is truthy only posts without rows in the index table for
 * that blog are returned.
 *
 * @param array $args            Index arguments, see above
 * @param array $posts_to_ignore Post IDs to leave out, keyed by blog ID
 * @return array Result rows of $wpdb->get_results() (one "ID" column per row, ordered by
 *               ID ascending); an empty array when no post types are given or the query fails
 */
function getPostIdsToIndex( $args, $posts_to_ignore ): array {
    global $wpdb;

    if ( count($args['post_types']) <= 0 ) {
        return array();
    }

    // Used inside SQL below, so make sure it can only ever be a number
    $blog_id = (int) $args['blog_id'];

    $_statuses    = array_map('trim', explode(',', $args['post_statuses']));
    $valid_status = "'" . implode("', '", $_statuses ) . "'";

    $parent_join = '';
    $post_types  = $args['post_types'];
    if ( class_exists('WooCommerce') && in_array('product_variation', $post_types) ) { // Special case for Woo variations
        $post_types = array_diff($post_types, array( 'product_variation' ));
        $rest       = '';
        if ( count($post_types) > 0 ) {
            $rest = " OR post.post_type IN('" . implode("', '", $post_types) . "') ";
        }
        // In case of product variation the parent post status must also match, otherwise it is not relevant
        $parent_join = "LEFT JOIN $wpdb->posts parent ON (post.post_parent = parent.ID)";
        $restriction = " AND ( (post.post_type = 'product_variation' AND parent.post_status IN($valid_status) ) $rest )";
    } else {
        $restriction = " AND post.post_type IN ('" . implode("', '", $post_types) . "')";
    }

    $mimes_restrict = '';
    if ( in_array('attachment', $args['post_types'], true) ) {
        $restriction .= $this->getAttachmentDirRestrictionQueryPart( $args );
        if ( $args['attachment_mime_types'] != '' ) {
            $mimes_arr = wpd_comma_separated_to_array($args['attachment_mime_types']);
            if ( count($mimes_arr) > 0 ) {
                $mimes_restrict = "OR ( post.post_status = 'inherit' AND post.post_mime_type IN ('" . implode("','", $mimes_arr) . "') )";
            }
        }
    }

    $post_password = '';
    if ( $args['post_password_protected'] == 0 ) {
        $post_password = " AND (post.post_password = '') ";
    }

    $ignore_posts = '';
    if ( !empty($posts_to_ignore[ $args['blog_id'] ]) ) {
        // Post IDs are integers; cast them so the list cannot break out of the IN() clause
        $ignore_ids   = array_map('intval', (array) $posts_to_ignore[ $args['blog_id'] ]);
        $ignore_posts = ' AND post.ID NOT IN( ' . implode(',', $ignore_ids) . ' )';
    }

    $limit                 = $args['limit'] > 1000 ? 1000 : ( $args['limit'] + 0 );
    $add_where             = apply_filters('asp/index/database/get_posts_to_index/query/add_where', '', $args);
    $add_where_post_status = apply_filters('asp/index/database/get_posts_to_index/query/add_where_post_status', '', $args);

    if ( $args['extend'] ) {
        // We are extending, so keep the existing
        $q = "SELECT post.ID
FROM $wpdb->posts post
$parent_join
LEFT JOIN $this->table_name r ON (post.ID = r.doc AND r.blogid = $blog_id)
WHERE
r.doc is null
AND
(
post.post_status IN ($valid_status)
$mimes_restrict
$add_where_post_status
)
$restriction
$ignore_posts
$post_password
$add_where
ORDER BY post.ID ASC
LIMIT $limit";
    } else {
        $q = "SELECT post.ID
FROM $wpdb->posts post
$parent_join
WHERE
(
post.post_status IN ($valid_status)
$mimes_restrict
$add_where_post_status
)
$restriction
$ignore_posts
$post_password
$add_where
ORDER BY post.ID ASC
LIMIT $limit";
    }

    // get_results() yields null on failure, which would violate the array return type
    $rows = $wpdb->get_results( $q );
    return is_array($rows) ? $rows : array();
}
/**
 * Counts the posts that match the index arguments but have no rows in the
 * index table for the given blog yet.
 *
 * Returns the fixed value 9999 without querying when the ASP_INDEX_BYPASS_COUNT
 * constant is defined.
 *
 * @param array $args            Index arguments (same keys as used by getPostIdsToIndex())
 * @param array $posts_to_ignore Post IDs to leave out, keyed by blog ID
 * @param bool  $check_only      When true a "SELECT 1 ... LIMIT 1" existence probe is run
 *                               instead of a full COUNT
 * @return int The count (or the probe result cast to int); 0 when no post types are given
 */
public function getPostIdsToIndexCount( $args, $posts_to_ignore, $check_only = false ): int {
    if ( defined('ASP_INDEX_BYPASS_COUNT') ) {
        return 9999;
    }
    global $wpdb;

    $status_list  = array_map('trim', explode(',', $args['post_statuses']));
    $valid_status = "'" . implode("', '", $status_list ) . "'";

    if ( count($args['post_types']) <= 0 ) {
        return 0;
    }

    $parent_join = '';
    $types       = $args['post_types'];
    $has_variations = class_exists('WooCommerce') && in_array('product_variation', $types);
    if ( !$has_variations ) {
        $restriction = " AND post.post_type IN ('" . implode("', '", $types) . "')";
    } else { // Special case for Woo variations
        $types = array_diff($types, array( 'product_variation' ));
        // are there any left?
        $rest = count($types) > 0 ? " OR post.post_type IN('" . implode("', '", $types) . "') " : '';
        // In case of product variation the parent post status must also match, otherwise it is not relevant
        $parent_join = "LEFT JOIN $wpdb->posts parent ON (post.post_parent = parent.ID)";
        $restriction = " AND ( (post.post_type = 'product_variation' AND parent.post_status IN($valid_status) ) $rest )";
    }

    $mimes_restrict = '';
    if ( in_array('attachment', $args['post_types'], true) ) {
        $restriction .= $this->getAttachmentDirRestrictionQueryPart( $args );
        if ( $args['attachment_mime_types'] != '' ) {
            $mime_types = wpd_comma_separated_to_array($args['attachment_mime_types']);
            if ( count($mime_types) > 0 ) {
                $mimes_restrict = "OR ( post.post_status = 'inherit' AND post.post_mime_type IN ('" . implode("','", $mime_types) . "') )";
            }
        }
    }

    $post_password = $args['post_password_protected'] == 0 ? " AND (post.post_password = '') " : '';

    $ignore_posts = '';
    if ( !empty($posts_to_ignore[ $args['blog_id'] ]) ) {
        $ignore_posts = ' AND post.ID NOT IN( ' . implode(',', $posts_to_ignore[ $args['blog_id'] ]) . ' )';
    }

    $add_where             = apply_filters('asp/index/database/get_posts_to_index/query/add_where', '', $args);
    $add_where_post_status = apply_filters('asp/index/database/get_posts_to_index/query/add_where_post_status', '', $args);

    // Both variants share everything except the SELECT list and the trailing LIMIT
    $select = $check_only ? 'SELECT 1' : 'SELECT COUNT(DISTINCT post.ID)';
    $tail   = $check_only ? "\nLIMIT 1" : '';

    $q = "$select
FROM $wpdb->posts post
$parent_join
LEFT JOIN $this->table_name r ON (post.ID = r.doc AND r.blogid = " . $args['blog_id'] . ")
WHERE
r.doc is null
AND
(
post.post_status IN ($valid_status)
$mimes_restrict
$add_where_post_status
)
$restriction
$ignore_posts
$post_password
$add_where$tail";

    return intval( $wpdb->get_var( $q ) );
}
/**
 * Builds the SQL fragment that restricts attachments by the directory stored in
 * their "_wp_attached_file" post meta.
 *
 * Reads $args['attachment_exclude_directories'] and $args['attachment_include_directories'].
 * For each non-empty list one " AND ( ... )" group is appended; posts whose meta row is
 * missing or empty pass either group.
 *
 * @param array $args Index arguments
 * @return string SQL fragment, or an empty string when the upload directory reports an
 *                error or no directories are configured
 */
private function getAttachmentDirRestrictionQueryPart( array $args ): string {
    global $wpdb;

    $uploads = wp_get_upload_dir();
    if ( false !== $uploads['error'] ) {
        return '';
    }

    /**
     * This parts makes corrections to "trim" the absolute path to the upload directory
     * as _wp_attached_file stores the directory part only after ../wp-content/uploads/
     */
    $uploads_relative_dir = trailingslashit( str_replace(ABSPATH, '', $uploads['basedir']) );
    $strip_uploads_prefix = function ( $dir ) use ( $uploads_relative_dir ) {
        return str_replace($uploads_relative_dir, '', $dir);
    };

    $exclude_directories = array_filter( array_map($strip_uploads_prefix, $args['attachment_exclude_directories']) );
    $include_directories = array_filter( array_map($strip_uploads_prefix, $args['attachment_include_directories']) );

    // Joins one "<meta_value> <operator> '<dir>%'" comparison per directory with $glue
    $comparisons = function ( array $directories, string $operator, string $glue ) use ( $wpdb ) {
        return implode(
            $glue,
            array_map(
                function ( $directory ) use ( $wpdb, $operator ) {
                    return " $wpdb->postmeta.meta_value $operator '" . Str::escape($directory) . "%'";
                },
                $directories
            )
        );
    };

    // Wraps the comparisons into the "no meta row OR at least one matching row" group
    $group = function ( string $conditions ) use ( $wpdb ) {
        return " AND (
(
(
SELECT IF((meta_key IS NULL OR meta_value = ''), -1, COUNT(meta_id))
FROM $wpdb->postmeta
WHERE $wpdb->postmeta.post_id = post.ID AND $wpdb->postmeta.meta_key='_wp_attached_file'
LIMIT 1
) = -1
OR
(
SELECT COUNT(meta_id) as mtc
FROM $wpdb->postmeta
WHERE $wpdb->postmeta.post_id = post.ID AND $wpdb->postmeta.meta_key='_wp_attached_file' AND
($conditions)
ORDER BY mtc
LIMIT 1
) >= 1
) )";
    };

    $attachment_dir_query = '';
    if ( count($exclude_directories) > 0 ) {
        $attachment_dir_query .= $group( $comparisons($exclude_directories, 'NOT LIKE', 'AND') );
    }
    if ( count($include_directories) > 0 ) {
        $attachment_dir_query .= $group( $comparisons($include_directories, 'LIKE', 'OR') );
    }
    return $attachment_dir_query;
}
/**
 * Counts the distinct documents (doc values) present in the index table.
 *
 * @return int|string|null 9999 when ASP_INDEX_BYPASS_COUNT is defined, otherwise the
 *                         value returned by $wpdb->get_var()
 */
public function getPostsIndexed() {
    if ( defined('ASP_INDEX_BYPASS_COUNT') ) {
        return 9999;
    }
    global $wpdb;
    $table = wd_asp()->db->table('index');
    // Tested faster as a regular single query count
    return $wpdb->get_var( 'SELECT COUNT(count) FROM (SELECT 1 as count FROM ' . $table . ' GROUP BY doc) as A' );
}
/**
 * Counts the rows of the index table: all rows on a multisite installation,
 * otherwise only the rows whose blogid equals the current blog ID.
 *
 * @return int|string|null 9999 when ASP_INDEX_BYPASS_COUNT is defined, otherwise the
 *                         value returned by $wpdb->get_var()
 */
public function getTotalKeywords() {
    if ( defined('ASP_INDEX_BYPASS_COUNT') ) {
        return 9999;
    }
    global $wpdb;
    $multisite = is_multisite();
    $sql       = 'SELECT COUNT(doc) FROM ' . wd_asp()->db->table('index');
    if ( !$multisite ) {
        $sql .= ' WHERE blogid = ' . get_current_blog_id();
    }
    return $wpdb->get_var($sql);
}
/**
 * Tells whether the index table has no rows.
 *
 * The check is a loose comparison of the $wpdb->query() result with 0.
 *
 * @return bool
 */
public function isEmpty(): bool {
    global $wpdb;
    $probe = 'SELECT 1 FROM ' . wd_asp()->db->table('index') . ' LIMIT 1';
    $found = $wpdb->query($probe);
    return $found == 0;
}
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,166 @@
<?php /** @noinspection PhpComposerExtensionStubsInspection */
/** @noinspection RegExpRedundantEscape */
namespace WPDRMS\ASP\Index;
use WPDRMS\ASP\Utils\Str;
defined('ABSPATH') or die("You can't access this file directly.");
if ( !class_exists(__NAMESPACE__ . '\Shortcode') ) {
    /**
     * Runs the shortcodes of a content string for indexing, while selected
     * shortcodes are temporarily replaced with a callback that outputs nothing.
     */
    class Shortcode {
        /**
         * @var array Original callbacks of the temporarily disabled shortcodes, keyed by tag
         */
        private $temporary_shortcode_tags = array();

        /**
         * Executes the shortcodes within the given string
         *
         * Filters applied: 'asp_index_before_shortcode_execution',
         * 'asp_it_try_getting_sc_content' and 'asp_index_after_shortcode_execution'.
         *
         * @param string $content Content to process
         * @param mixed  $post    Passed through to the before/after filters
         * @param string $exclude Comma separated list of shortcode tags that must not be executed
         * @return string
         * @noinspection PhpUndefinedClassInspection
         */
        function execute($content, $post, $exclude): string {
            $content = apply_filters( 'asp_index_before_shortcode_execution', $content, $post );

            // WP Table Reloaded support
            $wpt_reloaded = null;
            if ( defined( 'WP_TABLE_RELOADED_ABSPATH' ) ) {
                /** @noinspection PhpIncludeInspection */
                include_once( WP_TABLE_RELOADED_ABSPATH . 'controllers/controller-frontend.php' );
                if ( class_exists('\\WP_Table_Reloaded_Controller_Frontend') ) {
                    /** @noinspection PhpFullyQualifiedNameUsageInspection */
                    $wpt_reloaded = new \WP_Table_Reloaded_Controller_Frontend();
                }
            }

            // TablePress support
            $tablepress_active = defined( 'TABLEPRESS_ABSPATH' ) && class_exists('\\TablePress');
            if ( $tablepress_active ) {
                $content .= ' ' . $this->parseTablePressShortcodes($content);
            }

            // Remove user defined shortcodes
            $try_getting_sc_content = apply_filters('asp_it_try_getting_sc_content', true);
            foreach ( explode( ',', (string) $exclude ) as $shortcode ) {
                $shortcode = trim($shortcode);
                if ( $shortcode == '' ) {
                    continue;
                }
                // First let us try to get any contents from the shortcode itself
                if ( $try_getting_sc_content ) {
                    $content = $this->extractShortcodeContent($content, $shortcode);
                }
                // Then remove the shortcode completely
                $this->temporaryDisableShortcode($shortcode);
            }

            // Try extracting the content of these shortcodes, but do not execute
            $more_shortcodes = array(
                'cws-widget', 'cws-row', 'cws-column', 'col', 'row', 'item'
            );
            foreach ( $more_shortcodes as $shortcode ) {
                // First let us try to get any contents from the shortcode itself
                $content = $this->extractShortcodeContent($content, $shortcode);
                $this->temporaryDisableShortcode($shortcode);
            }

            // These shortcodes are completely ignored, and removed with content
            $ignore_shortcodes = array(
                'vc_asp_search',
                'ts_products_in_category_tabs',
                'wd_asp',
                'wpdreams_ajaxsearchpro',
                'wpdreams_ajaxsearchpro_results',
                'wpdreams_asp_settings',
                'contact-form',
                'starrater',
                'responsive-flipbook',
                'avatar_upload',
                'product_categories',
                'recent_products',
                'templatera',
                'bsf-info-box', 'logo-slider',
                'ourteam', 'embedyt', 'gallery', 'tweet', 'blog', 'portfolio',
                'peepso_activity', 'peepso_profile', 'peepso_group'
            );
            if ( $tablepress_active ) {
                $ignore_shortcodes[] = 'table';
            }
            foreach ( $ignore_shortcodes as $shortcode ) {
                $this->temporaryDisableShortcode($shortcode);
            }

            try {
                $content = do_shortcode( $content );
                // WP 4.2 emoji strip
                if ( function_exists( 'wp_encode_emoji' ) ) {
                    $content = wp_encode_emoji( $content );
                }
            } finally {
                // The real callbacks must come back even if a shortcode handler throws
                $this->enableDisabledShortcodes();
            }

            unset( $wpt_reloaded );

            return apply_filters( 'asp_index_after_shortcode_execution', $content, $post );
        }

        /**
         * Replaces every enclosing occurrence of the shortcode ([tag ...]inner[/tag])
         * with its inner content surrounded by spaces.
         *
         * @param string $content   Content to process
         * @param string $shortcode Shortcode tag; quoted before it is placed into the pattern
         * @return string The processed content, or the input unchanged if the regex fails
         */
        private function extractShortcodeContent($content, $shortcode) {
            // Tags are plain text: without quoting, characters such as "." "+" or "/" would
            // change (or break) the expression
            $tag      = preg_quote($shortcode, '/');
            $replaced = preg_replace(
                '/(?:\[' . $tag . '[ ]+.*?\]|\[' . $tag . '[ ]*\])(.*?)\[\/' . $tag . '[ ]*]/su',
                ' $1 ',
                $content
            );
            return $replaced === null ? $content : $replaced;
        }

        /**
         * Points a registered shortcode tag to return_empty_string() and remembers
         * its original callback. Unregistered tags are ignored.
         *
         * @param string $tag Shortcode tag
         * @return void
         */
        private function temporaryDisableShortcode($tag) {
            global $shortcode_tags;
            if ( !array_key_exists( $tag, $shortcode_tags ) ) {
                return;
            }
            // Remember only the first callback seen: on a repeated call the registered
            // callback is already our stub, and saving that would lose the original
            if ( !array_key_exists( $tag, $this->temporary_shortcode_tags ) ) {
                $this->temporary_shortcode_tags[ $tag ] = $shortcode_tags[ $tag ];
            }
            $shortcode_tags[ $tag ] = array( $this, 'return_empty_string' );
        }

        /**
         * Restores the original callbacks saved by temporaryDisableShortcode()
         * and clears the saved list.
         *
         * @return void
         */
        private function enableDisabledShortcodes() {
            global $shortcode_tags;
            foreach ( $this->temporary_shortcode_tags as $tag => $callback ) {
                $shortcode_tags[ $tag ] = $callback;
            }
            // Start clean, so a later run cannot restore stale callbacks
            $this->temporary_shortcode_tags = array();
        }

        /**
         * Collects the stored data of every TablePress table referenced by a
         * [table id=N] shortcode in the content.
         *
         * The table-to-post map is read from the 'tablepress_tables' option and each
         * table's data from the post_content of the mapped post (both JSON).
         *
         * @param string $content Content to scan
         * @return string The table data converted with Str::anyToString(), space separated;
         *                an empty string when nothing was found
         */
        private function parseTablePressShortcodes( $content ): string {
            $regex  = '/\[table[^\]]*id\=[\'"]{0,1}([0-9]+)[\'"]{0,1}?[^\]]*\]/';
            $tables = json_decode(get_option('tablepress_tables'), true);
            if ( !is_null($tables) && isset($tables['table_post']) && preg_match_all($regex, $content, $matches) > 0) {
                $return = array();
                foreach ( $matches[1] as $table_id ) {
                    // The shortcode may reference a table that does not exist (anymore)
                    if ( !isset($tables['table_post'][ $table_id ]) ) {
                        continue;
                    }
                    $data = json_decode( get_post_field('post_content', $tables['table_post'][ $table_id ]), true );
                    if ( $data !== null ) {
                        $return[] = Str::anyToString($data);
                    }
                }
                return implode(' ', $return);
            }
            return '';
        }

        /**
         * An empty function to override individual shortcodes. This must be a public method.
         *
         * @return string
         * @noinspection PhpUnused
         */
        function return_empty_string(): string {
            return "";
        }
    }
}

View File

@@ -0,0 +1,590 @@
<?php
namespace WPDRMS\ASP\Index;
use WPDRMS\ASP\Utils\Html;
use WPDRMS\ASP\Utils\Inflect\InflectController;
use WPDRMS\ASP\Utils\MB;
use WPDRMS\ASP\Utils\Str;
/**
* @phpstan-type TokenizerArgs array{
* min_word_length: int,
* use_stopwords: bool,
* stopwords: string[],
* synonyms_as_keywords: bool,
* inflect: bool,
* language: string,
* }
*/
class Tokenizer {
/**
* @var string unique random string for special replacements
*/
private string $randstr = 'wpivdny3htnydqd6mlyg';
/**
* @var TokenizerArgs
*/
private array $args;
/**
* @var string[]
*/
public static array $additional_keywords_pattern = array(
'"',
"'",
'`',
'',
'',
'”',
'“',
'«',
'»',
'+',
'.',
',',
':',
'-',
'_',
'=',
'%',
'(',
')',
'{',
'}',
'*',
'[',
']',
'|',
'&',
'/',
);
/**
 * Stores the tokenizer arguments; keys missing from $args are filled
 * from the built-in fallbacks through wp_parse_args().
 *
 * @param TokenizerArgs $args
 */
public function __construct( array $args ) {
    $fallbacks = array(
        'min_word_length'      => 2,
        'use_stopwords'        => false,
        'stopwords'            => array(),
        'synonyms_as_keywords' => false,
        'inflect'              => false,
        'language'             => 'english',
    );

    $this->args = wp_parse_args( $args, $fallbacks ); // @phpstan-ignore-line
}
/**
 * Overrides the 'language' entry of the tokenizer arguments.
 *
 * @param string $language Language name (the constructor default is 'english')
 */
public function setLanguage(string $language): void {
$this->args['language'] = $language;
}
/**
 * Normalizes a value into a single lowercase, whitespace-collapsed string.
 *
 * The result is an empty string when the normalized text contains one of the post's
 * negative keywords (substring match) or one of the stop words (whole word match).
 *
 * @param mixed        $str  Value to normalize (converted with Str::anyToString())
 * @param object|false $post Post whose negative keywords apply, false for none
 * @return string
 */
function tokenizeSimple( $str, $post ): string {
    if ( function_exists( 'mb_internal_encoding' ) ) {
        mb_internal_encoding( 'UTF-8' );
    }

    $str = Str::anyToString($str);
    $str = Html::toTxt( $str );
    $str = strip_tags( $str );
    $str = stripslashes( $str );
    // Non breakable spaces to regular spaces
    $str = preg_replace('/\xc2\xa0/', ' ', $str);
    $str = preg_replace( '/[[:space:]]+/', ' ', $str );
    $str = str_replace( array( "\n", "\r", ' ' ), ' ', $str );
    // Turkish uppercase I does not lowercase correctly
    $str = str_replace( 'İ', 'i', $str );
    $str = MB::strtolower( $str );
    $str = trim($str);
    $str = Content::hebrewUnvocalize($str);
    $str = Content::arabicRemoveDiacritics($str);

    // A failed regex in the normalization steps leaves null behind; there is nothing
    // to index then, and the string return type must still be honoured
    if ( !is_string($str) ) {
        return '';
    }

    foreach ( $this->getNegativeWords($post) as $negative_keyword ) {
        // If there is a negative keyword within, this case is over
        if ( strpos($str, $negative_keyword) !== false ) {
            return '';
        }
    }

    foreach ( $this->getStopWords() as $stop_word ) {
        // whole word matches only; the stop word is plain text, so it is quoted, and "u"
        // makes \b work for non-ASCII words too
        if ( preg_match('/\b' . preg_quote($stop_word, '/') . '\b/u', $str) ) {
            return '';
        }
    }

    return $str;
}
/**
 * Normalizes a value the same way as tokenizeSimple(), then splits it by the
 * separator and counts the resulting phrases.
 *
 * Phrases shorter than the 'min_word_length' argument are dropped. Numeric phrases
 * get a leading space in both the array key and the stored keyword.
 * The $post parameter is not used by this method.
 *
 * @param mixed  $str            Value to split (converted with Str::anyToString())
 * @param mixed  $post           Unused
 * @param string $word_separator Separator between phrases
 * @return array Map of phrase => array( phrase, number of occurrences )
 */
function tokenizePhrases( $str, $post, $word_separator = ',' ): array {
    if ( function_exists( 'mb_internal_encoding' ) ) {
        mb_internal_encoding( 'UTF-8' );
    }
    $min_length = $this->args['min_word_length'];

    $str = stripslashes( strip_tags( Html::toTxt( Str::anyToString($str) ) ) );
    // Non breakable spaces to regular spaces
    $str = preg_replace('/\xc2\xa0/', ' ', $str);
    $str = preg_replace( '/[[:space:]]+/', ' ', $str );
    $str = str_replace( array( "\n", "\r", ' ' ), ' ', $str );
    // Turkish uppercase I does not lowercase correctly
    $str = str_replace( 'İ', 'i', $str );
    $str = trim( MB::strtolower( $str ) );
    $str = Content::hebrewUnvocalize($str);
    $str = Content::arabicRemoveDiacritics($str);

    $phrases = array_filter(
        array_map('trim', explode($word_separator, $str)),
        function ( $phrase ) {
            return MB::strlen($phrase);
        }
    );

    $keywords = array();
    foreach ( $phrases as $phrase ) {
        $phrase = trim($phrase);
        if ( $phrase == '' || MB::strlen($phrase) < $min_length ) {
            continue;
        }
        // Numerics won't work otherwise, need to trim that later
        if ( is_numeric($phrase) ) {
            $phrase = ' ' . $phrase;
        }
        if ( !array_key_exists($phrase, $keywords) ) {
            $keywords[ $phrase ] = array( $phrase, 1 );
            continue;
        }
        ++$keywords[ $phrase ][1];
    }

    return $keywords;
}
/**
* Performs a keyword extraction on the given content string.
*
* @return array of keywords $keyword = array( 'keyword', {count} )
*/
function tokenize( $str, $post = false, $lang = '' ): array {
if ( is_array( $str ) ) {
$str = Str::anyToString( $str );
}
if ( function_exists('mb_strlen') ) {
$fn_strlen = 'mb_strlen';
} else {
$fn_strlen = 'strlen';
}
$args = $this->args;
if ( function_exists( 'mb_internal_encoding' ) ) {
mb_internal_encoding( 'UTF-8' );
}
$str = apply_filters( 'asp_indexing_string_pre_process', $str );
$str = Html::toTxt( $str );
$str = wp_specialchars_decode( $str );
$str = strip_tags( $str );
$str = stripslashes( $str );
// Replace non-word boundary dots with a unique string + 'd'
/** @noinspection RegExpRedundantEscape */
$str = preg_replace('/([0-9])[\.]([0-9])/', '$1' . $this->randstr . 'd$2', $str);
// Remove potentially dangerous or unusable characters
$str = str_replace(
array(
'·',
'…',
'€',
'&shy;',
'·',
'…',
'®',
'©',
'™',
"\xC2\xAD",
),
'',
$str
);
$str = str_replace(
array(
'. ', // dot followed by space as boundary, otherwise it might be a part of the word
', ', // comma followed by space only, otherwise it might be a word part
'<',
'>',
'†',
'‡',
'‰',
'',
'™',
'¡',
'¢',
'¤',
'¥',
'¦',
'§',
'¨',
'©',
'ª',
'«',
'¬',
'®',
'¯',
'°',
'±',
'¹',
'²',
'³',
'¶',
'·',
'º',
'»',
'¼',
'½',
'¾',
'¿',
'÷',
'•',
'…',
'←',
'←',
'↑',
'→',
'↓',
'↔',
'↵',
'⇐',
'⇑',
'⇒',
'⇓',
'⇔',
'√',
'∝',
'∞',
'∠',
'∧',
'',
'∂',
'∃',
'∅',
'',
'∩',
'',
'∫',
'∴',
'',
'≅',
'≈',
'≠',
'≡',
'≤',
'≥',
'⊂',
'⊃',
'⊄',
'⊆',
'⊇',
'⊕',
'⊗',
'⊥',
'◊',
'♠',
'♣',
'♥',
'♦',
'🔴',
'',
'◊',
'〈',
'〉',
'⌊',
'⌋',
'⌈',
'⌉',
'⋅',
'ˇ',
'°',
'~',
'Ë›',
'Ëť',
'¸',
'§',
'¨',
'’',
'‘',
'”',
'“',
'„',
'´',
'—',
'–',
'Ă—',
'&#8217;',
'&#128308;',
'&nbsp;',
"\n",
"\r",
'& ',
'\\',
'^',
'?',
'!',
';',
chr( 194 ) . chr( 160 ),
),
' ',
$str
);
$str = str_replace( 'Ăź', 'ss', $str );
// Turkish uppercase I does not lowercase correctly
$special_replace = array(
'İ' => 'i',
'—' => '-',
);
$str = str_replace( array_keys($special_replace), array_values($special_replace), $str );
// Any yet undefined punctuation
// $str = preg_replace( '/[[:punct:]]+/u', ' ', $str );
// Non breakable spaces to regular spaces
$str = preg_replace('/\xc2\xa0/', ' ', $str);
// Any remaining multiple space characters
$str = preg_replace( '/[[:space:]]+/', ' ', $str );
$str = MB::strtolower($str);
$str = Content::hebrewUnvocalize($str);
$str = Content::arabicRemoveDiacritics($str);
// $str = preg_replace('/[^\p{L}0-9 ]/', ' ', $str);
$str = str_replace( "\xEF\xBB\xBF", '', $str );
$str = trim( preg_replace( '/\s+/', ' ', $str ) );
// Set back the non-word boundary dots
$str = str_replace( $this->randstr . 'd', '.', $str );
$str = apply_filters( 'asp_indexing_string_post_process', $str );
$words = explode( ' ', $str );
// Remove punctuation marks + some extra from the start and the end of words
// Characters, which should not be standalone (but can be in start on end)
$non_standalone_strings = array( '$', '€', '£', '%' );
// Additional keywords, should not be standalone
$additional_keywords_string = implode('', array_diff(self::$additional_keywords_pattern, $non_standalone_strings));
foreach ( $words as $wk => &$ww ) {
$ww = MB::trim($ww, $additional_keywords_string);
if ( $ww == '' || in_array($ww, $non_standalone_strings ) ) {
unset($words[ $wk ]);
}
}
unset($wk);
unset($ww);
// Get additional words if available
$additional_words = array();
$started = microtime(true);
foreach ( $words as $ww ) {
// This operation can be costly, so limit to 3 seconds just to be sure
if ( ( microtime(true) - $started ) > 3 ) {
break;
}
// ex.: 123-45-678 to 123, 45, 678
$ww1 = str_replace(self::$additional_keywords_pattern, ' ', $ww);
$wa = explode(' ', $ww1);
if ( count($wa) > 1 ) {
foreach ( $wa as $wak => $wav ) {
$wav = trim(preg_replace( '/[[:space:]]+/', ' ', $wav ));
if ( $wav != '' && !in_array($wav, $words) ) {
$wa[ $wak ] = $wav;
} else {
unset($wa[ $wak ]);
}
}
$additional_words = array_merge($additional_words, $wa);
}
// ex.: 123-45-678 to 12345678
$ww2 = str_replace(self::$additional_keywords_pattern, '', $ww);
if ( $ww2 !== '' && $ww2 != $ww && !in_array($ww2, $words) && !in_array($ww2, $additional_words) ) {
$additional_words[] = $ww2;
}
// Accent removal and transliteration
$transliterated = str_replace(array( 'ʾ', "'", '"' ), '', Str::removeAccents($ww));
if (
$transliterated !== '' &&
$transliterated !== $ww &&
!in_array($transliterated, $words, true) &&
!in_array($transliterated, $additional_words, true)
) {
$additional_words[] = $transliterated;
}
}
// Append them after the words array
$words = array_merge($words, $additional_words);
// Inflections
if ( $this->args['inflect'] && !empty($words) ) {
$words = array_merge(
$words,
InflectController::instance()->get($words, $this->args['language'])
);
}
/**
* Apply synonyms for the whole string instead of the words, because
* synonyms can be multi-keyword phrases too
*/
$syn_inst = \WPDRMS\ASP\Synonyms\Manager::getInstance();
if ( $syn_inst->exists() ) {
if ( $this->args['synonyms_as_keywords'] == 1 ) {
$syn_inst->synonymsAsKeywords();
}
$additional_words_by_synonyms = array();
$synonyms = $syn_inst->get();
// If the langauge is set
if ( $lang != '' && isset($synonyms[ $lang ]) ) {
foreach ( $synonyms[ $lang ] as $keyword => $synonyms_arr ) {
if ( preg_match('/\b' . preg_quote($keyword) . '\b/u', $str) ) {
$additional_words_by_synonyms = array_merge($additional_words_by_synonyms, $synonyms_arr);
}
}
}
unset($keyword, $synonyms_arr);
// Also for the "default" aka "any"
if ( isset($synonyms['default']) ) {
foreach ( $synonyms['default'] as $keyword => $synonyms_arr ) {
if ( preg_match('/\b' . preg_quote($keyword) . '\b/u', $str) ) {
$additional_words_by_synonyms = array_merge($additional_words_by_synonyms, $synonyms_arr);
}
}
}
if ( count($additional_words_by_synonyms) > 0 ) {
$words = array_merge($words, $additional_words_by_synonyms);
}
}
$stop_words = $this->getStopWords();
$negative_keywords = $this->getNegativeWords($post);
$keywords = array();
foreach ( $words as $c_word ) {
$c_word = trim($c_word);
if ( $c_word == '' || $fn_strlen($c_word) < $args['min_word_length'] ) {
continue;
}
// Only whole word matches for stop-words
if ( !empty($stop_words) && in_array($c_word, $stop_words) ) {
continue;
}
// Partial matches for negative keywords
foreach ( $negative_keywords as $negative_keyword ) {
if ( strpos($c_word, $negative_keyword) !== false ) {
continue 2;
}
}
// Numerics won't work otherwise, need to trim that later
if ( is_numeric($c_word) ) {
$c_word = ' ' . $c_word;
}
if ( array_key_exists($c_word, $keywords) ) {
++$keywords[ $c_word ][1];
} else {
$keywords[ $c_word ] = array( $c_word, 1 );
}
}
unset($c_word);
return apply_filters( 'asp_indexing_keywords', $keywords );
}
/**
 * Returns the stop words
 *
 * The 'stopwords' argument may be a comma separated string or an array of words.
 * Spaces are removed, duplicates and words shorter than 'min_word_length' are dropped.
 * An empty array is returned when 'use_stopwords' is off or no stop words are set.
 *
 * @return string[]
 */
private function getStopWords(): array {
    if ( $this->args['use_stopwords'] != 1 ) {
        return array();
    }

    $configured = $this->args['stopwords'];
    // The documented (and default) type is an array, while explode() below needs a string
    if ( is_array($configured) ) {
        $configured = implode(',', $configured);
    }
    if ( !is_string($configured) || $configured === '' ) {
        return array();
    }

    $stop_words = array();
    foreach ( array_unique( explode(',', str_replace(' ', '', $configured)) ) as $word ) {
        $word = trim($word);
        if ( $word === '' || MB::strlen($word) < $this->args['min_word_length'] ) {
            continue;
        }
        $stop_words[] = $word;
    }
    return $stop_words;
}
/**
 * Reads the whitespace separated negative keywords stored in the post's
 * '_asp_negative_keywords' meta.
 *
 * @param object|false $post Post object (->ID is read), or false for "no post"
 * @return array Unique, non-empty keywords; empty when $post is false or nothing is stored
 */
private function getNegativeWords( $post ): array {
    if ( $post === false ) {
        return array();
    }

    // Post level stop-words, negative keywords
    $stored = get_post_meta($post->ID, '_asp_negative_keywords', true);
    if ( empty($stored) ) {
        return array();
    }

    $collapsed = trim( preg_replace('/\s+/', ' ', $stored) );
    $keywords  = array_filter(
        explode(' ', $collapsed),
        fn( $keyword ) => $keyword !== ''
    );
    return array_unique($keywords);
}
}