Commit inicial - WordPress Análisis de Precios Unitarios

- WordPress core y plugins - Tema Twenty Twenty-Four configurado - Plugin allow-unfiltered-html.php simplificado - .gitignore configurado para excluir wp-config.php y uploads 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-03 21:04:30 -06:00
commit a22573bf0b
24068 changed files with 4993111 additions and 0 deletions
--- a/wp-content/plugins/ajax-search-pro/includes/externals/class.pdf2txt.php
+++ b/wp-content/plugins/ajax-search-pro/includes/externals/class.pdf2txt.php
@@ -0,0 +1,394 @@
+<?php
+
+use WPDRMS\ASP\Utils\FileManager;
+
+if (!defined('ABSPATH')) die('-1');
+/*
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+This code is an improved version of what can be found at:
+http://www.webcheatsheet.com/php/reading_clean_text_from_pdf.php
+
+AUTHOR:
+- Webcheatsheet.com (Original code)
+- Joeri Stegeman (joeri210 [at] yahoo [dot] com) (Class conversion and fixes/adjustments)
+  https://gist.github.com/neko-fire/7038322
+
+DESCRIPTION:
+This is a class to convert PDF files into ASCII text or so called PDF text extraction.
+It will ignore anything that is not addressed as text within the PDF and any layout.
+Currently supported filters are: ASCIIHexDecode, ASCII85Decode, FlateDecode
+
+PURPOSE(S):
+Most likely for people that want their PDF to be searchable.
+
+SYNTAX:
+include('class.pdf2text.php');
+$a = new PDF2Text();
+$a->setFilename('test.pdf');
+$a->decodePDF();
+echo $a->output();
+
+ALTERNATIVES:
+Other excellent options to search within a PDF:
+- Apache PDFbox (http://pdfbox.apache.org/). An open source Java solution
+- pdflib TET (http://www.pdflib.com/products/tet/)
+- Online converter: http://snowtide.com/PDFTextStream
+*/
+
+// Original name is PDF2Text, changed for better compatibility
+if (!class_exists("ASP_PDF2Text")) {
+    class ASP_PDF2Text {
+        // Some settings
+        var $multibyte = 2; // Use setUnicode(TRUE|FALSE)
+        var $convertquotes = ENT_QUOTES; // ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
+
+        // Variables
+        var $filename = '';
+        var $decodedtext = '';
+
+        function setFilename($filename) {
+            // Reset
+            $this->decodedtext = '';
+            $this->filename = $filename;
+        }
+
+        function output($echo = false) {
+            if ($echo) echo $this->decodedtext;
+            else return $this->decodedtext;
+        }
+
+        function setUnicode($input) {
+            // 4 for unicode. But 2 should work in most cases just fine
+            if ($input == true) $this->multibyte = 4;
+            else $this->multibyte = 2;
+        }
+
+        function decodePDF() {
+            // WordPress specific
+            $infile = file_exists($this->filename) ? FileManager::instance()->read($this->filename) : '';
+
+            // Read the data from pdf file
+            if (empty($infile))
+                return "";
+
+            // Get all text data.
+            $transformations = array();
+            $texts = array();
+
+            // Get the list of all objects.
+            $objects = preg_split("/[\r\n|\r|\n]endobj/", $infile);
+
+            // Select objects with streams.
+            for ($i = 0; $i < count($objects); $i++) {
+                $currentObject = $objects[$i];
+
+                // Check if an object includes data stream.
+                $stream = preg_split("/[\r\n|\r|\n]endstream[\r\n|\r|\n]/", $currentObject);
+                if ( count($stream) > 0 ) {
+                    $stream = preg_split("/stream[\r\n|\r|\n]/", $stream[0]);
+                    if (count($stream) > 1) {
+                        $stream = ltrim($stream[1]);
+
+                        // Check object parameters and look for text data.
+                        $options = $this->getObjectOptions($currentObject);
+
+                        if (!(empty($options["Length1"]) && empty($options["Type"]) && empty($options["Subtype"])))
+                            continue;
+
+                        // Hack, length doesnt always seem to be correct
+                        unset($options["Length"]);
+
+                        // So, we have text data. Decode it.
+                        $data = $this->getDecodedStream($stream, $options);
+                        if (strlen($data)) {
+                            if (preg_match_all("#BT[\n|\r](.*)ET[\n|\r]#ismU", $data, $textContainers)) {
+                                $textContainers = @$textContainers[1];
+                                $this->getDirtyTexts($texts, $textContainers);
+                            } else
+                                $this->getCharTransformations($transformations, $data);
+                        }
+                    }
+                }
+            }
+
+            // Analyze text blocks taking into account character transformations and return results.
+            $this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);
+        }
+
+
+        function decodeAsciiHex($input) {
+            $output = "";
+
+            $isOdd = true;
+            $isComment = false;
+
+            for ($i = 0, $codeHigh = -1; $i < strlen($input) && $input[$i] != '>'; $i++) {
+                $c = $input[$i];
+
+                if ($isComment) {
+                    if ($c == '\r' || $c == '\n')
+                        $isComment = false;
+                    continue;
+                }
+
+                switch ($c) {
+                    case '\0':
+                    case '\t':
+                    case '\r':
+                    case '\f':
+                    case '\n':
+                    case ' ':
+                        break;
+                    case '%':
+                        $isComment = true;
+                        break;
+
+                    default:
+                        $code = hexdec($c);
+                        if ($code === 0 && $c != '0')
+                            return "";
+
+                        if ($isOdd)
+                            $codeHigh = $code;
+                        else
+                            $output .= chr($codeHigh * 16 + $code);
+
+                        $isOdd = !$isOdd;
+                        break;
+                }
+            }
+
+            if ($input[$i] != '>')
+                return "";
+
+            if ($isOdd)
+                $output .= chr($codeHigh * 16);
+
+            return $output;
+        }
+
+        function decodeAscii85($input) {
+            $output = "";
+
+            $isComment = false;
+            $ords = array();
+
+            for ($i = 0, $state = 0; $i < strlen($input) && $input[$i] != '~'; $i++) {
+                $c = $input[$i];
+
+                if ($isComment) {
+                    if ($c == '\r' || $c == '\n')
+                        $isComment = false;
+                    continue;
+                }
+
+                if ($c == '\0' || $c == '\t' || $c == '\r' || $c == '\f' || $c == '\n' || $c == ' ')
+                    continue;
+                if ($c == '%') {
+                    $isComment = true;
+                    continue;
+                }
+                if ($c == 'z' && $state === 0) {
+                    $output .= str_repeat(chr(0), 4);
+                    continue;
+                }
+                if ($c < '!' || $c > 'u')
+                    return "";
+
+                $code = ord($input[$i]) & 0xff;
+                $ords[$state++] = $code - ord('!');
+
+                if ($state == 5) {
+                    $state = 0;
+                    for ($sum = 0, $j = 0; $j < 5; $j++)
+                        $sum = $sum * 85 + $ords[$j];
+                    for ($j = 3; $j >= 0; $j--)
+                        $output .= chr($sum >> ($j * 8));
+                }
+            }
+            if ($state === 1)
+                return "";
+            elseif ($state > 1) {
+                for ($i = 0, $sum = 0; $i < $state; $i++)
+                    $sum += ($ords[$i] + ($i == $state - 1)) * pow(85, 4 - $i);
+                for ($i = 0; $i < $state - 1; $i++)
+                    $output .= chr($sum >> ((3 - $i) * 8));
+            }
+
+            return $output;
+        }
+
+        function decodeFlate($input) {
+            return @gzuncompress($input);
+        }
+
+        function getObjectOptions($object) {
+            $options = array();
+
+            if (preg_match("#<<(.*)>>#ismU", $object, $options)) {
+                $options = explode("/", $options[1]);
+                @array_shift($options);
+
+                $o = array();
+                for ($j = 0; $j < @count($options); $j++) {
+                    $options[$j] = preg_replace("#\s+#", " ", trim($options[$j]));
+                    if (strpos($options[$j], " ") !== false) {
+                        $parts = explode(" ", $options[$j]);
+                        $o[$parts[0]] = $parts[1];
+                    } else
+                        $o[$options[$j]] = true;
+                }
+                $options = $o;
+                unset($o);
+            }
+
+            return $options;
+        }
+
+        function getDecodedStream($stream, $options) {
+            $data = "";
+            if (empty($options["Filter"]))
+                $data = $stream;
+            else {
+                $length = !empty($options["Length"]) ? $options["Length"] : strlen($stream);
+                $_stream = substr($stream, 0, $length);
+
+                foreach ($options as $key => $value) {
+                    if ($key == "ASCIIHexDecode")
+                        $_stream = $this->decodeAsciiHex($_stream);
+                    if ($key == "ASCII85Decode")
+                        $_stream = $this->decodeAscii85($_stream);
+                    if ($key == "FlateDecode")
+                        $_stream = $this->decodeFlate($_stream);
+                    if ($key == "Crypt") { // TO DO
+                    }
+                }
+                $data = $_stream;
+            }
+
+            return $data;
+        }
+
+        function getDirtyTexts(&$texts, $textContainers) {
+
+            for ($j = 0; $j < count($textContainers); $j++) {
+                if (preg_match_all("#\[(.*)\]\s*TJ[\n|\r]#ismU", $textContainers[$j], $parts))
+                    $texts = array_merge($texts, @$parts[1]);
+                elseif (preg_match_all("#T[d|w|m|f]\s*(\(.*\))\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
+                    $texts = array_merge($texts, @$parts[1]);
+                elseif (preg_match_all("#T[d|w|m|f]\s*(\[.*\])\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
+                    $texts = array_merge($texts, @$parts[1]);
+            }
+        }
+
+        function getCharTransformations(&$transformations, $stream) {
+            preg_match_all("#([0-9]+)\s+beginbfchar(.*)endbfchar#ismU", $stream, $chars, PREG_SET_ORDER);
+            preg_match_all("#([0-9]+)\s+beginbfrange(.*)endbfrange#ismU", $stream, $ranges, PREG_SET_ORDER);
+
+            for ($j = 0; $j < count($chars); $j++) {
+                $count = $chars[$j][1];
+                $current = explode("\n", trim($chars[$j][2]));
+                for ($k = 0; $k < $count && $k < count($current); $k++) {
+                    if (preg_match("#<([0-9a-f]{2,4})>\s+<([0-9a-f]{4,512})>#is", trim($current[$k]), $map))
+                        $transformations[str_pad($map[1], 4, "0")] = $map[2];
+                }
+            }
+            for ($j = 0; $j < count($ranges); $j++) {
+                $count = $ranges[$j][1];
+                $current = explode("\n", trim($ranges[$j][2]));
+                for ($k = 0; $k < $count && $k < count($current); $k++) {
+                    if (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+<([0-9a-f]{4})>#is", trim($current[$k]), $map)) {
+                        $from = hexdec($map[1]);
+                        $to = hexdec($map[2]);
+                        $_from = hexdec($map[3]);
+
+                        for ($m = $from, $n = 0; $m <= $to; $m++, $n++)
+                            $transformations[sprintf("%04X", $m)] = sprintf("%04X", $_from + $n);
+                    } elseif (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+\[(.*)\]#ismU", trim($current[$k]), $map)) {
+                        $from = hexdec($map[1]);
+                        $to = hexdec($map[2]);
+                        $parts = preg_split("#\s+#", trim($map[3]));
+
+                        for ($m = $from, $n = 0; $m <= $to && $n < count($parts); $m++, $n++)
+                            $transformations[sprintf("%04X", $m)] = sprintf("%04X", hexdec($parts[$n]));
+                    }
+                }
+            }
+        }
+
+        function getTextUsingTransformations($texts, $transformations) {
+            $document = "";
+            for ($i = 0; $i < count($texts); $i++) {
+                $isHex = false;
+                $isPlain = false;
+
+                $hex = "";
+                $plain = "";
+                for ($j = 0; $j < strlen($texts[$i]); $j++) {
+                    $c = $texts[$i][$j];
+                    switch ($c) {
+                        case "<":
+                            $hex = "";
+                            $isHex = true;
+                            break;
+                        case ">":
+                            $hexs = str_split($hex, $this->multibyte); // 2 or 4 (UTF8 or ISO)
+                            for ($k = 0; $k < count($hexs); $k++) {
+                                $chex = str_pad($hexs[$k], $this->multibyte, "0"); // Add tailing zero
+                                if (isset($transformations[$chex]))
+                                    $chex = $transformations[$chex];
+                                $document .= html_entity_decode("&#x" . $chex . ";", ENT_QUOTES | ENT_XML1, 'UTF-8');
+                            }
+                            $isHex = false;
+                            break;
+                        case "(":
+                            $plain = "";
+                            $isPlain = true;
+                            break;
+                        case ")":
+                            $document .= $plain;
+                            $isPlain = false;
+                            break;
+                        case "\\":
+                            $c2 = $texts[$i][$j + 1];
+                            if (in_array($c2, array("\\", "(", ")"))) $plain .= $c2;
+                            elseif ($c2 == "n") $plain .= '\n';
+                            elseif ($c2 == "r") $plain .= '\r';
+                            elseif ($c2 == "t") $plain .= '\t';
+                            elseif ($c2 == "b") $plain .= '\b';
+                            elseif ($c2 == "f") $plain .= '\f';
+                            elseif ($c2 >= '0' && $c2 <= '9') {
+                                $oct = preg_replace("#[^0-9]#", "", substr($texts[$i], $j + 1, 3));
+                                $j += strlen($oct) - 1;
+                                $plain .= html_entity_decode("&#" . octdec($oct) . ";", $this->convertquotes);
+                            }
+                            $j++;
+                            break;
+
+                        default:
+                            if ($isHex)
+                                $hex .= $c;
+                            if ($isPlain)
+                                $plain .= $c;
+                            break;
+                    }
+                }
+                $document .= "\n";
+            }
+
+            return $document;
+        }
+    }
+}