Commit inicial - WordPress Análisis de Precios Unitarios

- WordPress core y plugins
- Tema Twenty Twenty-Four configurado
- Plugin allow-unfiltered-html.php simplificado
- .gitignore configurado para excluir wp-config.php y uploads

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
root
2025-11-03 21:04:30 -06:00
commit a22573bf0b
24068 changed files with 4993111 additions and 0 deletions

View File

@@ -0,0 +1,394 @@
<?php
use WPDRMS\ASP\Utils\FileManager;
if (!defined('ABSPATH')) die('-1');
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
This code is an improved version of what can be found at:
http://www.webcheatsheet.com/php/reading_clean_text_from_pdf.php
AUTHOR:
- Webcheatsheet.com (Original code)
- Joeri Stegeman (joeri210 [at] yahoo [dot] com) (Class conversion and fixes/adjustments)
https://gist.github.com/neko-fire/7038322
DESCRIPTION:
This is a class to convert PDF files into ASCII text or so called PDF text extraction.
It will ignore anything that is not addressed as text within the PDF and any layout.
Currently supported filters are: ASCIIHexDecode, ASCII85Decode, FlateDecode
PURPOSE(S):
Most likely for people that want their PDF to be searchable.
SYNTAX:
include('class.pdf2text.php');
$a = new PDF2Text();
$a->setFilename('test.pdf');
$a->decodePDF();
echo $a->output();
ALTERNATIVES:
Other excellent options to search within a PDF:
- Apache PDFbox (http://pdfbox.apache.org/). An open source Java solution
- pdflib TET (http://www.pdflib.com/products/tet/)
- Online converter: http://snowtide.com/PDFTextStream
*/
// Original name is PDF2Text, changed for better compatibility
if (!class_exists("ASP_PDF2Text")) {
class ASP_PDF2Text {
// Some settings
var $multibyte = 2; // Use setUnicode(TRUE|FALSE)
var $convertquotes = ENT_QUOTES; // ENT_COMPAT (double-quotes), ENT_QUOTES (Both), ENT_NOQUOTES (None)
// Variables
var $filename = '';
var $decodedtext = '';
function setFilename($filename) {
// Reset
$this->decodedtext = '';
$this->filename = $filename;
}
function output($echo = false) {
if ($echo) echo $this->decodedtext;
else return $this->decodedtext;
}
function setUnicode($input) {
// 4 for unicode. But 2 should work in most cases just fine
if ($input == true) $this->multibyte = 4;
else $this->multibyte = 2;
}
function decodePDF() {
// WordPress specific
$infile = file_exists($this->filename) ? FileManager::instance()->read($this->filename) : '';
// Read the data from pdf file
if (empty($infile))
return "";
// Get all text data.
$transformations = array();
$texts = array();
// Get the list of all objects.
$objects = preg_split("/[\r\n|\r|\n]endobj/", $infile);
// Select objects with streams.
for ($i = 0; $i < count($objects); $i++) {
$currentObject = $objects[$i];
// Check if an object includes data stream.
$stream = preg_split("/[\r\n|\r|\n]endstream[\r\n|\r|\n]/", $currentObject);
if ( count($stream) > 0 ) {
$stream = preg_split("/stream[\r\n|\r|\n]/", $stream[0]);
if (count($stream) > 1) {
$stream = ltrim($stream[1]);
// Check object parameters and look for text data.
$options = $this->getObjectOptions($currentObject);
if (!(empty($options["Length1"]) && empty($options["Type"]) && empty($options["Subtype"])))
continue;
// Hack, length doesnt always seem to be correct
unset($options["Length"]);
// So, we have text data. Decode it.
$data = $this->getDecodedStream($stream, $options);
if (strlen($data)) {
if (preg_match_all("#BT[\n|\r](.*)ET[\n|\r]#ismU", $data, $textContainers)) {
$textContainers = @$textContainers[1];
$this->getDirtyTexts($texts, $textContainers);
} else
$this->getCharTransformations($transformations, $data);
}
}
}
}
// Analyze text blocks taking into account character transformations and return results.
$this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);
}
function decodeAsciiHex($input) {
$output = "";
$isOdd = true;
$isComment = false;
for ($i = 0, $codeHigh = -1; $i < strlen($input) && $input[$i] != '>'; $i++) {
$c = $input[$i];
if ($isComment) {
if ($c == '\r' || $c == '\n')
$isComment = false;
continue;
}
switch ($c) {
case '\0':
case '\t':
case '\r':
case '\f':
case '\n':
case ' ':
break;
case '%':
$isComment = true;
break;
default:
$code = hexdec($c);
if ($code === 0 && $c != '0')
return "";
if ($isOdd)
$codeHigh = $code;
else
$output .= chr($codeHigh * 16 + $code);
$isOdd = !$isOdd;
break;
}
}
if ($input[$i] != '>')
return "";
if ($isOdd)
$output .= chr($codeHigh * 16);
return $output;
}
function decodeAscii85($input) {
$output = "";
$isComment = false;
$ords = array();
for ($i = 0, $state = 0; $i < strlen($input) && $input[$i] != '~'; $i++) {
$c = $input[$i];
if ($isComment) {
if ($c == '\r' || $c == '\n')
$isComment = false;
continue;
}
if ($c == '\0' || $c == '\t' || $c == '\r' || $c == '\f' || $c == '\n' || $c == ' ')
continue;
if ($c == '%') {
$isComment = true;
continue;
}
if ($c == 'z' && $state === 0) {
$output .= str_repeat(chr(0), 4);
continue;
}
if ($c < '!' || $c > 'u')
return "";
$code = ord($input[$i]) & 0xff;
$ords[$state++] = $code - ord('!');
if ($state == 5) {
$state = 0;
for ($sum = 0, $j = 0; $j < 5; $j++)
$sum = $sum * 85 + $ords[$j];
for ($j = 3; $j >= 0; $j--)
$output .= chr($sum >> ($j * 8));
}
}
if ($state === 1)
return "";
elseif ($state > 1) {
for ($i = 0, $sum = 0; $i < $state; $i++)
$sum += ($ords[$i] + ($i == $state - 1)) * pow(85, 4 - $i);
for ($i = 0; $i < $state - 1; $i++)
$output .= chr($sum >> ((3 - $i) * 8));
}
return $output;
}
function decodeFlate($input) {
return @gzuncompress($input);
}
function getObjectOptions($object) {
$options = array();
if (preg_match("#<<(.*)>>#ismU", $object, $options)) {
$options = explode("/", $options[1]);
@array_shift($options);
$o = array();
for ($j = 0; $j < @count($options); $j++) {
$options[$j] = preg_replace("#\s+#", " ", trim($options[$j]));
if (strpos($options[$j], " ") !== false) {
$parts = explode(" ", $options[$j]);
$o[$parts[0]] = $parts[1];
} else
$o[$options[$j]] = true;
}
$options = $o;
unset($o);
}
return $options;
}
function getDecodedStream($stream, $options) {
$data = "";
if (empty($options["Filter"]))
$data = $stream;
else {
$length = !empty($options["Length"]) ? $options["Length"] : strlen($stream);
$_stream = substr($stream, 0, $length);
foreach ($options as $key => $value) {
if ($key == "ASCIIHexDecode")
$_stream = $this->decodeAsciiHex($_stream);
if ($key == "ASCII85Decode")
$_stream = $this->decodeAscii85($_stream);
if ($key == "FlateDecode")
$_stream = $this->decodeFlate($_stream);
if ($key == "Crypt") { // TO DO
}
}
$data = $_stream;
}
return $data;
}
function getDirtyTexts(&$texts, $textContainers) {
for ($j = 0; $j < count($textContainers); $j++) {
if (preg_match_all("#\[(.*)\]\s*TJ[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, @$parts[1]);
elseif (preg_match_all("#T[d|w|m|f]\s*(\(.*\))\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, @$parts[1]);
elseif (preg_match_all("#T[d|w|m|f]\s*(\[.*\])\s*Tj[\n|\r]#ismU", $textContainers[$j], $parts))
$texts = array_merge($texts, @$parts[1]);
}
}
function getCharTransformations(&$transformations, $stream) {
preg_match_all("#([0-9]+)\s+beginbfchar(.*)endbfchar#ismU", $stream, $chars, PREG_SET_ORDER);
preg_match_all("#([0-9]+)\s+beginbfrange(.*)endbfrange#ismU", $stream, $ranges, PREG_SET_ORDER);
for ($j = 0; $j < count($chars); $j++) {
$count = $chars[$j][1];
$current = explode("\n", trim($chars[$j][2]));
for ($k = 0; $k < $count && $k < count($current); $k++) {
if (preg_match("#<([0-9a-f]{2,4})>\s+<([0-9a-f]{4,512})>#is", trim($current[$k]), $map))
$transformations[str_pad($map[1], 4, "0")] = $map[2];
}
}
for ($j = 0; $j < count($ranges); $j++) {
$count = $ranges[$j][1];
$current = explode("\n", trim($ranges[$j][2]));
for ($k = 0; $k < $count && $k < count($current); $k++) {
if (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+<([0-9a-f]{4})>#is", trim($current[$k]), $map)) {
$from = hexdec($map[1]);
$to = hexdec($map[2]);
$_from = hexdec($map[3]);
for ($m = $from, $n = 0; $m <= $to; $m++, $n++)
$transformations[sprintf("%04X", $m)] = sprintf("%04X", $_from + $n);
} elseif (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+\[(.*)\]#ismU", trim($current[$k]), $map)) {
$from = hexdec($map[1]);
$to = hexdec($map[2]);
$parts = preg_split("#\s+#", trim($map[3]));
for ($m = $from, $n = 0; $m <= $to && $n < count($parts); $m++, $n++)
$transformations[sprintf("%04X", $m)] = sprintf("%04X", hexdec($parts[$n]));
}
}
}
}
function getTextUsingTransformations($texts, $transformations) {
$document = "";
for ($i = 0; $i < count($texts); $i++) {
$isHex = false;
$isPlain = false;
$hex = "";
$plain = "";
for ($j = 0; $j < strlen($texts[$i]); $j++) {
$c = $texts[$i][$j];
switch ($c) {
case "<":
$hex = "";
$isHex = true;
break;
case ">":
$hexs = str_split($hex, $this->multibyte); // 2 or 4 (UTF8 or ISO)
for ($k = 0; $k < count($hexs); $k++) {
$chex = str_pad($hexs[$k], $this->multibyte, "0"); // Add tailing zero
if (isset($transformations[$chex]))
$chex = $transformations[$chex];
$document .= html_entity_decode("&#x" . $chex . ";", ENT_QUOTES | ENT_XML1, 'UTF-8');
}
$isHex = false;
break;
case "(":
$plain = "";
$isPlain = true;
break;
case ")":
$document .= $plain;
$isPlain = false;
break;
case "\\":
$c2 = $texts[$i][$j + 1];
if (in_array($c2, array("\\", "(", ")"))) $plain .= $c2;
elseif ($c2 == "n") $plain .= '\n';
elseif ($c2 == "r") $plain .= '\r';
elseif ($c2 == "t") $plain .= '\t';
elseif ($c2 == "b") $plain .= '\b';
elseif ($c2 == "f") $plain .= '\f';
elseif ($c2 >= '0' && $c2 <= '9') {
$oct = preg_replace("#[^0-9]#", "", substr($texts[$i], $j + 1, 3));
$j += strlen($oct) - 1;
$plain .= html_entity_decode("&#" . octdec($oct) . ";", $this->convertquotes);
}
$j++;
break;
default:
if ($isHex)
$hex .= $c;
if ($isPlain)
$plain .= $c;
break;
}
}
$document .= "\n";
}
return $document;
}
}
}