Commit inicial - WordPress Análisis de Precios Unitarios

- WordPress core y plugins
- Tema Twenty Twenty-Four configurado
- Plugin allow-unfiltered-html.php simplificado
- .gitignore configurado para excluir wp-config.php y uploads

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
root
2025-11-03 21:04:30 -06:00
commit a22573bf0b
24068 changed files with 4993111 additions and 0 deletions

View File

@@ -0,0 +1,14 @@
<?php
namespace WPDRMS\ASP\Index;
defined( 'ABSPATH' ) or die( "You can't access this file directly." );
/**
* Class only kept to avoid fatal errors during activation
*
* @deprecated 4.27
*/
class Manager {
	/**
	 * Intentional no-op: the method body is empty and the argument is ignored.
	 *
	 * @param mixed $post_id Unused.
	 * @return void
	 */
	public function removeDocument( $post_id ) {
	}
}

View File

@@ -0,0 +1,19 @@
<?php /** @noinspection HttpUrlsUsage */
namespace WPDRMS\ASP\Misc;
/**
* Class only kept to avoid fatal errors during activation
*
* @deprecated 4.27
*/
class OutputBuffer {
	/**
	 * Stub: there is never a buffer to close.
	 *
	 * @return bool Always false.
	 */
	public function obClose(): bool {
		return false;
	}

	/**
	 * Returns a fresh stub instance.
	 *
	 * Declared static so that `OutputBuffer::getInstance()` does not raise
	 * "Non-static method ... cannot be called statically" on PHP 8. Calling it
	 * on an existing object (`$obj->getInstance()`) keeps working, because PHP
	 * allows static methods to be invoked through an instance.
	 *
	 * @return self
	 */
	public static function getInstance() {
		return new self();
	}
}

View File

@@ -0,0 +1,394 @@
<?php
use WPDRMS\ASP\Utils\FileManager;
if (!defined('ABSPATH')) die('-1');
/*
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
This code is an improved version of what can be found at:
http://www.webcheatsheet.com/php/reading_clean_text_from_pdf.php
AUTHOR:
- Webcheatsheet.com (Original code)
- Joeri Stegeman (joeri210 [at] yahoo [dot] com) (Class conversion and fixes/adjustments)
https://gist.github.com/neko-fire/7038322
DESCRIPTION:
This is a class to convert PDF files into ASCII text or so called PDF text extraction.
It will ignore anything that is not addressed as text within the PDF and any layout.
Currently supported filters are: ASCIIHexDecode, ASCII85Decode, FlateDecode
PURPOSE(S):
Most likely for people that want their PDF to be searchable.
SYNTAX:
include('class.pdf2text.php');
$a = new PDF2Text();
$a->setFilename('test.pdf');
$a->decodePDF();
echo $a->output();
ALTERNATIVES:
Other excellent options to search within a PDF:
- Apache PDFbox (http://pdfbox.apache.org/). An open source Java solution
- pdflib TET (http://www.pdflib.com/products/tet/)
- Online converter: http://snowtide.com/PDFTextStream
*/
// Original name is PDF2Text, changed for better compatibility
if (!class_exists("ASP_PDF2Text")) {
	/**
	 * Best-effort plain text extraction from PDF files.
	 *
	 * The file is split into objects with regular expressions, stream objects are
	 * decoded (ASCIIHexDecode, ASCII85Decode, FlateDecode) and the text operators
	 * found between BT/ET markers are collected. Layout is ignored.
	 *
	 * Usage: setFilename(), decodePDF(), then output().
	 */
	class ASP_PDF2Text {
		/** @var int Hex digits per character code inside <...> strings (2, or 4 after setUnicode(true)). */
		public $multibyte = 2;
		/** @var int Quote flag passed to html_entity_decode() for octal escapes: ENT_COMPAT, ENT_QUOTES or ENT_NOQUOTES. */
		public $convertquotes = ENT_QUOTES;
		/** @var string Path of the PDF to read. */
		public $filename = '';
		/** @var string Result of the last decodePDF() call. */
		public $decodedtext = '';

		/**
		 * Selects the file to decode and clears any previous result.
		 *
		 * @param string $filename
		 */
		public function setFilename($filename) {
			$this->decodedtext = '';
			$this->filename    = $filename;
		}

		/**
		 * Returns the decoded text, or echoes it when $echo is truthy (then nothing is returned).
		 *
		 * @param bool $echo
		 * @return string|null
		 */
		public function output($echo = false) {
			if ($echo) {
				echo $this->decodedtext;
			} else {
				return $this->decodedtext;
			}
		}

		/**
		 * Switches between 4 hex digits per character (truthy) and 2 (falsy).
		 *
		 * @param mixed $input
		 */
		public function setUnicode($input) {
			$this->multibyte = ($input == true) ? 4 : 2;
		}

		/**
		 * Reads the configured file and stores the extracted text in $decodedtext.
		 *
		 * @return string|null "" when the file is missing/empty/unsplittable, otherwise nothing.
		 */
		public function decodePDF() {
			// WordPress specific
			$infile = file_exists($this->filename) ? FileManager::instance()->read($this->filename) : '';
			if (empty($infile)) {
				return "";
			}

			$transformations = array();
			$texts           = array();

			// NOTE: "[\r\n|\r|\n]" is a character class (CR, LF or "|"), kept as in the original.
			$objects = preg_split("/[\r\n|\r|\n]endobj/", $infile);
			if (!is_array($objects)) {
				return "";
			}

			foreach ($objects as $currentObject) {
				// Only objects carrying a data stream are interesting.
				$stream = preg_split("/[\r\n|\r|\n]endstream[\r\n|\r|\n]/", $currentObject);
				if (!is_array($stream) || count($stream) === 0) {
					continue;
				}
				$stream = preg_split("/stream[\r\n|\r|\n]/", $stream[0]);
				if (!is_array($stream) || count($stream) < 2) {
					continue;
				}
				$stream = ltrim($stream[1]);

				// Skip streams that declare Length1, Type or Subtype (fonts, images, ...).
				$options = $this->getObjectOptions($currentObject);
				if (!empty($options["Length1"]) || !empty($options["Type"]) || !empty($options["Subtype"])) {
					continue;
				}

				// Hack, length doesnt always seem to be correct
				unset($options["Length"]);

				$data = $this->getDecodedStream($stream, $options);
				if (!is_string($data) || $data === '') {
					continue;
				}

				if (preg_match_all("#BT[\n|\r](.*)ET[\n|\r]#ismU", $data, $textContainers)) {
					$this->getDirtyTexts($texts, $textContainers[1]);
				} else {
					$this->getCharTransformations($transformations, $data);
				}
			}

			// Analyze text blocks taking into account character transformations.
			$this->decodedtext = $this->getTextUsingTransformations($texts, $transformations);
		}

		/**
		 * Decodes an ASCIIHexDecode stream.
		 *
		 * Whitespace is skipped, "%" starts a comment that runs to end of line, and
		 * the data must be terminated by ">". An odd trailing digit is treated as
		 * the high nibble of a final byte (low nibble 0).
		 *
		 * @param string $input
		 * @return string Decoded bytes, or "" on malformed input / missing terminator.
		 */
		public function decodeAsciiHex($input) {
			$output     = "";
			$expectHigh = true; // true: next digit is the high nibble of a pair
			$isComment  = false;
			$codeHigh   = 0;
			$length     = strlen($input);

			for ($i = 0; $i < $length && $input[$i] != '>'; $i++) {
				$c = $input[$i];
				if ($isComment) {
					if ($c == "\r" || $c == "\n") {
						$isComment = false;
					}
					continue;
				}
				switch ($c) {
					// Double quotes are required so these are real control characters.
					case "\0":
					case "\t":
					case "\r":
					case "\f":
					case "\n":
					case ' ':
						break;
					case '%':
						$isComment = true;
						break;
					default:
						if (stripos('0123456789abcdef', $c) === false) {
							return "";
						}
						$code = hexdec($c);
						if ($expectHigh) {
							$codeHigh = $code;
						} else {
							$output .= chr($codeHigh * 16 + $code);
						}
						$expectHigh = !$expectHigh;
						break;
				}
			}

			// The loop must have stopped on the ">" terminator, not on end of input.
			if ($i >= $length || $input[$i] != '>') {
				return "";
			}
			// A dangling high nibble is completed with a zero low nibble.
			if (!$expectHigh) {
				$output .= chr($codeHigh * 16);
			}
			return $output;
		}

		/**
		 * Decodes an ASCII85Decode stream up to the "~" terminator.
		 *
		 * Whitespace is skipped; "z" between groups stands for four zero bytes.
		 * "%" is a regular base-85 digit here and is NOT treated as a comment.
		 *
		 * @param string $input
		 * @return string Decoded bytes, or "" when an invalid character or a
		 *                single-digit final group is found.
		 */
		public function decodeAscii85($input) {
			$output = "";
			$ords   = array();
			$state  = 0;
			$length = strlen($input);

			for ($i = 0; $i < $length && $input[$i] != '~'; $i++) {
				$c = $input[$i];
				if ($c == "\0" || $c == "\t" || $c == "\r" || $c == "\f" || $c == "\n" || $c == ' ') {
					continue;
				}
				if ($c == 'z' && $state === 0) {
					$output .= str_repeat(chr(0), 4);
					continue;
				}
				$code = ord($c);
				if ($code < ord('!') || $code > ord('u')) {
					return "";
				}
				$ords[$state++] = $code - ord('!');
				if ($state == 5) {
					$state = 0;
					$sum   = 0;
					for ($j = 0; $j < 5; $j++) {
						$sum = $sum * 85 + $ords[$j];
					}
					for ($j = 3; $j >= 0; $j--) {
						$output .= chr(($sum >> ($j * 8)) & 0xFF);
					}
				}
			}

			if ($state === 1) {
				return "";
			}
			if ($state > 1) {
				// Partial final group: the last digit is bumped by one to round up,
				// then only ($state - 1) bytes are emitted.
				$sum = 0;
				for ($k = 0; $k < $state; $k++) {
					$digit = $ords[$k] + (($k == $state - 1) ? 1 : 0);
					$sum  += $digit * pow(85, 4 - $k);
				}
				for ($k = 0; $k < $state - 1; $k++) {
					$output .= chr(($sum >> ((3 - $k) * 8)) & 0xFF);
				}
			}
			return $output;
		}

		/**
		 * Inflates a FlateDecode stream.
		 *
		 * @param string $input
		 * @return string|false Inflated data, or false on failure / when zlib is unavailable.
		 */
		public function decodeFlate($input) {
			if (!function_exists('gzuncompress')) {
				return false;
			}
			// gzuncompress() emits a warning on corrupt data; failure is reported via the return value.
			return @gzuncompress($input);
		}

		/**
		 * Parses the first "<< ... >>" dictionary of an object into name => value pairs.
		 *
		 * Names without a value map to true; "Name value" maps Name to the first
		 * whitespace-separated token after it.
		 *
		 * @param string $object
		 * @return array
		 */
		public function getObjectOptions($object) {
			if (!preg_match("#<<(.*)>>#ismU", $object, $matches)) {
				return array();
			}

			$entries = explode("/", $matches[1]);
			array_shift($entries); // text before the first "/"

			$parsed = array();
			foreach ($entries as $entry) {
				$entry = preg_replace("#\s+#", " ", trim($entry));
				if (strpos($entry, " ") !== false) {
					$parts             = explode(" ", $entry);
					$parsed[$parts[0]] = $parts[1];
				} else {
					$parsed[$entry] = true;
				}
			}
			return $parsed;
		}

		/**
		 * Applies the filters named in $options, in the order they appear, to $stream.
		 *
		 * @param string $stream
		 * @param array  $options Result of getObjectOptions().
		 * @return string|false
		 */
		public function getDecodedStream($stream, $options) {
			if (empty($options["Filter"])) {
				return $stream;
			}

			$length  = !empty($options["Length"]) ? $options["Length"] : strlen($stream);
			$decoded = substr($stream, 0, $length);

			foreach ($options as $key => $value) {
				if ($key == "ASCIIHexDecode") {
					$decoded = $this->decodeAsciiHex($decoded);
				}
				if ($key == "ASCII85Decode") {
					$decoded = $this->decodeAscii85($decoded);
				}
				if ($key == "FlateDecode") {
					$decoded = $this->decodeFlate($decoded);
				}
				// "Crypt" is not supported.
			}
			return $decoded;
		}

		/**
		 * Collects the raw operands of TJ / Tj operators from BT..ET containers.
		 *
		 * @param array $texts          Receives the operands (appended).
		 * @param array $textContainers Contents of the BT..ET blocks.
		 */
		public function getDirtyTexts(&$texts, $textContainers) {
			foreach ($textContainers as $container) {
				if (preg_match_all("#\[(.*)\]\s*TJ[\n|\r]#ismU", $container, $parts)) {
					$texts = array_merge($texts, $parts[1]);
				} elseif (preg_match_all("#T[d|w|m|f]\s*(\(.*\))\s*Tj[\n|\r]#ismU", $container, $parts)) {
					$texts = array_merge($texts, $parts[1]);
				} elseif (preg_match_all("#T[d|w|m|f]\s*(\[.*\])\s*Tj[\n|\r]#ismU", $container, $parts)) {
					$texts = array_merge($texts, $parts[1]);
				}
			}
		}

		/**
		 * Collects bfchar / bfrange mappings found in $stream.
		 *
		 * @param array  $transformations Receives source-code => target-code hex strings.
		 * @param string $stream
		 */
		public function getCharTransformations(&$transformations, $stream) {
			preg_match_all("#([0-9]+)\s+beginbfchar(.*)endbfchar#ismU", $stream, $chars, PREG_SET_ORDER);
			preg_match_all("#([0-9]+)\s+beginbfrange(.*)endbfrange#ismU", $stream, $ranges, PREG_SET_ORDER);

			foreach ($chars as $section) {
				$declared = (int) $section[1];
				$lines    = explode("\n", trim($section[2]));
				$limit    = min($declared, count($lines));
				for ($k = 0; $k < $limit; $k++) {
					if (preg_match("#<([0-9a-f]{2,4})>\s+<([0-9a-f]{4,512})>#is", trim($lines[$k]), $map)) {
						$transformations[str_pad($map[1], 4, "0")] = $map[2];
					}
				}
			}

			foreach ($ranges as $section) {
				$declared = (int) $section[1];
				$lines    = explode("\n", trim($section[2]));
				$limit    = min($declared, count($lines));
				for ($k = 0; $k < $limit; $k++) {
					$line = trim($lines[$k]);
					if (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+<([0-9a-f]{4})>#is", $line, $map)) {
						// <from> <to> <first target>: consecutive targets.
						$from   = hexdec($map[1]);
						$to     = hexdec($map[2]);
						$target = hexdec($map[3]);
						for ($m = $from, $n = 0; $m <= $to; $m++, $n++) {
							$transformations[sprintf("%04X", $m)] = sprintf("%04X", $target + $n);
						}
					} elseif (preg_match("#<([0-9a-f]{4})>\s+<([0-9a-f]{4})>\s+\[(.*)\]#ismU", $line, $map)) {
						// <from> <to> [t1 t2 ...]: explicit target list.
						$from    = hexdec($map[1]);
						$to      = hexdec($map[2]);
						$targets = preg_split("#\s+#", trim($map[3]));
						$total   = count($targets);
						for ($m = $from, $n = 0; $m <= $to && $n < $total; $m++, $n++) {
							$transformations[sprintf("%04X", $m)] = sprintf("%04X", hexdec($targets[$n]));
						}
					}
				}
			}
		}

		/**
		 * Turns the collected operands into text.
		 *
		 * "<...>" hex strings are split into $multibyte-digit codes, mapped through
		 * $transformations and decoded as numeric entities; "(...)" literal strings
		 * are copied with their backslash escapes resolved. Each operand is
		 * followed by a newline.
		 *
		 * @param array $texts
		 * @param array $transformations
		 * @return string
		 */
		public function getTextUsingTransformations($texts, $transformations) {
			$document = "";
			// Literal-string escapes mapped to the control characters they denote.
			$escapes = array("n" => "\n", "r" => "\r", "t" => "\t", "b" => "\x08", "f" => "\f");

			foreach ($texts as $text) {
				$isHex   = false;
				$isPlain = false;
				$hex     = "";
				$plain   = "";
				$length  = strlen($text);

				for ($j = 0; $j < $length; $j++) {
					$c = $text[$j];
					switch ($c) {
						case "<":
							$hex   = "";
							$isHex = true;
							break;
						case ">":
							if ($hex !== "") {
								foreach (str_split($hex, $this->multibyte) as $chunk) {
									$chex = str_pad($chunk, $this->multibyte, "0"); // Add tailing zero
									if (isset($transformations[$chex])) {
										$chex = $transformations[$chex];
									}
									$document .= html_entity_decode("&#x" . $chex . ";", ENT_QUOTES | ENT_XML1, 'UTF-8');
								}
							}
							$isHex = false;
							break;
						case "(":
							$plain   = "";
							$isPlain = true;
							break;
						case ")":
							$document .= $plain;
							$isPlain   = false;
							break;
						case "\\":
							if ($j + 1 >= $length) {
								break; // dangling backslash at the end of the operand
							}
							$c2 = $text[$j + 1];
							if ($c2 === "\\" || $c2 === "(" || $c2 === ")") {
								$plain .= $c2;
							} elseif (isset($escapes[$c2])) {
								$plain .= $escapes[$c2];
							} elseif (preg_match("#^[0-7]{1,3}#", substr($text, $j + 1, 3), $octal)) {
								// Octal character code, one to three digits.
								$j     += strlen($octal[0]) - 1;
								$plain .= html_entity_decode("&#" . octdec($octal[0]) . ";", $this->convertquotes);
							}
							$j++;
							break;
						default:
							if ($isHex) {
								$hex .= $c;
							}
							if ($isPlain) {
								$plain .= $c;
							}
							break;
					}
				}
				$document .= "\n";
			}
			return $document;
		}
	}
}

View File

@@ -0,0 +1,13 @@
<?php
if (!defined('ABSPATH')) die('-1');
if (!class_exists("ASP_PDFSmalot")) {
	/**
	 * Loads the bundled pdf-smalot autoloader and hands out parser objects.
	 */
	class ASP_PDFSmalot {
		/**
		 * Includes the pdf-smalot autoloader (once) from ASP_EXTERNALS_PATH.
		 */
		public function __construct() {
			$autoloader = ASP_EXTERNALS_PATH . '/pdf-smalot/autoload.php';
			include_once( $autoloader );
		}

		/**
		 * @return \Smalot\PdfParser\Parser A new parser instance.
		 */
		public function getObj() {
			$parser = new \Smalot\PdfParser\Parser();
			return $parser;
		}
	}
}

View File

@@ -0,0 +1,481 @@
<?php
defined( 'ABSPATH' ) or die( "You can't access this file directly." );
/**
* RTF parser/formatter
*
* This code reads RTF files and formats the RTF data to HTML.
* Original from: https://github.com/henck/rtf-html-php
*
* PHP version 5
*
* @author Alexander van Oostenrijk
* @copyright 2014 Alexander van Oostenrijk
* @license GNU
* @version 1
* @link http://www.independent-software.com
*
* Sample of use:
*
* $reader = new ASP_RtfReader();
* $rtf = file_get_contents("test.rtf"); // or use a string
* $reader->Parse($rtf);
* //$reader->root->dump(); // to see what the reader read
* $formatter = new ASP_RtfHtml();
* echo $formatter->Format($reader->root);
*
* -----------------------------------------------------------
* Notice: Class names have been prefixed with 'ASP_' to avoid compatibility issues
* (namespaces cannot be used, as WordPress supports php5.2)
*/
class ASP_RtfElement {
	/**
	 * Echoes two non-breaking-space entities per indentation level.
	 *
	 * @param int $level
	 */
	protected function Indent($level) {
		$remaining = $level * 2;
		$printed   = 0;
		while ($printed < $remaining) {
			echo "&nbsp;";
			$printed++;
		}
	}
}
class ASP_RtfGroup extends ASP_RtfElement {
	/** @var ASP_RtfGroup|null Enclosing group, null for the root. */
	public $parent;
	/** @var array Child elements in document order. */
	public $children;

	public function __construct() {
		$this->parent   = null;
		$this->children = array();
	}

	/**
	 * Returns the control word that opens this group.
	 *
	 * @return string|null Null when the group is empty or does not start with a control word.
	 */
	public function GetType() {
		if (count($this->children) == 0) {
			return null;
		}
		$first = $this->children[0];
		if (!$first instanceof ASP_RtfControlWord) {
			return null;
		}
		return $first->word;
	}

	/**
	 * Tells whether the group starts with the "\*" control symbol.
	 *
	 * @return bool|null Null when the group is empty or does not start with a control symbol.
	 */
	public function IsDestination() {
		if (count($this->children) == 0) {
			return null;
		}
		$first = $this->children[0];
		if (!$first instanceof ASP_RtfControlSymbol) {
			return null;
		}
		return $first->symbol == '*';
	}

	/**
	 * Echoes an HTML debug rendering of the group and its children.
	 *
	 * Font/colour/stylesheet/info tables, pictures and destination groups are skipped.
	 *
	 * @param int $level Indentation level.
	 */
	public function dump($level = 0) {
		echo "<div>";
		$this->Indent($level);
		echo "{";
		echo "</div>";

		$skipped = array("fonttbl", "colortbl", "stylesheet", "info");
		foreach ($this->children as $child) {
			if ($child instanceof ASP_RtfGroup) {
				// GetType() may be null; cast before substr() (null is deprecated there since PHP 8.1).
				$type = (string) $child->GetType();
				if (in_array($type, $skipped, true)) {
					continue;
				}
				// Skip any pictures:
				if (substr($type, 0, 4) == "pict") {
					continue;
				}
				if ($child->IsDestination()) {
					continue;
				}
			}
			$child->dump($level + 2);
		}

		echo "<div>";
		$this->Indent($level);
		echo "}";
		echo "</div>";
	}
}
class ASP_RtfControlWord extends ASP_RtfElement {
	/** @var string Control word name. */
	public $word;
	/** @var mixed Parameter attached to the word. */
	public $parameter;

	/**
	 * Echoes a green debug line describing this control word.
	 *
	 * @param int $level Indentation level.
	 */
	public function dump($level) {
		echo "<div style='color:green'>";
		$this->Indent($level);
		echo "WORD " . $this->word . " (" . $this->parameter . ")";
		echo "</div>";
	}
}
class ASP_RtfControlSymbol extends ASP_RtfElement {
	/** @var string The symbol character. */
	public $symbol;
	/** @var mixed Parameter attached to the symbol, 0 by default. */
	public $parameter = 0;

	/**
	 * Echoes a blue debug line describing this control symbol.
	 *
	 * @param int $level Indentation level.
	 */
	public function dump($level) {
		echo "<div style='color:blue'>";
		$this->Indent($level);
		echo "SYMBOL " . $this->symbol . " (" . $this->parameter . ")";
		echo "</div>";
	}
}
class ASP_RtfText extends ASP_RtfElement {
	/** @var string The text run. */
	public $text;

	/**
	 * Echoes a red debug line containing the text.
	 *
	 * @param int $level Indentation level.
	 */
	public function dump($level) {
		echo "<div style='color:red'>";
		$this->Indent($level);
		echo "TEXT " . $this->text;
		echo "</div>";
	}
}
/**
 * Parses an RTF string into a tree of ASP_RtfGroup / ASP_RtfControlWord /
 * ASP_RtfControlSymbol / ASP_RtfText elements, available in $root after Parse().
 */
class ASP_RtfReader {
	/** @var ASP_RtfGroup|null Root group of the last parsed document. */
	public $root = null;
	// Parser state. Declared explicitly (they used to be created dynamically,
	// which is deprecated since PHP 8.2) and kept public for compatibility.
	/** @var string Input being parsed. */
	public $rtf = '';
	/** @var int Index of the next character to read. */
	public $pos = 0;
	/** @var int Length of $rtf in bytes. */
	public $len = 0;
	/** @var string|null Last character read, null once the end of input was hit. */
	public $char = null;
	/** @var ASP_RtfGroup|null Group currently being filled. */
	public $group = null;
	/** @var string|null Last error message. */
	public $err = null;

	/**
	 * Reads the next character into $char; at end of input sets $char to null and records $err.
	 */
	protected function GetChar() {
		$this->char = null;
		if ($this->pos < strlen($this->rtf)) {
			$this->char = $this->rtf[$this->pos++];
		} else {
			$this->err = "Tried to read past EOF, RTF is probably truncated";
		}
	}

	/**
	 * Converts ISO-8859-1 bytes to UTF-8 (replacement for the deprecated utf8_encode()).
	 *
	 * @param string $data
	 * @return string
	 */
	protected function Latin1ToUtf8($data) {
		return preg_replace_callback(
			'/[\x80-\xFF]/',
			function ($match) {
				$byte = ord($match[0]);
				return chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
			},
			(string) $data
		);
	}

	/**
	 * Appends an element to the current group.
	 *
	 * @param ASP_RtfElement $element
	 * @throws Exception When no group is open, i.e. the input is not valid RTF.
	 */
	protected function AddChild($element) {
		if ($this->group == null) {
			throw new Exception("RTF content found outside of a group");
		}
		array_push($this->group->children, $element);
	}

	protected function ParseStartGroup() {
		// Store state of document on stack.
		$group = new ASP_RtfGroup();
		if ($this->group != null) {
			$group->parent = $this->group;
		}
		if ($this->root == null) {
			$this->group = $group;
			$this->root  = $group;
		} else {
			array_push($this->group->children, $group);
			$this->group = $group;
		}
	}

	protected function is_letter() {
		// (string) cast: $char is null at end of input.
		$code = ord((string) $this->char);
		return ($code >= 65 && $code <= 90) || ($code >= 97 && $code <= 122);
	}

	protected function is_digit() {
		$code = ord((string) $this->char);
		return $code >= 48 && $code <= 57;
	}

	protected function ParseEndGroup() {
		// Retrieve state of document from stack. A stray "}" with no open group is ignored.
		if ($this->group !== null) {
			$this->group = $this->group->parent;
		}
	}

	protected function ParseControlWord() {
		$this->GetChar();
		$word = "";
		while ($this->is_letter()) {
			$word .= $this->char;
			$this->GetChar();
		}

		// Read parameter (if any) consisting of digits.
		// Parameter may be negative.
		$parameter = null;
		$negative  = false;
		if ($this->char == '-') {
			$this->GetChar();
			$negative = true;
		}
		while ($this->is_digit()) {
			if ($parameter === null) {
				$parameter = 0;
			}
			$parameter = $parameter * 10 + (int) $this->char;
			$this->GetChar();
		}
		if ($parameter === null) {
			$parameter = 1;
		}
		if ($negative) {
			$parameter = -$parameter;
		}

		if ($word == "u") {
			// \u is followed by a replacement character.
			// Ignore space delimiter
			if ($this->char == ' ') {
				$this->GetChar();
			}
			// if the replacement character is encoded as
			// hexadecimal value \'hh then jump over it
			if ($this->char == '\\' && isset($this->rtf[$this->pos]) && $this->rtf[$this->pos] == '\'') {
				$this->pos = $this->pos + 3;
			}
			// Convert to UTF unsigned decimal code
			if ($negative) {
				$parameter = 65536 + $parameter;
			}
		} elseif ($this->char !== null && $this->char != ' ') {
			// A space is a delimiter and is consumed. Anything else belongs to the
			// next item, so put it back (nothing to put back at end of input).
			$this->pos--;
		}

		$rtfword            = new ASP_RtfControlWord();
		$rtfword->word      = $word;
		$rtfword->parameter = $parameter;
		$this->AddChild($rtfword);
	}

	protected function ParseControlSymbol() {
		// Read symbol (one character only).
		$this->GetChar();
		$symbol = $this->char;

		// Symbols ordinarily have no parameter. However,
		// if this is \', then it is followed by a 2-digit hex-code:
		$parameter = 0;
		if ($symbol == '\'') {
			$this->GetChar();
			$high = $this->char;
			$this->GetChar();
			$parameter = hexdec($high . $this->char);
		}

		$rtfsymbol            = new ASP_RtfControlSymbol();
		$rtfsymbol->symbol    = $symbol;
		$rtfsymbol->parameter = $parameter;
		$this->AddChild($rtfsymbol);
	}

	protected function ParseControl() {
		// Beginning of an RTF control word or control symbol.
		// Look ahead by one character to see if it starts with
		// a letter (control word) or another symbol (control symbol):
		$this->GetChar();
		if ($this->char !== null) {
			$this->pos--;
		}
		if ($this->is_letter()) {
			$this->ParseControlWord();
		} else {
			$this->ParseControlSymbol();
		}
	}

	protected function ParseText() {
		// Parse plain text up to backslash or brace, unless escaped.
		$text = "";
		do {
			$terminate = false;
			if ($this->char == '\\') {
				// Perform lookahead to see if this is really an escape sequence.
				$backslashAt = $this->pos - 1;
				$this->GetChar();
				switch ($this->char) {
					case '\\':
					case '{':
					case '}':
						// Escaped literal: appended once by the common code below.
						break;
					default:
						// Not an escape. Roll back to the backslash.
						$this->pos = $backslashAt;
						$terminate = true;
						break;
				}
			} elseif ($this->char == '{' || $this->char == '}') {
				$this->pos--;
				$terminate = true;
			}
			if (!$terminate) {
				$text .= $this->char;
				$this->GetChar();
			}
			// Stop on end of input ($char null) so the last character is not dropped.
		} while (!$terminate && $this->char !== null);

		$rtftext       = new ASP_RtfText();
		$rtftext->text = $text;
		// If group does not exist, then this is not a valid RTF file: AddChild() throws.
		$this->AddChild($rtftext);
	}

	/**
	 * Attempt to parse an RTF string.
	 *
	 * @param string $rtf
	 * @return bool TRUE on success, FALSE when the input is not valid RTF.
	 */
	public function Parse($rtf) {
		if ( function_exists( 'mb_internal_encoding' ) ) {
			mb_internal_encoding( "UTF-8" );
		}
		try {
			$this->rtf   = $this->Latin1ToUtf8($rtf);
			$this->pos   = 0;
			$this->len   = strlen($this->rtf);
			$this->group = null;
			$this->root  = null;

			while ($this->pos < $this->len) {
				// Read next character:
				$this->GetChar();
				// Ignore \r and \n
				if ($this->char == "\n" || $this->char == "\r") {
					continue;
				}
				// What type of character is this?
				switch ($this->char) {
					case '{':
						$this->ParseStartGroup();
						break;
					case '}':
						$this->ParseEndGroup();
						break;
					case '\\':
						$this->ParseControl();
						break;
					default:
						$this->ParseText();
						break;
				}
			}
			return TRUE;
		} catch (Exception $ex) {
			return FALSE;
		}
	}
}
/**
 * Character formatting state tracked while converting RTF to HTML.
 *
 * Properties are declared explicitly; creating them dynamically in Reset()
 * is deprecated since PHP 8.2.
 */
class ASP_RtfState {
	public $bold = false;
	public $italic = false;
	public $underline = false;
	public $end_underline = false;
	public $strike = false;
	public $hidden = false;
	public $fontsize = 0;

	public function __construct() {
		$this->Reset();
	}

	/**
	 * Restores the default formatting: all flags off and font size 0.
	 */
	public function Reset() {
		$this->bold          = false;
		$this->italic        = false;
		$this->underline     = false;
		$this->end_underline = false;
		$this->strike        = false;
		$this->hidden        = false;
		$this->fontsize      = 0;
	}
}
/**
 * Renders a parsed RTF tree (ASP_RtfReader::$root) as HTML.
 */
class ASP_RtfHtml {
	// Declared explicitly: dynamic property creation is deprecated since PHP 8.2.
	/** @var string HTML produced so far. */
	public $output = "";
	/** @var ASP_RtfState|null State the currently open span was created for. */
	public $previousState = null;
	/** @var array Which of the 'span' / 'p' tags are currently open. */
	public $openedTags = array('span' => false, 'p' => false);
	/** @var array Stack of ASP_RtfState objects, one per nested group. */
	public $states = array();
	/** @var ASP_RtfState|null State on top of the stack. */
	public $state = null;

	/**
	 * Converts the tree below $root to HTML.
	 *
	 * @param ASP_RtfGroup $root
	 * @return string
	 */
	public function Format($root) {
		$this->output = "";
		// Keeping track of style modifications
		$this->previousState = null;
		$this->openedTags    = array('span' => False, 'p' => False);
		// Create a stack of states:
		$this->states = array();
		// Put an initial standard state onto the stack:
		$this->state = new ASP_RtfState();
		array_push($this->states, $this->state);
		$this->FormatGroup($root);
		return $this->output;
	}

	protected function FormatGroup($group) {
		// GetType() may be null; cast once so substr() never receives null (deprecated since PHP 8.1).
		$type = (string) $group->GetType();

		// Can we ignore this group?
		if ($type == "fonttbl" || $type == "colortbl" || $type == "stylesheet" || $type == "info") {
			return;
		}
		// Skip any pictures:
		if (substr($type, 0, 4) == "pict") {
			return;
		}
		if ($group->IsDestination()) {
			return;
		}

		// Push a new state onto the stack:
		$this->state = clone $this->state;
		array_push($this->states, $this->state);

		foreach ($group->children as $child) {
			if ($child instanceof ASP_RtfGroup) {
				$this->FormatGroup($child);
			} elseif ($child instanceof ASP_RtfControlWord) {
				$this->FormatControlWord($child);
			} elseif ($child instanceof ASP_RtfControlSymbol) {
				$this->FormatControlSymbol($child);
			} elseif ($child instanceof ASP_RtfText) {
				$this->FormatText($child);
			}
		}

		// Pop state from stack.
		array_pop($this->states);
		$this->state = $this->states[count($this->states) - 1];
	}

	protected function FormatControlWord($word) {
		switch ($word->word) {
			case "plain":
				$this->state->Reset();
				break;
			case "b":
				$this->state->bold = $word->parameter;
				break;
			case "i":
				$this->state->italic = $word->parameter;
				break;
			case "ul":
				$this->state->underline = $word->parameter;
				break;
			case "ulnone":
				$this->state->end_underline = $word->parameter;
				break;
			case "strike":
				$this->state->strike = $word->parameter;
				break;
			case "v":
				$this->state->hidden = $word->parameter;
				break;
			case "fs":
				$this->state->fontsize = ceil(($word->parameter / 24) * 16);
				break;
			case "par":
				// close previously opened 'span' tag
				$this->CloseTag();
				// decide whether to open or to close a 'p' tag
				if ($this->openedTags["p"]) {
					$this->CloseTag("p");
				} else {
					$this->output         .= "<p>";
					$this->openedTags['p'] = True;
				}
				break;
			// Characters:
			case "lquote":
				$this->output .= "&lsquo;";
				break;
			case "rquote":
				$this->output .= "&rsquo;";
				break;
			case "ldblquote":
				$this->output .= "&ldquo;";
				break;
			case "rdblquote":
				$this->output .= "&rdquo;";
				break;
			case "emdash":
				$this->output .= "&mdash;";
				break;
			case "endash":
				$this->output .= "&ndash;";
				break;
			case "bullet":
				$this->output .= "&bull;";
				break;
			// Print Unicode character
			case "u":
				$this->ApplyStyle("&#" . $word->parameter . ";");
				break;
		}
	}

	protected function ApplyStyle($txt) {
		// create span only when a style change occur
		if ($this->previousState != $this->state) {
			$span = "";
			if ($this->state->bold) {
				$span .= "font-weight:bold;";
			}
			if ($this->state->italic) {
				$span .= "font-style:italic;";
			}
			if ($this->state->underline) {
				$span .= "text-decoration:underline;";
			}
			if ($this->state->end_underline) {
				$span .= "text-decoration:none;";
			}
			if ($this->state->strike) {
				// "strikethrough" is not a CSS value; the keyword is "line-through".
				$span .= "text-decoration:line-through;";
			}
			if ($this->state->hidden) {
				$span .= "display:none;";
			}
			if ($this->state->fontsize != 0) {
				$span .= "font-size: {$this->state->fontsize}px;";
			}
			// Keep track of preceding style
			$this->previousState = clone $this->state;
			// close previously opened 'span' tag
			$this->CloseTag();
			$this->output            .= "<span style=\"{$span}\">" . $txt;
			$this->openedTags["span"] = True;
		} else {
			$this->output .= $txt;
		}
	}

	protected function CloseTag($tag = "span") {
		if ($this->openedTags[$tag]) {
			$this->output          .= "</{$tag}>";
			$this->openedTags[$tag] = False;
		}
	}

	protected function FormatControlSymbol($symbol) {
		if ($symbol->symbol == '\'') {
			// \'hh is a single byte in the document code page. Declaring the input as
			// UTF-8 made htmlentities() return "" for every byte >= 0x80.
			// NOTE(review): Windows-1252 is assumed; the \ansicpg value is not parsed — confirm.
			$this->ApplyStyle(htmlentities(chr($symbol->parameter), ENT_QUOTES, 'cp1252'));
		}
	}

	protected function FormatText($text) {
		$this->ApplyStyle($text->text);
	}
}

View File

@@ -0,0 +1,7 @@
<?php
// autoload.php @generated by Composer
// Load the generated bootstrap file; presumably it defines the
// ComposerAutoloaderInit… class used below (not visible here).
require_once __DIR__ . '/composer/autoload_real.php';
// Return whatever the bootstrap's getLoader() yields to the including script.
return ComposerAutoloaderInitdeffb6dedd7da61f552f6836b399890f::getLoader();

View File

@@ -0,0 +1,481 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer\Autoload;
/**
* ClassLoader implements a PSR-0, PSR-4 and classmap class loader.
*
* $loader = new \Composer\Autoload\ClassLoader();
*
* // register classes with namespaces
* $loader->add('Symfony\Component', __DIR__.'/component');
* $loader->add('Symfony', __DIR__.'/framework');
*
* // activate the autoloader
* $loader->register();
*
* // to enable searching the include path (eg. for PEAR packages)
* $loader->setUseIncludePath(true);
*
* In this example, if you try to use a class in the Symfony\Component
* namespace or one of its children (Symfony\Component\Console for instance),
* the autoloader will first look for the class under the component/
* directory, and it will then fallback to the framework/ directory if not
* found before giving up.
*
* This class is loosely based on the Symfony UniversalClassLoader.
*
* @author Fabien Potencier <fabien@symfony.com>
* @author Jordi Boggiano <j.boggiano@seld.be>
* @see https://www.php-fig.org/psr/psr-0/
* @see https://www.php-fig.org/psr/psr-4/
*/
class ClassLoader
{
private $vendorDir;
// PSR-4
private $prefixLengthsPsr4 = array();
private $prefixDirsPsr4 = array();
private $fallbackDirsPsr4 = array();
// PSR-0
private $prefixesPsr0 = array();
private $fallbackDirsPsr0 = array();
private $useIncludePath = false;
private $classMap = array();
private $classMapAuthoritative = false;
private $missingClasses = array();
private $apcuPrefix;
private static $registeredLoaders = array();
/**
 * @param string|null $vendorDir Stored as-is; register()/unregister() use it as the
 *                               key into the static registry of loaders when it is not null.
 */
public function __construct($vendorDir = null)
{
$this->vendorDir = $vendorDir;
}
/**
 * Returns the PSR-0 prefixes merged across all first-letter buckets.
 *
 * @return array Empty array when no PSR-0 prefix is registered.
 */
public function getPrefixes()
{
    if (empty($this->prefixesPsr0)) {
        return array();
    }

    $buckets = array_values($this->prefixesPsr0);

    return call_user_func_array('array_merge', $buckets);
}
/**
 * @return array The registered PSR-4 prefix => directories map.
 */
public function getPrefixesPsr4()
{
return $this->prefixDirsPsr4;
}
/**
 * @return array The PSR-0 fallback directories.
 */
public function getFallbackDirs()
{
return $this->fallbackDirsPsr0;
}
/**
 * @return array The PSR-4 fallback directories.
 */
public function getFallbackDirsPsr4()
{
return $this->fallbackDirsPsr4;
}
/**
 * @return array The class name => file path map.
 */
public function getClassMap()
{
return $this->classMap;
}
/**
 * Adds entries to the class map.
 *
 * When the current map is empty the given map replaces it as-is; otherwise
 * both are combined with array_merge().
 *
 * @param array $classMap Class to filename map
 */
public function addClassMap(array $classMap)
{
    if (!$this->classMap) {
        $this->classMap = $classMap;

        return;
    }

    $this->classMap = array_merge($this->classMap, $classMap);
}
/**
 * Registers PSR-0 directories for a prefix, placed before or after the
 * directories already known for it. An empty prefix targets the PSR-0
 * fallback directories.
 *
 * @param string       $prefix  The prefix
 * @param array|string $paths   The PSR-0 root directories
 * @param bool         $prepend Whether to prepend the directories
 */
public function add($prefix, $paths, $prepend = false)
{
    $dirs = (array) $paths;

    if (!$prefix) {
        $this->fallbackDirsPsr0 = $prepend
            ? array_merge($dirs, $this->fallbackDirsPsr0)
            : array_merge($this->fallbackDirsPsr0, $dirs);

        return;
    }

    // Prefixes are bucketed by their first character.
    $first = $prefix[0];
    if (!isset($this->prefixesPsr0[$first][$prefix])) {
        $this->prefixesPsr0[$first][$prefix] = $dirs;

        return;
    }

    $known = $this->prefixesPsr0[$first][$prefix];
    $this->prefixesPsr0[$first][$prefix] = $prepend
        ? array_merge($dirs, $known)
        : array_merge($known, $dirs);
}
/**
 * Registers PSR-4 directories for a namespace, placed before or after the
 * directories already known for it. An empty prefix targets the PSR-4
 * fallback directories.
 *
 * @param string       $prefix  The prefix/namespace, with trailing '\\'
 * @param array|string $paths   The PSR-4 base directories
 * @param bool         $prepend Whether to prepend the directories
 *
 * @throws \InvalidArgumentException When a new, non-empty prefix does not end with '\\'
 */
public function addPsr4($prefix, $paths, $prepend = false)
{
    $dirs = (array) $paths;

    if (!$prefix) {
        // Register directories for the root namespace.
        $this->fallbackDirsPsr4 = $prepend
            ? array_merge($dirs, $this->fallbackDirsPsr4)
            : array_merge($this->fallbackDirsPsr4, $dirs);

        return;
    }

    if (isset($this->prefixDirsPsr4[$prefix])) {
        // Extend an already registered namespace.
        $known = $this->prefixDirsPsr4[$prefix];
        $this->prefixDirsPsr4[$prefix] = $prepend
            ? array_merge($dirs, $known)
            : array_merge($known, $dirs);

        return;
    }

    // Register directories for a new namespace.
    $length = strlen($prefix);
    if ('\\' !== $prefix[$length - 1]) {
        throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
    }
    $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
    $this->prefixDirsPsr4[$prefix] = $dirs;
}
/**
 * Sets the PSR-0 directories for a prefix, discarding any registered before.
 * An empty prefix replaces the PSR-0 fallback directories.
 *
 * @param string       $prefix The prefix
 * @param array|string $paths  The PSR-0 base directories
 */
public function set($prefix, $paths)
{
    $dirs = (array) $paths;

    if ($prefix) {
        $this->prefixesPsr0[$prefix[0]][$prefix] = $dirs;

        return;
    }

    $this->fallbackDirsPsr0 = $dirs;
}
/**
 * Sets the PSR-4 directories for a namespace, discarding any registered before.
 * An empty prefix replaces the PSR-4 fallback directories.
 *
 * @param string       $prefix The prefix/namespace, with trailing '\\'
 * @param array|string $paths  The PSR-4 base directories
 *
 * @throws \InvalidArgumentException When a non-empty prefix does not end with '\\'
 */
public function setPsr4($prefix, $paths)
{
    if (!$prefix) {
        $this->fallbackDirsPsr4 = (array) $paths;

        return;
    }

    $length = strlen($prefix);
    if ('\\' !== $prefix[$length - 1]) {
        throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
    }

    $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
    $this->prefixDirsPsr4[$prefix] = (array) $paths;
}
/**
 * Sets whether the include path should be searched for class files.
 * The flag is only stored here; the lookup that reads it is not in this method.
 *
 * @param bool $useIncludePath
 */
public function setUseIncludePath($useIncludePath)
{
$this->useIncludePath = $useIncludePath;
}
/**
 * Tells whether the autoloader is set to search the include path for classes.
 *
 * @return bool The value last given to setUseIncludePath() (false by default).
 */
public function getUseIncludePath()
{
return $this->useIncludePath;
}
/**
 * Sets whether the class map is authoritative. When it is, findFile() returns
 * false for any class missing from the map instead of searching the prefix
 * and fallback directories.
 *
 * @param bool $classMapAuthoritative
 */
public function setClassMapAuthoritative($classMapAuthoritative)
{
$this->classMapAuthoritative = $classMapAuthoritative;
}
/**
 * Should class lookup fail if the class is not found in the current class map?
 *
 * @return bool The value last given to setClassMapAuthoritative() (false by default).
 */
public function isClassMapAuthoritative()
{
return $this->classMapAuthoritative;
}
/**
 * Sets the APCu prefix used to cache found/not-found classes.
 *
 * The prefix is kept only when apcu_fetch() exists and the "apc.enabled" ini
 * setting evaluates to true; otherwise null is stored.
 *
 * @param string|null $apcuPrefix
 */
public function setApcuPrefix($apcuPrefix)
{
    $apcuUsable = function_exists('apcu_fetch')
        && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN);

    $this->apcuPrefix = $apcuUsable ? $apcuPrefix : null;
}
/**
 * The APCu prefix in use, or null if APCu caching is not enabled
 * (setApcuPrefix() stores null when APCu is unavailable).
 *
 * @return string|null
 */
public function getApcuPrefix()
{
return $this->apcuPrefix;
}
/**
 * Registers this instance as an autoloader and, when a vendor directory was
 * given to the constructor, records it in the static registry of loaders.
 *
 * @param bool $prepend Whether to prepend the autoloader or not
 */
public function register($prepend = false)
{
    spl_autoload_register(array($this, 'loadClass'), true, $prepend);

    if (null !== $this->vendorDir) {
        if ($prepend) {
            // Array union keeps this loader first.
            self::$registeredLoaders = array($this->vendorDir => $this) + self::$registeredLoaders;
        } else {
            // Remove first so the loader ends up last.
            unset(self::$registeredLoaders[$this->vendorDir]);
            self::$registeredLoaders[$this->vendorDir] = $this;
        }
    }
}
/**
 * Unregisters this instance as an autoloader and removes it from the static
 * registry when it has a vendor directory.
 */
public function unregister()
{
    spl_autoload_unregister(array($this, 'loadClass'));

    if (null === $this->vendorDir) {
        return;
    }

    unset(self::$registeredLoaders[$this->vendorDir]);
}
/**
 * Loads the given class or interface by including the file findFile() returns.
 *
 * @param  string    $class The name of the class
 * @return true|null True if loaded, null otherwise
 */
public function loadClass($class)
{
    $file = $this->findFile($class);
    if (!$file) {
        return null;
    }

    includeFile($file);

    return true;
}
/**
 * Finds the path to the file where the class is defined.
 *
 * Lookup order: class map, known-missing list, APCu cache (when a prefix is
 * set), then the prefix/fallback directories for ".php" and, on HHVM, ".hh".
 * Misses are remembered so they are not searched again.
 *
 * @param string $class The name of the class
 *
 * @return string|false The path if found, false otherwise
 */
public function findFile($class)
{
    // class map lookup
    if (isset($this->classMap[$class])) {
        return $this->classMap[$class];
    }
    if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
        return false;
    }

    $useApcu = null !== $this->apcuPrefix;
    if ($useApcu) {
        $file = apcu_fetch($this->apcuPrefix.$class, $hit);
        if ($hit) {
            return $file;
        }
    }

    $file = $this->findFileWithExtension($class, '.php');

    // Search for Hack files if we are running on HHVM
    if (false === $file && defined('HHVM_VERSION')) {
        $file = $this->findFileWithExtension($class, '.hh');
    }

    if ($useApcu) {
        apcu_add($this->apcuPrefix.$class, $file);
    }

    if (false === $file) {
        // Remember that this class does not exist.
        $this->missingClasses[$class] = true;
    }

    return $file;
}
/**
* Returns the currently registered loaders indexed by their corresponding vendor directories.
*
* The registry is maintained by register() and unregister(); loaders created
* without a vendor directory never appear in it.
*
* @return self[]
*/
public static function getRegisteredLoaders()
{
return self::$registeredLoaders;
}
/**
* Searches the configured directories for the file defining a class.
*
* Strategies are tried in this order and the first existing file wins:
* PSR-4 prefixes, PSR-4 fallback dirs, PSR-0 prefixes, PSR-0 fallback dirs,
* and finally the PHP include path when useIncludePath is enabled.
*
* @param string $class The name of the class
* @param string $ext File extension including the leading dot (findFile() passes '.php' and '.hh')
*
* @return string|false The path if found, false otherwise
*/
private function findFileWithExtension($class, $ext)
{
// PSR-4 lookup
$logicalPathPsr4 = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
// The prefix tables are indexed by the first character of the class name.
$first = $class[0];
if (isset($this->prefixLengthsPsr4[$first])) {
$subPath = $class;
// Strip one trailing namespace segment per iteration, so longer prefixes are tried before shorter ones.
while (false !== $lastPos = strrpos($subPath, '\\')) {
$subPath = substr($subPath, 0, $lastPos);
$search = $subPath . '\\';
if (isset($this->prefixDirsPsr4[$search])) {
// Part of the logical path that follows the matched prefix.
$pathEnd = DIRECTORY_SEPARATOR . substr($logicalPathPsr4, $lastPos + 1);
foreach ($this->prefixDirsPsr4[$search] as $dir) {
if (file_exists($file = $dir . $pathEnd)) {
return $file;
}
}
}
}
}
// PSR-4 fallback dirs
foreach ($this->fallbackDirsPsr4 as $dir) {
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr4)) {
return $file;
}
}
// PSR-0 lookup
if (false !== $pos = strrpos($class, '\\')) {
// namespaced class name
// Keep the namespace part as is; only underscores in the class-name part become directory separators.
$logicalPathPsr0 = substr($logicalPathPsr4, 0, $pos + 1)
. strtr(substr($logicalPathPsr4, $pos + 1), '_', DIRECTORY_SEPARATOR);
} else {
// PEAR-like class name
$logicalPathPsr0 = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
}
if (isset($this->prefixesPsr0[$first])) {
foreach ($this->prefixesPsr0[$first] as $prefix => $dirs) {
// Only prefixes the class name starts with are considered.
if (0 === strpos($class, $prefix)) {
foreach ($dirs as $dir) {
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
return $file;
}
}
}
}
}
// PSR-0 fallback dirs
foreach ($this->fallbackDirsPsr0 as $dir) {
if (file_exists($file = $dir . DIRECTORY_SEPARATOR . $logicalPathPsr0)) {
return $file;
}
}
// PSR-0 include paths.
if ($this->useIncludePath && $file = stream_resolve_include_path($logicalPathPsr0)) {
return $file;
}
return false;
}
}
/**
* Scope isolated include.
*
* Prevents access to $this/self from included files: because this is a plain
* function rather than a method, the included file only sees the $file variable.
*
* @param string $file Path of the PHP file to include
*/
function includeFile($file)
{
include $file;
}

View File

@@ -0,0 +1,337 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer;
use Composer\Autoload\ClassLoader;
use Composer\Semver\VersionParser;
/**
 * This class is copied in every Composer installed project and available to all
 *
 * See also https://getcomposer.org/doc/07-runtime.md#installed-versions
 *
 * To require its presence, you can require `composer-runtime-api ^2.0`
 */
class InstalledVersions
{
    /**
     * Dataset of the installed.php next to this file, or the one passed to reload().
     * Null until it has been loaded; an empty array when no dataset is available.
     *
     * @var array|null
     */
    private static $installed;

    /**
     * Whether ClassLoader::getRegisteredLoaders() exists; null until probed.
     *
     * @var bool|null
     */
    private static $canGetVendors;

    /**
     * Cache of installed.php datasets keyed by vendor directory.
     *
     * @var array[]
     */
    private static $installedByVendor = array();

    /**
     * True when self::$installed was copied from the dataset of a registered loader
     * whose vendor directory is the one this file lives in. Used by getInstalled()
     * to avoid returning that dataset twice.
     *
     * @var bool
     */
    private static $installedIsLocalDir = false;

    /**
     * Returns a list of all package names which are present, either by being installed, replaced or provided
     *
     * @return string[]
     * @psalm-return list<string>
     */
    public static function getInstalledPackages()
    {
        $packages = array();
        foreach (self::getInstalled() as $installed) {
            $packages[] = array_keys($installed['versions']);
        }

        // No dataset at all: array_merge() must not be called without arguments.
        if (0 === \count($packages)) {
            return array();
        }

        if (1 === \count($packages)) {
            return $packages[0];
        }

        // Flip + keys removes names that appear in more than one dataset.
        return array_keys(array_flip(\call_user_func_array('array_merge', $packages)));
    }

    /**
     * Returns a list of all package names with a specific type e.g. 'library'
     *
     * @param string $type
     * @return string[]
     * @psalm-return list<string>
     */
    public static function getInstalledPackagesByType($type)
    {
        $packagesByType = array();

        foreach (self::getInstalled() as $installed) {
            foreach ($installed['versions'] as $name => $package) {
                if (isset($package['type']) && $package['type'] === $type) {
                    $packagesByType[] = $name;
                }
            }
        }

        return $packagesByType;
    }

    /**
     * Checks whether the given package is installed
     *
     * This also returns true if the package name is provided or replaced by another package
     *
     * @param string $packageName
     * @param bool $includeDevRequirements
     * @return bool
     */
    public static function isInstalled($packageName, $includeDevRequirements = true)
    {
        foreach (self::getInstalled() as $installed) {
            if (isset($installed['versions'][$packageName])) {
                return $includeDevRequirements || empty($installed['versions'][$packageName]['dev_requirement']);
            }
        }

        return false;
    }

    /**
     * Checks whether the given package satisfies a version constraint
     *
     * e.g. If you want to know whether version 2.3+ of package foo/bar is installed, you would call:
     *
     * Composer\InstalledVersions::satisfies(new VersionParser, 'foo/bar', '^2.3')
     *
     * @param VersionParser $parser Install composer/semver to have access to this class and functionality
     * @param string $packageName
     * @param string|null $constraint A version constraint to check for, if you pass one you have to make sure composer/semver is required by your package
     * @return bool
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function satisfies(VersionParser $parser, $packageName, $constraint)
    {
        $constraint = $parser->parseConstraints($constraint);
        $provided = $parser->parseConstraints(self::getVersionRanges($packageName));

        return $provided->matches($constraint);
    }

    /**
     * Returns a version constraint representing all the range(s) which are installed for a given package
     *
     * It is easier to use this via satisfies() if you need to check
     * whether a given version of a package is installed, and not just whether it exists
     *
     * @param string $packageName
     * @return string Version constraint usable with composer/semver
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getVersionRanges($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            $ranges = array();
            if (isset($installed['versions'][$packageName]['pretty_version'])) {
                $ranges[] = $installed['versions'][$packageName]['pretty_version'];
            }
            if (array_key_exists('aliases', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['aliases']);
            }
            if (array_key_exists('replaced', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['replaced']);
            }
            if (array_key_exists('provided', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['provided']);
            }

            return implode(' || ', $ranges);
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param string $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as version, use satisfies or getVersionRanges if you need to know if a given version is present
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getVersion($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['version'])) {
                return null;
            }

            return $installed['versions'][$packageName]['version'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param string $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as version, use satisfies or getVersionRanges if you need to know if a given version is present
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getPrettyVersion($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['pretty_version'])) {
                return null;
            }

            return $installed['versions'][$packageName]['pretty_version'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param string $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as reference
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getReference($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['reference'])) {
                return null;
            }

            return $installed['versions'][$packageName]['reference'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param string $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as install path. Packages of type metapackages also have a null install path.
     * @throws \OutOfBoundsException When the package is not present in any loaded dataset
     */
    public static function getInstallPath($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            return isset($installed['versions'][$packageName]['install_path']) ? $installed['versions'][$packageName]['install_path'] : null;
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * Returns the "root" entry of the first loaded dataset.
     *
     * @return array
     * @psalm-return array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}
     */
    public static function getRootPackage()
    {
        $installed = self::getInstalled();

        return $installed[0]['root'];
    }

    /**
     * Returns the raw installed.php data for custom implementations
     *
     * @deprecated Use getAllRawData() instead which returns all datasets for all autoloaders present in the process. getRawData only returns the first dataset loaded, which may not be what you expect.
     * @return array[]
     * @psalm-return array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>}
     */
    public static function getRawData()
    {
        @trigger_error('getRawData only returns the first dataset loaded, which may not be what you expect. Use getAllRawData() instead which returns all datasets for all autoloaders present in the process.', E_USER_DEPRECATED);

        if (null === self::$installed) {
            // only require the installed.php file if this file is loaded from its dumped location,
            // and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
            if (substr(__DIR__, -8, 1) !== 'C') {
                self::$installed = include __DIR__ . '/installed.php';
            } else {
                self::$installed = array();
            }
        }

        return self::$installed;
    }

    /**
     * Returns the raw data of all installed.php which are currently loaded for custom implementations
     *
     * @return array[]
     * @psalm-return list<array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>}>
     */
    public static function getAllRawData()
    {
        return self::getInstalled();
    }

    /**
     * Lets you reload the static array from another file
     *
     * This is only useful for complex integrations in which a project needs to use
     * this class but then also needs to execute another project's autoloader in process,
     * and wants to ensure both projects have access to their version of installed.php.
     *
     * A typical case would be PHPUnit, where it would need to make sure it reads all
     * the data it needs from this class, then call reload() with
     * `require $CWD/vendor/composer/installed.php` (or similar) as input to make sure
     * the project in which it runs can then also use this class safely, without
     * interference between PHPUnit's dependencies and the project's dependencies.
     *
     * @param array[] $data A vendor/composer/installed.php data set
     * @return void
     *
     * @psalm-param array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>} $data
     */
    public static function reload($data)
    {
        self::$installed = $data;
        self::$installedByVendor = array();
        // The explicitly supplied dataset is no longer the one copied from a loader.
        self::$installedIsLocalDir = false;
    }

    /**
     * Collects the datasets of every registered loader plus the local dataset.
     *
     * The local dataset (self::$installed) is appended last, but only when it is
     * non-empty and has not already been returned as part of the loader datasets.
     *
     * @return array[]
     * @psalm-return list<array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>}>
     */
    private static function getInstalled()
    {
        if (null === self::$canGetVendors) {
            self::$canGetVendors = method_exists('Composer\Autoload\ClassLoader', 'getRegisteredLoaders');
        }

        $installed = array();
        // Set when the local dataset was already emitted through a registered loader.
        $copiedLocalDir = false;

        if (self::$canGetVendors) {
            $selfDir = strtr(__DIR__, '\\', '/');
            foreach (ClassLoader::getRegisteredLoaders() as $vendorDir => $loader) {
                $isLocalDir = strtr($vendorDir.'/composer', '\\', '/') === $selfDir;

                if (isset(self::$installedByVendor[$vendorDir])) {
                    $installed[] = self::$installedByVendor[$vendorDir];
                } elseif (is_file($vendorDir.'/composer/installed.php')) {
                    $installed[] = self::$installedByVendor[$vendorDir] = require $vendorDir.'/composer/installed.php';
                    if (null === self::$installed && $isLocalDir) {
                        self::$installed = $installed[count($installed) - 1];
                        self::$installedIsLocalDir = true;
                    }
                }

                if (self::$installedIsLocalDir && $isLocalDir) {
                    $copiedLocalDir = true;
                }
            }
        }

        if (null === self::$installed) {
            // only require the installed.php file if this file is loaded from its dumped location,
            // and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
            if (substr(__DIR__, -8, 1) !== 'C') {
                self::$installed = require __DIR__ . '/installed.php';
            } else {
                self::$installed = array();
            }
        }

        // An empty dataset has no 'versions'/'root' keys, and a dataset already copied
        // from a loader would otherwise be returned twice.
        if (self::$installed !== array() && !$copiedLocalDir) {
            $installed[] = self::$installed;
        }

        return $installed;
    }
}

View File

@@ -0,0 +1,21 @@
Copyright (c) Nils Adermann, Jordi Boggiano
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -0,0 +1,10 @@
<?php
// autoload_classmap.php @generated by Composer
// Returns a map of fully-qualified class name => defining file. autoload_real.php
// passes it to ClassLoader::addClassMap() when the static loader is not used.
$vendorDir = dirname(dirname(__FILE__));
// $baseDir is not referenced by any entry in this file.
$baseDir = dirname($vendorDir);
return array(
'Composer\\InstalledVersions' => $vendorDir . '/composer/InstalledVersions.php',
);

View File

@@ -0,0 +1,10 @@
<?php
// autoload_files.php @generated by Composer
// Returns a map of file identifier => file path. autoload_real.php requires each
// listed file once (tracked per identifier) when the static loader is not used.
$vendorDir = dirname(dirname(__FILE__));
// $baseDir is not referenced by any entry in this file.
$baseDir = dirname($vendorDir);
return array(
'0e6d7bf4a5811bfa5cf40c5ccd6fae6a' => $vendorDir . '/symfony/polyfill-mbstring/bootstrap.php',
);

View File

@@ -0,0 +1,10 @@
<?php
// autoload_namespaces.php @generated by Composer
// Returns a map of namespace prefix => list of directories. autoload_real.php feeds
// each pair to ClassLoader::set() when the static loader is not used.
$vendorDir = dirname(dirname(__FILE__));
// $baseDir is not referenced by any entry in this file.
$baseDir = dirname($vendorDir);
return array(
'Smalot\\PdfParser\\' => array($vendorDir . '/smalot/pdfparser/src'),
);

View File

@@ -0,0 +1,10 @@
<?php
// autoload_psr4.php @generated by Composer
// Returns a map of namespace prefix => list of directories. autoload_real.php feeds
// each pair to ClassLoader::setPsr4() when the static loader is not used.
$vendorDir = dirname(dirname(__FILE__));
// $baseDir is not referenced by any entry in this file.
$baseDir = dirname($vendorDir);
return array(
'Symfony\\Polyfill\\Mbstring\\' => array($vendorDir . '/symfony/polyfill-mbstring'),
);

View File

@@ -0,0 +1,75 @@
<?php
// autoload_real.php @generated by Composer
/**
* Bootstrap for this vendor directory's autoloader: builds, configures and
* registers a single ClassLoader instance and loads the "files" autoload entries.
*/
class ComposerAutoloaderInitdeffb6dedd7da61f552f6836b399890f
{
// Loader built by getLoader(); kept so repeated calls return the same instance.
private static $loader;
/**
* Temporary autoload callback used only while the ClassLoader class itself is
* being instantiated; it ignores every other class name.
*
* @param string $class Class name requested by the autoload stack
*/
public static function loadClassLoader($class)
{
if ('Composer\Autoload\ClassLoader' === $class) {
require __DIR__ . '/ClassLoader.php';
}
}
/**
* Builds the loader on first call and returns the cached instance afterwards.
*
* @return \Composer\Autoload\ClassLoader
*/
public static function getLoader()
{
if (null !== self::$loader) {
return self::$loader;
}
// Verify platform requirements before anything is registered.
require __DIR__ . '/platform_check.php';
// Register the temporary callback (prepended) just long enough to load ClassLoader.
spl_autoload_register(array('ComposerAutoloaderInitdeffb6dedd7da61f552f6836b399890f', 'loadClassLoader'), true, true);
// The loader is given the vendor directory, i.e. the parent of this file's directory.
self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(\dirname(__FILE__)));
spl_autoload_unregister(array('ComposerAutoloaderInitdeffb6dedd7da61f552f6836b399890f', 'loadClassLoader'));
// The static initializer is used on PHP >= 5.6, outside HHVM, and when this file is not Zend-encoded.
$useStaticLoader = PHP_VERSION_ID >= 50600 && !defined('HHVM_VERSION') && (!function_exists('zend_loader_file_encoded') || !zend_loader_file_encoded());
if ($useStaticLoader) {
require __DIR__ . '/autoload_static.php';
call_user_func(\Composer\Autoload\ComposerStaticInitdeffb6dedd7da61f552f6836b399890f::getInitializer($loader));
} else {
// Fallback: populate the loader from the individual map files.
$map = require __DIR__ . '/autoload_namespaces.php';
foreach ($map as $namespace => $path) {
$loader->set($namespace, $path);
}
$map = require __DIR__ . '/autoload_psr4.php';
foreach ($map as $namespace => $path) {
$loader->setPsr4($namespace, $path);
}
$classMap = require __DIR__ . '/autoload_classmap.php';
if ($classMap) {
$loader->addClassMap($classMap);
}
}
// Prepend this loader to the autoload stack.
$loader->register(true);
if ($useStaticLoader) {
$includeFiles = Composer\Autoload\ComposerStaticInitdeffb6dedd7da61f552f6836b399890f::$files;
} else {
$includeFiles = require __DIR__ . '/autoload_files.php';
}
// Load every "files" entry; composerRequire...() skips identifiers already loaded.
foreach ($includeFiles as $fileIdentifier => $file) {
composerRequiredeffb6dedd7da61f552f6836b399890f($fileIdentifier, $file);
}
return $loader;
}
}
/**
 * Requires a "files" autoload entry at most once per process.
 *
 * Loaded identifiers are tracked in $GLOBALS['__composer_autoload_files'], so a
 * file shared by several autoloaders is not executed twice.
 *
 * @param string $fileIdentifier Identifier under which the file is tracked
 * @param string $file           Path of the file to require
 */
function composerRequiredeffb6dedd7da61f552f6836b399890f($fileIdentifier, $file)
{
    if (!empty($GLOBALS['__composer_autoload_files'][$fileIdentifier])) {
        return;
    }

    require $file;

    $GLOBALS['__composer_autoload_files'][$fileIdentifier] = true;
}

View File

@@ -0,0 +1,51 @@
<?php
// autoload_static.php @generated by Composer
namespace Composer\Autoload;
/**
* Pre-computed autoload data for this vendor directory, plus an initializer
* that copies it into a ClassLoader instance.
*/
class ComposerStaticInitdeffb6dedd7da61f552f6836b399890f
{
// "files" autoload entries: file identifier => path to require.
public static $files = array (
'0e6d7bf4a5811bfa5cf40c5ccd6fae6a' => __DIR__ . '/..' . '/symfony/polyfill-mbstring/bootstrap.php',
);
// PSR-4 prefixes grouped by first character; each value is the prefix length in characters.
public static $prefixLengthsPsr4 = array (
'S' =>
array (
'Symfony\\Polyfill\\Mbstring\\' => 26,
),
);
// PSR-4 prefix => list of base directories.
public static $prefixDirsPsr4 = array (
'Symfony\\Polyfill\\Mbstring\\' =>
array (
0 => __DIR__ . '/..' . '/symfony/polyfill-mbstring',
),
);
// PSR-0 prefixes grouped by first character; prefix => list of base directories.
public static $prefixesPsr0 = array (
'S' =>
array (
'Smalot\\PdfParser\\' =>
array (
0 => __DIR__ . '/..' . '/smalot/pdfparser/src',
),
),
);
// Class name => defining file.
public static $classMap = array (
'Composer\\InstalledVersions' => __DIR__ . '/..' . '/composer/InstalledVersions.php',
);
/**
* Returns a closure that, when called, assigns the static tables above to the
* given loader's properties.
*
* The closure is bound to the ClassLoader class scope so it can write those
* properties directly.
*
* @param ClassLoader $loader Loader to populate
* @return \Closure
*/
public static function getInitializer(ClassLoader $loader)
{
return \Closure::bind(function () use ($loader) {
$loader->prefixLengthsPsr4 = ComposerStaticInitdeffb6dedd7da61f552f6836b399890f::$prefixLengthsPsr4;
$loader->prefixDirsPsr4 = ComposerStaticInitdeffb6dedd7da61f552f6836b399890f::$prefixDirsPsr4;
$loader->prefixesPsr0 = ComposerStaticInitdeffb6dedd7da61f552f6836b399890f::$prefixesPsr0;
$loader->classMap = ComposerStaticInitdeffb6dedd7da61f552f6836b399890f::$classMap;
}, null, ClassLoader::class);
}
}

View File

@@ -0,0 +1,142 @@
{
"packages": [
{
"name": "smalot/pdfparser",
"version": "v1.1.0",
"version_normalized": "1.1.0.0",
"source": {
"type": "git",
"url": "https://github.com/smalot/pdfparser.git",
"reference": "43e436f32fd0e3d1f808c3b1768975c598c9a7df"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/smalot/pdfparser/zipball/43e436f32fd0e3d1f808c3b1768975c598c9a7df",
"reference": "43e436f32fd0e3d1f808c3b1768975c598c9a7df",
"shasum": ""
},
"require": {
"ext-zlib": "*",
"php": ">=7.1",
"symfony/polyfill-mbstring": "^1.18"
},
"time": "2021-08-03T08:33:34+00:00",
"type": "library",
"installation-source": "dist",
"autoload": {
"psr-0": {
"Smalot\\PdfParser\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"LGPL-3.0"
],
"authors": [
{
"name": "Sebastien MALOT",
"email": "sebastien@malot.fr"
}
],
"description": "Pdf parser library. Can read and extract information from pdf file.",
"homepage": "https://www.pdfparser.org",
"keywords": [
"extract",
"parse",
"parser",
"pdf",
"text"
],
"support": {
"issues": "https://github.com/smalot/pdfparser/issues",
"source": "https://github.com/smalot/pdfparser/tree/v1.1.0"
},
"install-path": "../smalot/pdfparser"
},
{
"name": "symfony/polyfill-mbstring",
"version": "v1.23.1",
"version_normalized": "1.23.1.0",
"source": {
"type": "git",
"url": "https://github.com/symfony/polyfill-mbstring.git",
"reference": "9174a3d80210dca8daa7f31fec659150bbeabfc6"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/symfony/polyfill-mbstring/zipball/9174a3d80210dca8daa7f31fec659150bbeabfc6",
"reference": "9174a3d80210dca8daa7f31fec659150bbeabfc6",
"shasum": ""
},
"require": {
"php": ">=7.1"
},
"suggest": {
"ext-mbstring": "For best performance"
},
"time": "2021-05-27T12:26:48+00:00",
"type": "library",
"extra": {
"branch-alias": {
"dev-main": "1.23-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
},
"installation-source": "dist",
"autoload": {
"psr-4": {
"Symfony\\Polyfill\\Mbstring\\": ""
},
"files": [
"bootstrap.php"
]
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"description": "Symfony polyfill for the Mbstring extension",
"homepage": "https://symfony.com",
"keywords": [
"compatibility",
"mbstring",
"polyfill",
"portable",
"shim"
],
"support": {
"source": "https://github.com/symfony/polyfill-mbstring/tree/v1.23.1"
},
"funding": [
{
"url": "https://symfony.com/sponsor",
"type": "custom"
},
{
"url": "https://github.com/fabpot",
"type": "github"
},
{
"url": "https://tidelift.com/funding/github/packagist/symfony/symfony",
"type": "tidelift"
}
],
"install-path": "../symfony/polyfill-mbstring"
}
],
"dev": true,
"dev-package-names": []
}

View File

@@ -0,0 +1,41 @@
<?php return array(
// Metadata of the root package; InstalledVersions::getRootPackage() returns this entry.
'root' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
'reference' => '6582fe4c96d02e645921e4fb4300f84719b6b503',
'name' => 'vendor_name/build',
'dev' => true,
),
// One entry per package, keyed by package name; InstalledVersions looks packages up here.
'versions' => array(
'smalot/pdfparser' => array(
'pretty_version' => 'v1.1.0',
'version' => '1.1.0.0',
'type' => 'library',
'install_path' => __DIR__ . '/../smalot/pdfparser',
'aliases' => array(),
'reference' => '43e436f32fd0e3d1f808c3b1768975c598c9a7df',
'dev_requirement' => false,
),
'symfony/polyfill-mbstring' => array(
'pretty_version' => 'v1.23.1',
'version' => '1.23.1.0',
'type' => 'library',
'install_path' => __DIR__ . '/../symfony/polyfill-mbstring',
'aliases' => array(),
'reference' => '9174a3d80210dca8daa7f31fec659150bbeabfc6',
'dev_requirement' => false,
),
// The root package is listed among the versions as well.
'vendor_name/build' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
'reference' => '6582fe4c96d02e645921e4fb4300f84719b6b503',
'dev_requirement' => false,
),
),
);

View File

@@ -0,0 +1,26 @@
<?php
// platform_check.php @generated by Composer
// Collects unmet platform requirements and aborts with an E_USER_ERROR if any are found.
$issues = array();
if (!(PHP_VERSION_ID >= 70100)) {
$issues[] = 'Your Composer dependencies require a PHP version ">= 7.1.0". You are running ' . PHP_VERSION . '.';
}
if ($issues) {
if (!headers_sent()) {
header('HTTP/1.1 500 Internal Server Error');
}
// When display_errors is off, the error triggered below is not shown, so the
// issues are printed explicitly here.
if (!ini_get('display_errors')) {
if (PHP_SAPI === 'cli' || PHP_SAPI === 'phpdbg') {
fwrite(STDERR, 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . implode(PHP_EOL, $issues) . PHP_EOL.PHP_EOL);
} elseif (!headers_sent()) {
// Non-CLI output omits the running PHP version from the message.
echo 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . str_replace('You are running '.PHP_VERSION.'.', '', implode(PHP_EOL, $issues)) . PHP_EOL.PHP_EOL;
}
}
trigger_error(
'Composer detected issues in your platform: ' . implode(' ', $issues),
E_USER_ERROR
);
}

View File

@@ -0,0 +1,40 @@
# Developers
## .editorconfig
Please make sure your editor uses our `.editorconfig` file. It contains rules about our coding styles.
## Development Tools and Tests
Our test-related files are located in the `tests` folder.
Tests are written using PHPUnit.
To install (and update) development tools like PHPUnit or PHP-CS-Fixer run:
> make install-dev-tools
Development tools are installed in `dev-tools/vendor`.
Please check `dev-tools/composer.json` for more information about versions etc.
To run a tool manually, use the binaries in `dev-tools/vendor/bin`, for instance:
> dev-tools/vendor/bin/php-cs-fixer fix --verbose --dry-run
Below are a few shortcuts to improve your developer experience.
### PHPUnit
To run all tests run:
> make run-phpunit
### PHP-CS-Fixer
To check coding styles run:
> make run-php-cs-fixer
### PHPStan
To run a static code analysis use:
> make run-phpstan

View File

@@ -0,0 +1,165 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 3, 29 June 2007
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
This version of the GNU Lesser General Public License incorporates
the terms and conditions of version 3 of the GNU General Public
License, supplemented by the additional permissions listed below.
0. Additional Definitions.
As used herein, "this License" refers to version 3 of the GNU Lesser
General Public License, and the "GNU GPL" refers to version 3 of the GNU
General Public License.
"The Library" refers to a covered work governed by this License,
other than an Application or a Combined Work as defined below.
An "Application" is any work that makes use of an interface provided
by the Library, but which is not otherwise based on the Library.
Defining a subclass of a class defined by the Library is deemed a mode
of using an interface provided by the Library.
A "Combined Work" is a work produced by combining or linking an
Application with the Library. The particular version of the Library
with which the Combined Work was made is also called the "Linked
Version".
The "Minimal Corresponding Source" for a Combined Work means the
Corresponding Source for the Combined Work, excluding any source code
for portions of the Combined Work that, considered in isolation, are
based on the Application, and not on the Linked Version.
The "Corresponding Application Code" for a Combined Work means the
object code and/or source code for the Application, including any data
and utility programs needed for reproducing the Combined Work from the
Application, but excluding the System Libraries of the Combined Work.
1. Exception to Section 3 of the GNU GPL.
You may convey a covered work under sections 3 and 4 of this License
without being bound by section 3 of the GNU GPL.
2. Conveying Modified Versions.
If you modify a copy of the Library, and, in your modifications, a
facility refers to a function or data to be supplied by an Application
that uses the facility (other than as an argument passed when the
facility is invoked), then you may convey a copy of the modified
version:
a) under this License, provided that you make a good faith effort to
ensure that, in the event an Application does not supply the
function or data, the facility still operates, and performs
whatever part of its purpose remains meaningful, or
b) under the GNU GPL, with none of the additional permissions of
this License applicable to that copy.
3. Object Code Incorporating Material from Library Header Files.
The object code form of an Application may incorporate material from
a header file that is part of the Library. You may convey such object
code under terms of your choice, provided that, if the incorporated
material is not limited to numerical parameters, data structure
layouts and accessors, or small macros, inline functions and templates
(ten or fewer lines in length), you do both of the following:
a) Give prominent notice with each copy of the object code that the
Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the object code with a copy of the GNU GPL and this license
document.
4. Combined Works.
You may convey a Combined Work under terms of your choice that,
taken together, effectively do not restrict modification of the
portions of the Library contained in the Combined Work and reverse
engineering for debugging such modifications, if you also do each of
the following:
a) Give prominent notice with each copy of the Combined Work that
the Library is used in it and that the Library and its use are
covered by this License.
b) Accompany the Combined Work with a copy of the GNU GPL and this license
document.
c) For a Combined Work that displays copyright notices during
execution, include the copyright notice for the Library among
these notices, as well as a reference directing the user to the
copies of the GNU GPL and this license document.
d) Do one of the following:
0) Convey the Minimal Corresponding Source under the terms of this
License, and the Corresponding Application Code in a form
suitable for, and under terms that permit, the user to
recombine or relink the Application with a modified version of
the Linked Version to produce a modified Combined Work, in the
manner specified by section 6 of the GNU GPL for conveying
Corresponding Source.
1) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (a) uses at run time
a copy of the Library already present on the user's computer
system, and (b) will operate properly with a modified version
of the Library that is interface-compatible with the Linked
Version.
e) Provide Installation Information, but only if you would otherwise
be required to provide such information under section 6 of the
GNU GPL, and only to the extent that such information is
necessary to install and execute a modified version of the
Combined Work produced by recombining or relinking the
Application with a modified version of the Linked Version. (If
you use option 4d0, the Installation Information must accompany
the Minimal Corresponding Source and Corresponding Application
Code. If you use option 4d1, you must provide the Installation
Information in the manner specified by section 6 of the GNU GPL
for conveying Corresponding Source.)
5. Combined Libraries.
You may place library facilities that are a work based on the
Library side by side in a single library together with other library
facilities that are not Applications and are not covered by this
License, and convey such a combined library under terms of your
choice, if you do both of the following:
a) Accompany the combined library with a copy of the same work based
on the Library, uncombined with any other library facilities,
conveyed under the terms of this License.
b) Give prominent notice with the combined library that part of it
is a work based on the Library, and explaining where to find the
accompanying uncombined form of the same work.
6. Revised Versions of the GNU Lesser General Public License.
The Free Software Foundation may publish revised and/or new versions
of the GNU Lesser General Public License from time to time. Such new
versions will be similar in spirit to the present version, but may
differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the
Library as you received it specifies that a certain numbered version
of the GNU Lesser General Public License "or any later version"
applies to it, you have the option of following the terms and
conditions either of that published version or of any later version
published by the Free Software Foundation. If the Library as you
received it does not specify a version number of the GNU Lesser
General Public License, you may choose any version of the GNU Lesser
General Public License ever published by the Free Software Foundation.
If the Library as you received it specifies that a proxy can decide
whether future versions of the GNU Lesser General Public License shall
apply, that proxy's public statement of acceptance of any version is
permanent authorization for you to choose that version for the
Library.

View File

@@ -0,0 +1,18 @@
# Install/update the Composer packages declared in the dev-tools folder
# (the targets below run php-cs-fixer, phpstan and phpunit from dev-tools/vendor/bin).
install-dev-tools:
composer update --working-dir=dev-tools
# Workaround to force PHPUnit 7.5.x when running Scrutinizer.
# Scrutinizer fails due to not enough memory when using a newer PHPUnit version (tested with 9.5).
# @see: https://github.com/smalot/pdfparser/issues/410
# @see: https://github.com/smalot/pdfparser/pull/412
# Rewrites every ">=7.5" constraint in dev-tools/composer.json to "^7.5" via a temporary copy.
prepare-for-scrutinizer:
cd dev-tools && sed -e 's/>=7.5/^7.5/g' composer.json > composer.json2 && rm composer.json && mv composer.json2 composer.json
# Run the code style fixer; extra command line options can be passed through ARGS.
run-php-cs-fixer:
dev-tools/vendor/bin/php-cs-fixer fix $(ARGS)
# Run static analysis; extra command line options can be passed through ARGS.
run-phpstan:
dev-tools/vendor/bin/phpstan analyze $(ARGS)
# Run the test suite; extra command line options can be passed through ARGS.
run-phpunit:
dev-tools/vendor/bin/phpunit $(ARGS)

View File

@@ -0,0 +1,62 @@
# PdfParser #
Pdf Parser, a standalone PHP library, provides various tools to extract data from a PDF file.
![CI](https://github.com/smalot/pdfparser/workflows/CI/badge.svg)
[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/smalot/pdfparser/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/smalot/pdfparser/?branch=master)
[![Code Coverage](https://scrutinizer-ci.com/g/smalot/pdfparser/badges/coverage.png?b=master)](https://scrutinizer-ci.com/g/smalot/pdfparser/?branch=master)
[![License](https://poser.pugx.org/smalot/pdfparser/license)](//packagist.org/packages/smalot/pdfparser)
[![Latest Stable Version](https://poser.pugx.org/smalot/pdfparser/v)](//packagist.org/packages/smalot/pdfparser)
[![Total Downloads](https://poser.pugx.org/smalot/pdfparser/downloads)](//packagist.org/packages/smalot/pdfparser)
[![Monthly Downloads](https://poser.pugx.org/smalot/pdfparser/d/monthly)](//packagist.org/packages/smalot/pdfparser)
[![Daily Downloads](https://poser.pugx.org/smalot/pdfparser/d/daily)](//packagist.org/packages/smalot/pdfparser)
Website : [https://www.pdfparser.org](https://www.pdfparser.org/?utm_source=GitHub&utm_medium=website&utm_campaign=GitHub)
Test the API on our [demo page](https://www.pdfparser.org/demo).
This project is supported by [Actualys](http://www.actualys.com).
## Features ##
Features included :
- Load/parse objects and headers
- Extract metadata (author, description, ...)
- Extract text from ordered pages
- Support of compressed pdf
- Support of MAC OS Roman charset encoding
- Handling of hexadecimal and octal encoding in text sections
- PSR-0 compliant ([autoloader](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md))
- PSR-1 compliant ([code styling](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-1-basic-coding-standard.md))
Currently, secured documents are not supported.
**This Library is under active maintenance.**
There is no active development by the author of this library (at the moment), but we welcome any pull request adding/extending functionality!
## Documentation ##
[Read the documentation on the website](http://www.pdfparser.org/documentation?utm_source=GitHub&utm_medium=documentation&utm_campaign=GitHub).
The original PDF Reference files can be downloaded from this URL: http://www.adobe.com/devnet/pdf/pdf_reference_archive.html
**For developers**: Please read [DEVELOPER.md](DEVELOPER.md) for more information about local development of the PDFParser library.
## Installation
### Using Composer
* Obtain [Composer](https://getcomposer.org)
* Run `composer require smalot/pdfparser`
### Use alternate file loader
In case you can't use Composer, you can include `alt_autoload.php-dist` into your project.
It will load all required files at once.
Afterwards you can use `PDFParser` class and others.
## License ##
This library is under the [LGPLv3 license](https://github.com/smalot/pdfparser/blob/master/LICENSE.txt).

View File

@@ -0,0 +1,74 @@
<?php
/**
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
* @date 2021-02-09
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*
* --------------------------------------------------------------------------------------
*
* About:
* This file provides an alternative to the Composer-approach.
* Include it into your project and all required files of PDFParser will be loaded automatically.
* Please use it only, if Composer is not available.
*
* How to use:
* 1. include this file as it is OR copy and rename it as you like (and then include it)
* 2. afterwards you can use PDFParser classes
* Done.
*/
/**
 * Loads all PHP files found in a given folder.
 * Calls itself recursively for all sub folders.
 *
 * Only files with a ".php" extension are required. Anything else that may sit
 * next to the sources (text files, dist files, editor backups, ...) is skipped,
 * because require-ing a non-PHP file would echo its raw content to the output.
 *
 * @param string $dir Path of the folder to scan
 */
function requireFilesOfFolder($dir)
{
    foreach (new DirectoryIterator($dir) as $fileInfo) {
        // Skip the "." and ".." entries, otherwise the recursion would never end.
        if ($fileInfo->isDot()) {
            continue;
        }

        if ($fileInfo->isDir()) {
            requireFilesOfFolder($fileInfo->getPathname());
            continue;
        }

        // require_once makes sure a file that was already loaded is not loaded again.
        if ('php' === strtolower($fileInfo->getExtension())) {
            require_once $fileInfo->getPathname();
        }
    }
}
// Root of the library sources; every path below is built from it.
$rootFolder = __DIR__.'/src/Smalot/PdfParser';
// Manually require files which can't be loaded automatically that easily:
// they are loaded up front, before the generic folder scan at the end of this file.
// NOTE(review): presumably these are base classes that other files extend
// (e.g. the classes in Element/ extend Element) - confirm before reordering.
require_once $rootFolder.'/Element.php';
require_once $rootFolder.'/PDFObject.php';
require_once $rootFolder.'/Font.php';
require_once $rootFolder.'/Page.php';
require_once $rootFolder.'/Element/ElementString.php';
/*
 * Load the rest of the PdfParser files from /src/Smalot/PdfParser.
 * Don't worry, it won't load files multiple times.
 */
requireFilesOfFolder($rootFolder);

View File

@@ -0,0 +1,35 @@
{
"name": "smalot/pdfparser",
"description": "Pdf parser library. Can read and extract information from pdf file.",
"keywords": ["PDF", "text", "parser", "parse", "extract"],
"type": "library",
"license": "LGPL-3.0",
"authors": [
{
"name": "Sebastien MALOT",
"email": "sebastien@malot.fr"
}
],
"support": {
"issues": "https://github.com/smalot/pdfparser/issues"
},
"homepage": "https://www.pdfparser.org",
"require": {
"php": ">=7.1",
"symfony/polyfill-mbstring": "^1.18",
"ext-zlib": "*"
},
"autoload": {
"psr-0": {
"Smalot\\PdfParser\\": "src/"
}
},
"autoload-dev": {
"psr-4": {
"Tests\\Smalot\\PdfParser\\": "tests/"
}
},
"config": {
"process-timeout": 1200
}
}

View File

@@ -0,0 +1,103 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Konrad Abicht <hi@inspirito.de>
* @date 2020-11-22
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
 * Holds the tunable settings that several parser classes read.
 * Each setting ships with a default and can be replaced through its setter
 * whenever the default does not work for a given document.
 *
 * @see https://github.com/smalot/pdfparser/issues/305
 */
class Config
{
    /**
     * Value returned by getFontSpaceLimit(); -50 unless overridden.
     */
    private $fontSpaceLimit = -50;

    /**
     * The characters treated as PDF whitespace, as a plain string:
     * NUL, HT, LF, FF, CR and SP.
     *
     * @var string
     */
    private $pdfWhitespaces = "\0\t\n\f\r ";

    /**
     * The same whitespace set (NUL, HT, LF, FF, CR, SP) written as a
     * regular expression character class.
     *
     * @var string
     */
    private $pdfWhitespacesRegex = '[\0\t\n\f\r ]';

    /**
     * Whether to retain raw image data as content (true) or discard it
     * to save memory (false).
     *
     * @var bool
     */
    private $retainImageContent = true;

    /**
     * @return mixed the currently configured font space limit
     */
    public function getFontSpaceLimit()
    {
        return $this->fontSpaceLimit;
    }

    /**
     * @param mixed $value new font space limit, stored as given
     */
    public function setFontSpaceLimit($value)
    {
        $this->fontSpaceLimit = $value;
    }

    /**
     * @return string the whitespace characters as a plain string
     */
    public function getPdfWhitespaces(): string
    {
        return $this->pdfWhitespaces;
    }

    /**
     * @param string $pdfWhitespaces replacement whitespace character list
     */
    public function setPdfWhitespaces(string $pdfWhitespaces): void
    {
        $this->pdfWhitespaces = $pdfWhitespaces;
    }

    /**
     * @return string the whitespace set as a regex character class
     */
    public function getPdfWhitespacesRegex(): string
    {
        return $this->pdfWhitespacesRegex;
    }

    /**
     * @param string $pdfWhitespacesRegex replacement regex character class
     */
    public function setPdfWhitespacesRegex(string $pdfWhitespacesRegex): void
    {
        $this->pdfWhitespacesRegex = $pdfWhitespacesRegex;
    }

    /**
     * @return bool true when raw image data is kept as content
     */
    public function getRetainImageContent(): bool
    {
        return $this->retainImageContent;
    }

    /**
     * @param bool $retainImageContent pass false to discard raw image data
     */
    public function setRetainImageContent(bool $retainImageContent): void
    {
        $this->retainImageContent = $retainImageContent;
    }
}

View File

@@ -0,0 +1,287 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
 * Technical references :
 * - http://www.mactech.com/articles/mactech/Vol.15/15.09/PDFIntro/index.html
 * - http://framework.zend.com/issues/secure/attachment/12512/Pdf.php
 * - http://www.php.net/manual/en/ref.pdf.php#74211
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
 * - http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinAnsiEncoding.pm
 *
 * Class Document
 *
 * Holds the objects of a parsed PDF together with its trailer, an index of
 * object ids grouped by their "Type" header field and an array of details
 * (document info plus page count).
 */
class Document
{
    /**
     * All objects of the document, indexed by object id.
     *
     * @var PDFObject[]
     */
    protected $objects = [];

    /**
     * Object ids grouped by the content of their "Type" header field:
     * $dictionary[$type][$id] = $id.
     *
     * @var array
     */
    protected $dictionary = [];

    /**
     * @var Header
     */
    protected $trailer = null;

    /**
     * Filled by buildDetails(); null until init() has run.
     *
     * @var array
     */
    protected $details = null;

    public function __construct()
    {
        $this->trailer = new Header([], $this);
    }

    /**
     * (Re)builds the dictionary and the details, then propagates init()
     * to the header of every object and to the object itself.
     */
    public function init()
    {
        $this->buildDictionary();
        $this->buildDetails();

        // Propagate init to objects.
        foreach ($this->objects as $object) {
            $object->getHeader()->init();
            $object->init();
        }
    }

    /**
     * Build dictionary based on type header field.
     * Objects whose "Type" content is empty are not indexed.
     */
    protected function buildDictionary()
    {
        $this->dictionary = [];

        foreach ($this->objects as $id => $object) {
            $type = $object->getHeader()->get('Type')->getContent();

            if (!empty($type)) {
                $this->dictionary[$type][$id] = $id;
            }
        }
    }

    /**
     * Build details array: the details of the trailer's "Info" entry (when
     * available) plus a "Pages" entry holding the page count.
     */
    protected function buildDetails()
    {
        $details = [];

        // Extract document info
        if ($this->trailer->has('Info')) {
            /** @var PDFObject $info */
            $info = $this->trailer->get('Info');
            // This could be an ElementMissing object, so we need to check for
            // the getHeader method first.
            if (null !== $info && method_exists($info, 'getHeader')) {
                $details = $info->getHeader()->getDetails();
            }
        }

        // Retrieve the page count; a document without resolvable pages counts as 0.
        try {
            $details['Pages'] = \count($this->getPages());
        } catch (\Exception $e) {
            $details['Pages'] = 0;
        }

        $this->details = $details;
    }

    /**
     * @return array
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * Replaces the objects of the document and re-runs init().
     *
     * @param PDFObject[] $objects
     */
    public function setObjects($objects = [])
    {
        $this->objects = (array) $objects;
        $this->init();
    }

    /**
     * @return PDFObject[]
     */
    public function getObjects()
    {
        return $this->objects;
    }

    /**
     * @param string $id
     *
     * @return PDFObject|Font|Page|Element|null null when no object has that id
     */
    public function getObjectById($id)
    {
        if (isset($this->objects[$id])) {
            return $this->objects[$id];
        }

        return null;
    }

    /**
     * Returns the objects whose "Type" header equals $type and, when a
     * subtype is given, whose "Subtype" header equals $subtype. Keys are kept.
     *
     * @param string $type
     * @param string $subtype
     *
     * @return array
     */
    public function getObjectsByType($type, $subtype = null)
    {
        $objects = [];

        foreach ($this->objects as $id => $object) {
            if ($object->getHeader()->get('Type') == $type &&
                (null === $subtype || $object->getHeader()->get('Subtype') == $subtype)
            ) {
                $objects[$id] = $object;
            }
        }

        return $objects;
    }

    /**
     * @return PDFObject[]
     */
    public function getFonts()
    {
        return $this->getObjectsByType('Font');
    }

    /**
     * Resolves the pages of the document. Three strategies are tried in order:
     * the "Pages" entry of the first catalog, every object of type "Pages",
     * and finally the bare objects of type "Page" (unordered).
     *
     * @return Page[]
     *
     * @throws \Exception when none of the strategies applies
     */
    public function getPages()
    {
        if (isset($this->dictionary['Catalog'])) {
            // Search for catalog to list pages.
            $id = reset($this->dictionary['Catalog']);

            /** @var Pages $object */
            $object = $this->objects[$id]->get('Pages');
            if (method_exists($object, 'getPages')) {
                return $object->getPages(true);
            }
        }

        if (isset($this->dictionary['Pages'])) {
            // Search for pages to list kids.
            $pages = [];

            /** @var Pages[] $objects */
            $objects = $this->getObjectsByType('Pages');
            foreach ($objects as $object) {
                $pages = array_merge($pages, $object->getPages(true));
            }

            return $pages;
        }

        if (isset($this->dictionary['Page'])) {
            // Search for 'page' (unordered pages).
            return array_values($this->getObjectsByType('Page'));
        }

        throw new \Exception('Missing catalog.');
    }

    /**
     * Returns the trimmed text of all pages, separated by a blank line.
     * Pages that are null or whose trimmed text is empty are skipped.
     *
     * @param Page|null $page not used: the text of every page is always
     *                        returned, whatever is passed here
     *
     * @return string
     *
     * @throws \Exception when the pages cannot be resolved, see getPages()
     */
    public function getText(?Page $page = null)
    {
        $texts = [];

        foreach ($this->getPages() as $currentPage) {
            // In some cases, the entry may be null.
            if (null === $currentPage) {
                continue;
            }

            $text = trim($currentPage->getText());
            // Compare against the empty string explicitly: a plain truthiness
            // check would also drop a page whose whole text is "0".
            if ('' !== $text) {
                $texts[] = $text;
            }
        }

        return implode("\n\n", $texts);
    }

    /**
     * @return Header
     */
    public function getTrailer()
    {
        return $this->trailer;
    }

    public function setTrailer(Header $trailer)
    {
        $this->trailer = $trailer;
    }

    /**
     * @param bool $deep not used by this implementation
     *
     * @return array the details built by buildDetails()
     */
    public function getDetails($deep = true)
    {
        return $this->details;
    }
}

View File

@@ -0,0 +1,169 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
 * Class Element
 *
 * Base class of all PDF value types: wraps a raw value together with the
 * document it belongs to, and offers the generic parse() entry point that
 * delegates to the specialised element classes.
 */
class Element
{
    /**
     * @var Document|null
     */
    protected $document = null;

    /**
     * The wrapped value; its type depends on the concrete element class.
     */
    protected $value = null;

    /**
     * @param mixed         $value
     * @param Document|null $document
     */
    public function __construct($value, ?Document $document = null)
    {
        $this->value = $value;
        $this->document = $document;
    }

    /**
     * Hook that does nothing in this base class.
     */
    public function init()
    {
    }

    /**
     * Loose (==) comparison of the given value with the wrapped value.
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value == $this->value;
    }

    /**
     * When the wrapped value is an array, tells whether at least one of its
     * elements equals() the given value; otherwise behaves like equals().
     *
     * @return bool
     */
    public function contains($value)
    {
        if (!\is_array($this->value)) {
            return $this->equals($value);
        }

        /** @var Element $val */
        foreach ($this->value as $val) {
            if ($val->equals($value)) {
                return true;
            }
        }

        return false;
    }

    public function getContent()
    {
        return $this->value;
    }

    /**
     * @return string
     */
    public function __toString()
    {
        return (string) ($this->value);
    }

    /**
     * Parses a sequence of elements out of $content.
     *
     * By default the content is read as "/Name value" pairs and the result is
     * indexed by name (without the leading slash). When a fourth argument
     * evaluating to true is passed, only values are expected and the result
     * is indexed numerically. The fourth argument is read through
     * func_get_args() instead of being declared.
     * NOTE(review): presumably so that subclasses overriding parse() with the
     * three-parameter signature stay compatible - confirm.
     *
     * Parsing stops at the first position where no name (in pair mode) or no
     * known element type matches.
     *
     * @param string        $content
     * @param Document|null $document
     * @param int           $position by reference; advanced past every parsed element
     *
     * @return array
     *
     * @throws \Exception
     */
    public static function parse($content, ?Document $document = null, &$position = 0)
    {
        $args = \func_get_args();
        $only_values = $args[3] ?? false;
        $content = trim($content);
        $values = [];

        // Element types are tried in exactly this order; the first parser
        // returning an element wins.
        $parsers = [
            ElementName::class,
            ElementXRef::class,
            ElementNumeric::class,
            ElementStruct::class,
            ElementBoolean::class,
            ElementNull::class,
            ElementDate::class,
            ElementString::class,
            ElementHexa::class,
            ElementArray::class,
        ];

        do {
            $old_position = $position;

            if (!$only_values) {
                if (!preg_match('/^\s*(?P<name>\/[A-Z0-9\._]+)(?P<value>.*)/si', substr($content, $position), $match)) {
                    break;
                }

                $name = ltrim($match['name'], '/');
                $value = $match['value'];
                // Move to where the value starts, searching after the name.
                $position = strpos($content, $value, $position + \strlen($match['name']));
            } else {
                $name = \count($values);
                $value = substr($content, $position);
            }

            $element = false;
            foreach ($parsers as $parser) {
                if ($element = $parser::parse($value, $document, $position)) {
                    break;
                }
            }

            if (!$element) {
                // Nothing matched: restore the position and stop.
                $position = $old_position;
                break;
            }

            $values[$name] = $element;
        } while ($position < \strlen($content));

        return $values;
    }
}

View File

@@ -0,0 +1,156 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\PDFObject;
/**
 * Class ElementArray
 *
 * Element wrapping an array of elements. Cross references stored in the
 * array are replaced by the referenced objects when the content is read.
 */
class ElementArray extends Element
{
    /**
     * @param array         $value    the elements of the array
     * @param Document|null $document document used to resolve cross references
     */
    public function __construct($value, ?Document $document = null)
    {
        parent::__construct($value, $document);
    }

    /**
     * Returns the elements after resolving every cross reference in place.
     */
    public function getContent()
    {
        foreach ($this->value as $name => $element) {
            $this->resolveXRef($name);
        }

        return parent::getContent();
    }

    /**
     * Returns the elements as stored, without resolving cross references.
     *
     * @return array
     */
    public function getRawContent()
    {
        return $this->value;
    }

    /**
     * Flattens the array into plain values.
     *
     * Headers, objects and nested arrays are only included when $deep is
     * true; any other element contributes its content.
     *
     * @param bool $deep
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $values = [];
        $elements = $this->getContent();

        foreach ($elements as $key => $element) {
            if ($element instanceof Header && $deep) {
                $values[$key] = $element->getDetails($deep);
            } elseif ($element instanceof PDFObject && $deep) {
                $values[$key] = $element->getDetails(false);
            } elseif ($element instanceof self) {
                if ($deep) {
                    $values[$key] = $element->getDetails();
                }
            } elseif ($element instanceof Element) {
                // Nested arrays were handled by the previous branch.
                $values[$key] = $element->getContent();
            }
        }

        return $values;
    }

    /**
     * @return string the elements joined by a comma
     */
    public function __toString()
    {
        return implode(',', $this->value);
    }

    /**
     * Replaces the entry stored under $name by the object it references when
     * that entry is a cross reference, then returns the entry.
     *
     * @param string $name
     *
     * @return Element|PDFObject
     */
    protected function resolveXRef($name)
    {
        if (($obj = $this->value[$name]) instanceof ElementXRef) {
            /** @var PDFObject $obj */
            $obj = $this->document->getObjectById($obj->getId());
            $this->value[$name] = $obj;
        }

        return $this->value[$name];
    }

    /**
     * Parses an array ("[ ... ]") found at the start of $content.
     *
     * @param string        $content
     * @param Document|null $document
     * @param int           $offset   by reference; advanced past the closing bracket
     *
     * @return bool|ElementArray false when $content does not start with an array
     */
    public static function parse($content, ?Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*\[(?P<array>.*)/is', $content, $match)) {
            // Cut the content at every bracket and track the nesting level
            // to find the bracket closing the outermost array.
            preg_match_all('/(.*?)(\[|\])/s', trim($content), $matches);

            $level = 0;
            $sub = '';
            foreach ($matches[0] as $part) {
                $sub .= $part;
                $level += (false !== strpos($part, '[') ? 1 : -1);
                if ($level <= 0) {
                    break;
                }
            }

            // Removes 1 level [ and ].
            $sub = substr(trim($sub), 1, -1);
            $sub_offset = 0;
            // The fourth argument switches Element::parse() to values-only mode.
            $values = Element::parse($sub, $document, $sub_offset, true);

            $offset += strpos($content, '[') + 1;
            // Find next ']' position
            $offset += \strlen($sub) + 1;

            return new self($values, $document);
        }

        return false;
    }
}

View File

@@ -0,0 +1,83 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
 * Class ElementBoolean
 *
 * Element wrapping a boolean value.
 */
class ElementBoolean extends Element
{
    /**
     * Stores true when $value is the boolean true or the string "true"
     * (case-insensitive); anything else is stored as false.
     *
     * @param string|bool $value
     */
    public function __construct($value)
    {
        // The identity check comes first so that a boolean true is never
        // handed to strtolower().
        parent::__construct((true === $value || 'true' === strtolower($value)), null);
    }

    /**
     * @return string "true" or "false"
     */
    public function __toString()
    {
        return $this->value ? 'true' : 'false';
    }

    /**
     * Strict (===) comparison with the stored boolean.
     *
     * @return bool
     */
    public function equals($value)
    {
        return $this->getContent() === $value;
    }

    /**
     * Parses a "true" / "false" keyword (case-insensitive) found at the
     * start of $content.
     *
     * @param string        $content
     * @param Document|null $document
     * @param int           $offset   by reference; advanced past the keyword
     *
     * @return bool|ElementBoolean false when $content does not start with a boolean
     */
    public static function parse($content, ?Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*(?P<value>true|false)/is', $content, $match)) {
            $value = $match['value'];
            $offset += strpos($content, $value) + \strlen($value);

            return new self($value);
        }

        return false;
    }
}

View File

@@ -0,0 +1,148 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
 * Class ElementDate
 *
 * Element wrapping a \DateTime parsed from a PDF date string "(D:...)".
 */
class ElementDate extends ElementString
{
    /**
     * Date format to use, indexed by the length of the date string.
     *
     * @var array
     */
    protected static $formats = [
        4 => 'Y',
        6 => 'Ym',
        8 => 'Ymd',
        10 => 'YmdH',
        12 => 'YmdHi',
        14 => 'YmdHis',
        15 => 'YmdHise',
        17 => 'YmdHisO',
        18 => 'YmdHisO',
        19 => 'YmdHisO',
    ];

    /**
     * Format used by __toString().
     *
     * @var string
     */
    protected $format = 'c';

    /**
     * @param \DateTime $value
     *
     * @throws \Exception when $value is not a \DateTime
     */
    public function __construct($value)
    {
        if (!($value instanceof \DateTime)) {
            throw new \Exception('DateTime required.');
        }

        parent::__construct($value);
    }

    /**
     * @param string $format format used by __toString()
     */
    public function setFormat($format)
    {
        $this->format = $format;
    }

    /**
     * Compares timestamps. $value may be a \DateTime or anything
     * strtotime() understands.
     *
     * @return bool
     */
    public function equals($value)
    {
        if ($value instanceof \DateTime) {
            $timestamp = $value->getTimeStamp();
        } else {
            $timestamp = strtotime($value);
        }

        return $timestamp == $this->value->getTimeStamp();
    }

    /**
     * @return string the date rendered with the configured format
     */
    public function __toString()
    {
        return (string) ($this->value->format($this->format));
    }

    /**
     * Parses a date "(D:...)" found at the start of $content.
     *
     * @param string        $content
     * @param Document|null $document
     * @param int           $offset   by reference; advanced past the closing parenthesis
     *
     * @return bool|ElementDate false when $content does not start with a date
     *                          or the date cannot be understood
     */
    public static function parse($content, ?Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*\(D\:(?P<name>.*?)\)/s', $content, $match)) {
            return false;
        }

        $name = str_replace("'", '', $match['name']);
        $date = false;

        // Smallest format : Y
        // Full format     : YmdHisP
        if (preg_match('/^\d{4}(\d{2}(\d{2}(\d{2}(\d{2}(\d{2}(Z(\d{2,4})?|[\+-]?\d{2}(\d{2})?)?)?)?)?)?)?$/', $name)) {
            if ($pos = strpos($name, 'Z')) {
                // Drop whatever follows the 'Z'.
                $name = substr($name, 0, $pos + 1);
            } elseif (18 == \strlen($name) && preg_match('/[^\+-]0000$/', $name)) {
                // Unsigned "0000" offset: rewrite it as "+0000".
                $name = substr($name, 0, -4).'+0000';
            }

            // Not every length accepted by the pattern has a format (e.g. 16,
            // an unsigned two-digit offset); such dates are rejected instead
            // of reading an undefined array key.
            $length = \strlen($name);
            if (isset(self::$formats[$length])) {
                $date = \DateTime::createFromFormat(self::$formats[$length], $name, new \DateTimeZone('UTC'));
            }
        } elseif (preg_match('/^\d{1,2}-\d{1,2}-\d{4},?\s+\d{2}:\d{2}:\d{2}[\+-]\d{4}$/', $name)) {
            // special cases
            $name = str_replace(',', '', $name);
            $format = 'n-j-Y H:i:sO';
            $date = \DateTime::createFromFormat($format, $name, new \DateTimeZone('UTC'));
        }

        if (!$date) {
            return false;
        }

        // 4 = 3 characters for '(D:' plus 1 for ')'; the length of the
        // original (unmodified) date string is used.
        $offset += strpos($content, '(D:') + \strlen($match['name']) + 4;

        return new self($date);
    }
}

View File

@@ -0,0 +1,91 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
/**
 * Class ElementHexa
 *
 * Parser for hexadecimal strings ("<...>"). The decoded text is handed to
 * the date and string parsers, so no ElementHexa instance is created by parse().
 */
class ElementHexa extends ElementString
{
    /**
     * Parses a hexadecimal string found at the start of $content.
     *
     * @param string        $content
     * @param Document|null $document
     * @param int           $offset   by reference; advanced past the closing '>'
     *
     * @return bool|ElementHexa|ElementDate false when $content does not start
     *                                      with a hexadecimal string
     */
    public static function parse($content, ?Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*\<(?P<name>[A-F0-9]+)\>/is', $content, $match)) {
            $name = $match['name'];
            // 2 = 1 for '<' and 1 for '>'
            $offset += strpos($content, '<'.$name) + \strlen($name) + 2;
            // repackage string as standard
            $name = '('.self::decode($name, $document).')';

            // A date is tried first, a plain string otherwise.
            if (!($element = ElementDate::parse($name, $document))) {
                $element = ElementString::parse($name, $document);
            }

            return $element;
        }

        return false;
    }

    /**
     * Decodes a hexadecimal string.
     *
     * A value starting with "00" is read in groups of four hex digits, each
     * turned into a numeric character reference; any other value is read in
     * groups of two hex digits, each turned into one byte. The result is
     * finally run through html_entity_decode().
     *
     * @param string        $value
     * @param Document|null $document not used
     *
     * @return string
     */
    public static function decode($value, ?Document $document = null)
    {
        $text = '';
        $length = \strlen($value);

        if ('00' === substr($value, 0, 2)) {
            for ($i = 0; $i < $length; $i += 4) {
                $hex = substr($value, $i, 4);
                $text .= '&#'.str_pad((string) hexdec($hex), 4, '0', \STR_PAD_LEFT).';';
            }
        } else {
            for ($i = 0; $i < $length; $i += 2) {
                $hex = substr($value, $i, 2);
                $text .= \chr(hexdec($hex));
            }
        }

        return html_entity_decode($text, \ENT_NOQUOTES, 'UTF-8');
    }
}

View File

@@ -0,0 +1,76 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Element;
/**
 * Class ElementMissing
 *
 * Placeholder element: it wraps no value and no document, never equals or
 * contains anything, and reports false / an empty string as its content.
 */
class ElementMissing extends Element
{
    /**
     * Creates the placeholder with a null value and no document.
     */
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * @return bool always false, whatever $value is
     */
    public function equals($value)
    {
        return false;
    }

    /**
     * @return bool always false, whatever $value is
     */
    public function contains($value)
    {
        return false;
    }

    /**
     * @return bool always false
     */
    public function getContent()
    {
        return false;
    }

    /**
     * @return string always the empty string
     */
    public function __toString()
    {
        return '';
    }
}

View File

@@ -0,0 +1,77 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
/**
 * Class ElementName
 *
 * Element holding a name token, i.e. text introduced by a leading '/'.
 */
class ElementName extends Element
{
    /**
     * Stores the name; no document is attached.
     *
     * @param string $value
     */
    public function __construct($value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loosely (==) compares the stored name with the given value.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $this->value == $value;
    }

    /**
     * Reads a name token from the start of $content.
     *
     * Leading whitespace is skipped; the name is the run of letters, digits
     * and "-+,#." that follows the '/'. On success $offset is advanced past
     * the end of the name and the name is passed through
     * Font::decodeEntities() before being wrapped.
     *
     * @param string   $content
     * @param Document $document Not used by this parser.
     * @param int      $offset   Incremented by the number of bytes consumed.
     *
     * @return bool|ElementName The parsed element, or false when $content
     *                          does not start with a name.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        $found = preg_match('/^\s*\/([A-Z0-9\-\+,#\.]+)/is', $content, $match);
        if (!$found) {
            return false;
        }

        $rawName = $match[1];
        // Only whitespace and '/' precede the name, so the first occurrence
        // of the name in $content is the matched one.
        $offset += strpos($content, $rawName) + \strlen($rawName);

        return new self(Font::decodeEntities($rawName));
    }
}

View File

@@ -0,0 +1,79 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
 * Class ElementNull
 *
 * Element for the literal keyword "null".
 */
class ElementNull extends Element
{
    /**
     * Creates the element with a null value and no document.
     */
    public function __construct()
    {
        parent::__construct(null, null);
    }

    /**
     * @return string Always the text 'null'.
     */
    public function __toString()
    {
        return 'null';
    }

    /**
     * Strictly (===) compares the element content with the given value.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value === $this->getContent();
    }

    /**
     * Reads the keyword "null" from the start of $content.
     *
     * Leading whitespace is skipped. On success $offset is advanced past
     * the end of the keyword.
     *
     * @param string   $content
     * @param Document $document Not used by this parser.
     * @param int      $offset   Incremented by the number of bytes consumed.
     *
     * @return bool|ElementNull The parsed element, or false when $content
     *                          does not start with "null".
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*(null)/s', $content)) {
            return false;
        }

        $offset += strpos($content, 'null') + \strlen('null');

        return new self();
    }
}

View File

@@ -0,0 +1,67 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
 * Class ElementNumeric
 *
 * Element holding a number; the value is always stored as a float.
 */
class ElementNumeric extends Element
{
    /**
     * Casts the given text to float and stores it; no document is attached.
     *
     * @param string $value
     */
    public function __construct($value)
    {
        parent::__construct((float) $value, null);
    }

    /**
     * Reads a number from the start of $content.
     *
     * Leading whitespace is skipped; the number is an optional '-' followed
     * by a run of digits and dots. On success $offset is advanced past the
     * end of the matched text.
     *
     * @param string   $content
     * @param Document $document Not used by this parser.
     * @param int      $offset   Incremented by the number of bytes consumed.
     *
     * @return bool|ElementNumeric The parsed element, or false when $content
     *                             does not start with a number.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        $found = preg_match('/^\s*(?P<value>\-?[0-9\.]+)/s', $content, $match);
        if (!$found) {
            return false;
        }

        $digits = $match['value'];
        $offset += strpos($content, $digits) + \strlen($digits);

        return new self($digits);
    }
}

View File

@@ -0,0 +1,101 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Font;
/**
 * Class ElementString
 *
 * Element holding a literal string, i.e. text written between parentheses.
 */
class ElementString extends Element
{
    /**
     * Stores the string; no document is attached.
     *
     * @param string $value
     */
    public function __construct($value)
    {
        parent::__construct($value, null);
    }

    /**
     * Loosely (==) compares the stored string with the given value.
     *
     * @param mixed $value
     *
     * @return bool
     */
    public function equals($value)
    {
        return $value == $this->value;
    }

    /**
     * Reads a parenthesised literal string from the start of $content.
     *
     * Leading whitespace is skipped. The string ends at the first ')' that
     * is preceded by an even number of backslashes (i.e. is not escaped).
     * Backslash escapes are then resolved and the result is run through the
     * Font octal, entity, hexadecimal and unicode decoders.
     *
     * @param string   $content
     * @param Document $document Not used by this parser.
     * @param int      $offset   Incremented by the number of bytes consumed,
     *                           including both parentheses.
     *
     * @return bool|ElementString The parsed element, or false when $content
     *                            does not start with '('.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*\((?P<name>.*)/s', $content, $match)) {
            $name = $match['name'];

            // Find next ')' not escaped.
            $start_search_end = 0;
            while (false !== ($cur_start_pos = strpos($name, ')', $start_search_end))) {
                // Count the backslashes immediately before this ')': an odd
                // count means the parenthesis itself is escaped.
                $cur_extract = substr($name, 0, $cur_start_pos);
                preg_match('/(?P<escape>[\\\]*)$/s', $cur_extract, $match);

                if (!(\strlen($match['escape']) % 2)) {
                    break;
                }

                $start_search_end = $cur_start_pos + 1;
            }

            // Extract string. When no unescaped ')' was found $cur_start_pos
            // is false, which yields an empty string and a length of 0.
            $length = (int) $cur_start_pos;
            $name = substr($name, 0, $length);
            $offset += strpos($content, '(') + $length + 2; // 2 for '(' and ')'

            // Resolve escapes in a single left-to-right pass. strtr() never
            // rescans text it has already produced, so an escaped backslash
            // followed by 'n', 'r', 't', '(' ... stays a backslash plus that
            // character instead of being unescaped a second time.
            $name = strtr($name, [
                '\\\\' => '\\',
                '\\ ' => ' ',
                '\\/' => '/',
                '\(' => '(',
                '\)' => ')',
                '\n' => "\n",
                '\r' => "\r",
                '\t' => "\t",
            ]);

            // Decode string.
            $name = Font::decodeOctal($name);
            $name = Font::decodeEntities($name);
            $name = Font::decodeHexadecimal($name, false);
            $name = Font::decodeUnicode($name);

            return new self($name);
        }

        return false;
    }
}

View File

@@ -0,0 +1,77 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
use Smalot\PdfParser\Header;
/**
 * Class ElementStruct
 *
 * Parser for a "<< ... >>" structure; the result is returned as a Header.
 */
class ElementStruct extends Element
{
    /**
     * Reads a "<< ... >>" structure from the start of $content.
     *
     * Nested structures are followed by counting "<<" and ">>" markers until
     * the opening marker is balanced. The text between the outer markers is
     * handed to Element::parse() and the resulting elements are wrapped in a
     * Header.
     *
     * @param string   $content
     * @param Document $document Passed on to Element::parse() and the Header.
     * @param int      $offset   Incremented by the number of bytes consumed.
     *
     * @return false|Header The parsed header, or false when $content does
     *                      not start with "<<".
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (!preg_match('/^\s*<<(?P<struct>.*)/is', $content)) {
            return false;
        }

        // Cut the content into pieces that each end with "<<" or ">>".
        preg_match_all('/(.*?)(<<|>>)/s', trim($content), $matches);

        $depth = 0;
        $sub = '';
        foreach ($matches[0] as $part) {
            $sub .= $part;

            if (false !== strpos($part, '<<')) {
                ++$depth;
            } else {
                --$depth;
            }

            // Back at (or below) the outer level: the structure is complete.
            if ($depth <= 0) {
                break;
            }
        }

        $offset += strpos($content, '<<') + \strlen(rtrim($sub));

        // Removes '<<' and '>>'.
        $inner = trim((string) preg_replace('/^\s*<<(.*)>>\s*$/s', '\\1', $sub));

        $position = 0;
        $elements = Element::parse($inner, $document, $position);

        return new Header($elements, $document);
    }
}

View File

@@ -0,0 +1,109 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Element;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Element;
/**
 * Class ElementXRef
 *
 * Element holding a reference to another object. The reference is stored
 * as two numbers joined by an underscore, e.g. "5_0".
 */
class ElementXRef extends Element
{
    /**
     * @return string The stored reference id (the element content).
     */
    public function getId()
    {
        return $this->getContent();
    }

    /**
     * Looks the referenced object up in the attached document.
     *
     * @return mixed Whatever Document::getObjectById() returns for this id.
     */
    public function getObject()
    {
        return $this->document->getObjectById($this->getId());
    }

    /**
     * Compares this reference with a value or another reference.
     *
     * @param mixed $value A number, an id string or another ElementXRef.
     *
     * @return bool
     */
    public function equals($value)
    {
        /**
         * In case $value is a number and $this->value is a string like 5_0
         *
         * Without this if-clause code like:
         *
         *      $element = new ElementXRef('5_0');
         *      $this->assertTrue($element->equals(5));
         *
         * would fail (= 5_0 and 5 are not equal in PHP 8.0+).
         */
        if (
            true === is_numeric($value)
            && true === \is_string($this->getContent())
            && 1 === preg_match('/[0-9]+\_[0-9]+/', $this->getContent())
        ) {
            // The float cast keeps only the leading number of "5_0".
            return (float) ($this->getContent()) == $value;
        }

        $id = ($value instanceof self) ? $value->getId() : $value;

        return $this->getId() == $id;
    }

    /**
     * @return string The id prefixed with '#Obj#'.
     */
    public function __toString()
    {
        return '#Obj#'.$this->getId();
    }

    /**
     * Reads a reference of the form "<number> <number> R" from the start of
     * $content.
     *
     * Leading whitespace is skipped. The two numbers may be separated by any
     * amount and kind of whitespace; the id is always built as
     * "<number>_<number>" so it has the same form regardless of the
     * separator used in the input.
     *
     * @param string   $content
     * @param Document $document Attached to the created element.
     * @param int      $offset   Incremented by the number of bytes consumed.
     *
     * @return bool|ElementXRef The parsed element, or false when $content
     *                          does not start with a reference.
     */
    public static function parse($content, Document $document = null, &$offset = 0)
    {
        if (preg_match('/^\s*(?P<id>(?P<obj>[0-9]+)\s+(?P<gen>[0-9]+)\s+R)/s', $content, $match)) {
            $reference = $match['id'];
            $offset += strpos($content, $reference) + \strlen($reference);

            // Join the captured numbers directly instead of trimming and
            // replacing spaces, which left tabs, newlines and repeated
            // spaces inside the id.
            return new self($match['obj'].'_'.$match['gen'], $document);
        }

        return false;
    }
}

View File

@@ -0,0 +1,166 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Exception;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Encoding\PostScriptGlyphs;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
/**
 * Class Encoding
 *
 * Character-code to glyph-name table built from a 'BaseEncoding' entry and
 * an optional 'Differences' list.
 */
class Encoding extends PDFObject
{
    /**
     * Table returned by the base encoding class' getTranslations().
     *
     * @var array
     */
    protected $encoding;
    /**
     * Entries taken from the 'Differences' list, keyed by character code.
     *
     * @var array
     */
    protected $differences;
    /**
     * Base table with the differences applied on top.
     *
     * @var array
     */
    protected $mapping;

    /**
     * Builds the three tables from the object's 'BaseEncoding' and
     * 'Differences' entries.
     *
     * Without a 'BaseEncoding' entry all tables stay empty.
     *
     * @throws EncodingNotFoundException When the base encoding has no usable table class.
     */
    public function init()
    {
        $this->mapping = [];
        $this->differences = [];
        $this->encoding = [];

        if ($this->has('BaseEncoding')) {
            $className = $this->getEncodingClass();
            $class = new $className();
            $this->encoding = $class->getTranslations();

            // Build table including differences.
            $differences = $this->get('Differences')->getContent();
            $code = 0;

            // NOTE(review): returning here leaves $this->mapping empty even
            // though the base table was loaded - confirm this is intended.
            if (!\is_array($differences)) {
                return;
            }

            foreach ($differences as $difference) {
                /** @var ElementNumeric $difference */
                if ($difference instanceof ElementNumeric) {
                    // A number sets the code for the entries that follow.
                    $code = $difference->getContent();
                    continue;
                }

                // ElementName
                $this->differences[$code] = $difference;
                if (\is_object($difference)) {
                    $this->differences[$code] = $difference->getContent();
                }

                // For the next char.
                ++$code;
            }

            $this->mapping = $this->encoding;
            foreach ($this->differences as $code => $difference) {
                /* @var string $difference */
                $this->mapping[$code] = $difference;
            }
        }
    }

    /**
     * Adds 'BaseEncoding' and 'Differences' to the parent's details.
     *
     * @param bool $deep Passed through to the parent implementation.
     *
     * @return array
     */
    public function getDetails($deep = true)
    {
        $details = [];

        $details['BaseEncoding'] = ($this->has('BaseEncoding') ? (string) $this->get('BaseEncoding') : 'Ansi');
        $details['Differences'] = ($this->has('Differences') ? (string) $this->get('Differences') : '');

        $details += parent::getDetails($deep);

        return $details;
    }

    /**
     * Translates a character code through the mapping table and then
     * through PostScriptGlyphs::getCodePoint().
     *
     * @param int $dec Character code; passed on unchanged when it has no
     *                 entry in the mapping table.
     *
     * @return int
     */
    public function translateChar($dec)
    {
        if (isset($this->mapping[$dec])) {
            $dec = $this->mapping[$dec];
        }

        return PostScriptGlyphs::getCodePoint($dec);
    }

    /**
     * Returns the name of the encoding class, if available.
     *
     * @return string Returns encoding class name if available or empty string (only prior PHP 7.4).
     *
     * @throws \Exception On PHP 7.4+ an exception is thrown if encoding class doesn't exist.
     */
    public function __toString()
    {
        try {
            return $this->getEncodingClass();
        } catch (Exception $e) {
            // prior to PHP 7.4 toString has to return an empty string.
            if (version_compare(\PHP_VERSION, '7.4.0', '<')) {
                return '';
            }
            throw $e;
        }
    }

    /**
     * Resolves the 'BaseEncoding' entry to a fully qualified class name in
     * the Smalot\PdfParser\Encoding namespace.
     *
     * The name comes from the parsed document, so besides existing, the
     * class must also offer getTranslations(): init() calls that method on
     * it, and a class from the same namespace that is not an encoding table
     * would otherwise end in an "undefined method" error.
     *
     * @return string
     *
     * @throws EncodingNotFoundException
     */
    protected function getEncodingClass()
    {
        // Load reference table charset.
        $baseEncoding = preg_replace('/[^A-Z0-9]/is', '', $this->get('BaseEncoding')->getContent());
        $className = '\\Smalot\\PdfParser\\Encoding\\'.$baseEncoding;

        if (!class_exists($className) || !method_exists($className, 'getTranslations')) {
            throw new EncodingNotFoundException('Missing encoding data for: "'.$baseEncoding.'".');
        }

        return $className;
    }
}

View File

@@ -0,0 +1,74 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin1Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class ISOLatin1Encoding
 *
 * Glyph-name table for the ISO Latin 1 encoding.
 */
class ISOLatin1Encoding
{
    /**
     * Returns the glyph names as a list indexed by character code.
     *
     * @return array Glyph names; the position in the list is the code.
     */
    public function getTranslations()
    {
        // Every row but the last ends with a space, so gluing the rows
        // together gives one space-separated list of names.
        $rows = [
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            'space exclam quotedbl numbersign dollar percent ampersand quoteright ',
            'parenleft parenright asterisk plus comma minus period slash zero one ',
            'two three four five six seven eight nine colon semicolon less equal ',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X ',
            'Y Z bracketleft backslash bracketright asciicircum underscore ',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute ',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring ',
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent ',
            'sterling currency yen brokenbar section dieresis copyright ',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree ',
            'plusminus twosuperior threesuperior acute mu paragraph ',
            'periodcentered cedilla onesuperior ordmasculine guillemotright ',
            'onequarter onehalf threequarters questiondown Agrave Aacute ',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute ',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde ',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave ',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute ',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute ',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde ',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave ',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        ];

        return explode(' ', implode('', $rows));
    }
}

View File

@@ -0,0 +1,74 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/ISOLatin9Encoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class ISOLatin9Encoding
 *
 * Glyph-name table for the ISO Latin 9 encoding.
 */
class ISOLatin9Encoding
{
    /**
     * Returns the glyph names as a list indexed by character code.
     *
     * @return array Glyph names; the position in the list is the code.
     */
    public function getTranslations()
    {
        // Every row but the last ends with a space, so gluing the rows
        // together gives one space-separated list of names.
        $rows = [
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            'space exclam quotedbl numbersign dollar percent ampersand quoteright ',
            'parenleft parenright asterisk plus comma minus period slash zero one ',
            'two three four five six seven eight nine colon semicolon less equal ',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X ',
            'Y Z bracketleft backslash bracketright asciicircum underscore ',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef dotlessi grave acute ',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring ',
            'cedilla .notdef hungarumlaut ogonek caron space exclamdown cent ',
            'sterling Euro yen Scaron section scaron copyright ',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree ',
            'plusminus twosuperior threesuperior Zcaron mu paragraph ',
            'periodcentered zcaron onesuperior ordmasculine guillemotright ',
            'OE oe Ydieresis questiondown Agrave Aacute ',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute ',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde ',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave ',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute ',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute ',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde ',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave ',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        ];

        return explode(' ', implode('', $rows));
    }
}

View File

@@ -0,0 +1,78 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://www.opensource.apple.com/source/vim/vim-34/vim/runtime/print/mac-roman.ps
namespace Smalot\PdfParser\Encoding;
/**
 * Class MacRomanEncoding
 *
 * Glyph-name table for the Mac Roman encoding.
 */
class MacRomanEncoding
{
    /**
     * Returns the glyph names as a list indexed by character code.
     *
     * @return array Glyph names; the position in the list is the code.
     */
    public function getTranslations()
    {
        // Every row but the last ends with a space, so gluing the rows
        // together gives one space-separated list of names.
        $rows = [
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle ',
            'parenleft parenright asterisk plus comma minus period slash ',
            'zero one two three four five six seven ',
            'eight nine colon semicolon less equal greater question ',
            'at A B C D E F G ',
            'H I J K L M N O ',
            'P Q R S T U V W ',
            'X Y Z bracketleft backslash bracketright asciicircum underscore ',
            'grave a b c d e f g ',
            'h i j k l m n o ',
            'p q r s t u v w ',
            'x y z braceleft bar braceright asciitilde .notdef ',
            'Adieresis Aring Ccedilla Eacute Ntilde Odieresis Udieresis aacute ',
            'agrave acircumflex adieresis atilde aring ccedilla eacute egrave ',
            'ecircumflex edieresis iacute igrave icircumflex idieresis ntilde oacute ',
            'ograve ocircumflex odieresis otilde uacute ugrave ucircumflex udieresis ',
            'dagger degree cent sterling section bullet paragraph germandbls ',
            'registered copyright trademark acute dieresis notequal AE Oslash ',
            'infinity plusminus lessequal greaterequal yen mu partialdiff summation ',
            'Pi pi integral ordfeminine ordmasculine Omega ae oslash ',
            'questiondown exclamdown logicalnot radical florin approxequal delta guillemotleft ',
            'guillemotright ellipsis space Agrave Atilde Otilde OE oe ',
            'endash emdash quotedblleft quotedblright quoteleft quoteright divide lozenge ',
            'ydieresis Ydieresis fraction currency guilsinglleft guilsinglright fi fl ',
            'daggerdbl periodcentered quotesinglbase quotedblbase perthousand Acircumflex Ecircumflex Aacute ',
            'Edieresis Egrave Iacute Icircumflex Idieresis Igrave Oacute Ocircumflex ',
            'heart Ograve Uacute Ucircumflex Ugrave dotlessi circumflex tilde ',
            'macron breve dotaccent ring cedilla hungarumlaut ogonek caron',
        ];

        return explode(' ', implode('', $rows));
    }
}

View File

@@ -0,0 +1,74 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/StandardEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class StandardEncoding
 *
 * Glyph-name table for the Standard encoding.
 */
class StandardEncoding
{
    /**
     * Returns the glyph names as a list indexed by character code.
     *
     * @return array Glyph names; the position in the list is the code.
     */
    public function getTranslations()
    {
        // Every row but the last ends with a space, so gluing the rows
        // together gives one space-separated list of names.
        $rows = [
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            'space exclam quotedbl numbersign dollar percent ampersand quoteright ',
            'parenleft parenright asterisk plus comma hyphen period slash zero ',
            'one two three four five six seven eight nine colon semicolon less ',
            'equal greater question at A B C D E F G H I J K L M N O P Q R S T U ',
            'V W X Y Z bracketleft backslash bracketright asciicircum underscore ',
            'quoteleft a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef exclamdown cent ',
            'sterling fraction yen florin section currency quotesingle ',
            'quotedblleft guillemotleft guilsinglleft guilsinglright fi fl ',
            '.notdef endash dagger daggerdbl periodcentered .notdef paragraph ',
            'bullet quotesinglbase quotedblbase quotedblright guillemotright ',
            'ellipsis perthousand .notdef questiondown .notdef grave acute ',
            'circumflex tilde macron breve dotaccent dieresis .notdef ring ',
            'cedilla .notdef hungarumlaut ogonek caron emdash .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef AE .notdef ',
            'ordfeminine .notdef .notdef .notdef .notdef Lslash Oslash OE ',
            'ordmasculine .notdef .notdef .notdef .notdef .notdef ae .notdef ',
            '.notdef .notdef dotlessi .notdef .notdef lslash oslash oe germandbls ',
            '.notdef .notdef .notdef .notdef',
        ];

        return explode(' ', implode('', $rows));
    }
}

View File

@@ -0,0 +1,74 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
// Source : http://cpansearch.perl.org/src/JV/PostScript-Font-1.10.02/lib/PostScript/WinANSIEncoding.pm
namespace Smalot\PdfParser\Encoding;
/**
 * Class WinAnsiEncoding
 *
 * Glyph-name table for the WinAnsi encoding.
 */
class WinAnsiEncoding
{
    /**
     * Returns the glyph names as a list indexed by character code.
     *
     * @return array Glyph names; the position in the list is the code.
     */
    public function getTranslations()
    {
        // Every row but the last ends with a space, so gluing the rows
        // together gives one space-separated list of names.
        $rows = [
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            '.notdef .notdef .notdef .notdef .notdef .notdef .notdef .notdef ',
            'space exclam quotedbl numbersign dollar percent ampersand quotesingle ',
            'parenleft parenright asterisk plus comma hyphen period slash zero one ',
            'two three four five six seven eight nine colon semicolon less equal ',
            'greater question at A B C D E F G H I J K L M N O P Q R S T U V W X ',
            'Y Z bracketleft backslash bracketright asciicircum underscore ',
            'grave a b c d e f g h i j k l m n o p q r s t u v w x y z ',
            'braceleft bar braceright asciitilde bullet Euro bullet quotesinglbase ',
            'florin quotedblbase ellipsis dagger daggerdbl circumflex perthousand ',
            'Scaron guilsinglleft OE bullet Zcaron bullet bullet quoteleft quoteright ',
            'quotedblleft quotedblright bullet endash emdash tilde trademark scaron ',
            'guilsinglright oe bullet zcaron Ydieresis space exclamdown cent ',
            'sterling currency yen brokenbar section dieresis copyright ',
            'ordfeminine guillemotleft logicalnot hyphen registered macron degree ',
            'plusminus twosuperior threesuperior acute mu paragraph ',
            'periodcentered cedilla onesuperior ordmasculine guillemotright ',
            'onequarter onehalf threequarters questiondown Agrave Aacute ',
            'Acircumflex Atilde Adieresis Aring AE Ccedilla Egrave Eacute ',
            'Ecircumflex Edieresis Igrave Iacute Icircumflex Idieresis Eth Ntilde ',
            'Ograve Oacute Ocircumflex Otilde Odieresis multiply Oslash Ugrave ',
            'Uacute Ucircumflex Udieresis Yacute Thorn germandbls agrave aacute ',
            'acircumflex atilde adieresis aring ae ccedilla egrave eacute ',
            'ecircumflex edieresis igrave iacute icircumflex idieresis eth ntilde ',
            'ograve oacute ocircumflex otilde odieresis divide oslash ugrave ',
            'uacute ucircumflex udieresis yacute thorn ydieresis',
        ];

        return explode(' ', implode('', $rows));
    }
}

View File

@@ -0,0 +1,7 @@
<?php
namespace Smalot\PdfParser\Exception;
/**
 * Class EncodingNotFoundException
 *
 * Marker exception type; it adds nothing to \Exception beyond its own name,
 * so callers can catch it separately.
 */
class EncodingNotFoundException extends \Exception
{
}

View File

@@ -0,0 +1,511 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Encoding\WinAnsiEncoding;
use Smalot\PdfParser\Exception\EncodingNotFoundException;
/**
* Class Font
*/
class Font extends PDFObject
{
/**
 * Placeholder returned by translateChar() for a code that is not in the
 * translation table when $use_default is true.
 */
const MISSING = '?';
/**
 * Character code => decoded text lookup. Null until loadTranslateTable()
 * builds it or setTable() assigns it.
 *
 * @var array|null
 */
protected $table = null;
/**
 * Byte widths stored under the 'from' and 'to' keys by loadTranslateTable().
 * Null until that method has run.
 *
 * @var array|null
 */
protected $tableSizes = null;
/**
 * Initializes the font by building its translation table.
 */
public function init()
{
// Load translate table.
$this->loadTranslateTable();
}
/**
 * Returns the font name read from the BaseFont entry.
 *
 * @return string the BaseFont value cast to string, or '[Unknown]' when there is no such entry
 */
public function getName()
{
    if (!$this->has('BaseFont')) {
        return '[Unknown]';
    }

    return (string) $this->get('BaseFont');
}
/**
 * Returns the Subtype entry of the font header as a string.
 *
 * @return string
 */
public function getType()
{
    $subtype = $this->header->get('Subtype');

    return (string) $subtype;
}
/**
 * Describes the font: its name, type and encoding, followed by the parent's details.
 *
 * The three font-specific keys win over same-named keys coming from the parent
 * because the arrays are combined with the union operator.
 *
 * @param bool $deep forwarded to the parent implementation
 *
 * @return array
 */
public function getDetails($deep = true)
{
    $own = [
        'Name' => $this->getName(),
        'Type' => $this->getType(),
        'Encoding' => $this->has('Encoding') ? (string) $this->get('Encoding') : 'Ansi',
    ];

    return $own + parent::getDetails($deep);
}
/**
 * Translates one character code into decoded text using the translation table.
 *
 * @param string $char        raw bytes of a single character code
 * @param bool   $use_default when true, a code missing from the table yields self::MISSING;
 *                            when false it yields the fallback: the input bytes, or, for a
 *                            single byte in a WinAnsi-encoded font, the character with that
 *                            code point
 *
 * @return string|bool the table entry, self::MISSING, or the fallback text
 */
public function translateChar($char, $use_default = true)
{
    // Build the table on demand: $this->table is null until init() has run, and
    // passing null to array_key_exists() is a TypeError on PHP 8.
    $table = $this->loadTranslateTable();
    $dec = hexdec(bin2hex($char));

    if (\array_key_exists($dec, $table)) {
        return $table[$dec];
    }

    // fallback for decoding single-byte ANSI characters that are not in the lookup table
    $fallbackDecoded = $char;
    if (
        \strlen($char) < 2
        && $this->has('Encoding')
        && $this->get('Encoding') instanceof Encoding
    ) {
        try {
            if (WinAnsiEncoding::class === $this->get('Encoding')->__toString()) {
                $fallbackDecoded = self::uchr($dec);
            }
        } catch (EncodingNotFoundException $e) {
            // Encoding->getEncodingClass() throws EncodingNotFoundException when BaseEncoding doesn't exist
            // See table 5.11 on PDF 1.5 specs for more info
        }
    }

    return $use_default ? self::MISSING : $fallbackDecoded;
}
/**
 * Converts a numeric code point into the corresponding UTF-8 string.
 *
 * @param int $code
 *
 * @return string
 */
public static function uchr($code)
{
    // html_entity_decode() will not work with UTF-16 or UTF-32 char entities,
    // therefore the numeric entity is run through mb_convert_encoding() instead
    $entity = sprintf('&#%d;', (int) $code);

    return mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
}
/**
 * Builds (once) and returns the code => text translation table from the ToUnicode content.
 *
 * The result is cached in $this->table, so later calls return it without parsing again.
 * A font without a ToUnicode entry ends up with an empty table and table sizes of 1.
 *
 * @return array
 */
public function loadTranslateTable()
{
// Already built, or assigned through setTable(): reuse it.
if (null !== $this->table) {
return $this->table;
}
$this->table = [];
$this->tableSizes = [
'from' => 1,
'to' => 1,
];
if ($this->has('ToUnicode')) {
$content = $this->get('ToUnicode')->getContent();
$matches = [];
// Support for multiple spacerange sections
// NOTE: the loop below breaks after its first iteration, so only the first
// codespacerange section actually determines the table sizes.
if (preg_match_all('/begincodespacerange(?P<sections>.*?)endcodespacerange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
// $matches is overwritten here; the foreach keeps iterating the array it started with.
preg_match_all($regexp, $section, $matches);
// Two hex digits per byte, hence the division by 2; never less than 1.
$this->tableSizes = [
'from' => max(1, \strlen(current($matches['from'])) / 2),
'to' => max(1, \strlen(current($matches['to'])) / 2),
];
break;
}
}
// Support for multiple bfchar sections
if (preg_match_all('/beginbfchar(?P<sections>.*?)endbfchar/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
$regexp = '/<(?P<from>[0-9A-F]+)> +<(?P<to>[0-9A-F]+)>[ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
// The source-code width is re-derived from the first bfchar entry of each section.
$this->tableSizes['from'] = max(1, \strlen(current($matches['from'])) / 2);
foreach ($matches['from'] as $key => $from) {
// Cut the destination string into 4-hex-digit groups; each group becomes one character.
$parts = preg_split(
'/([0-9A-F]{4})/i',
$matches['to'][$key],
0,
\PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
);
$text = '';
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
}
$this->table[hexdec($from)] = $text;
}
}
}
// Support for multiple bfrange sections
if (preg_match_all('/beginbfrange(?P<sections>.*?)endbfrange/s', $content, $matches)) {
foreach ($matches['sections'] as $section) {
// Support for : <srcCode1> <srcCode2> <dstString>
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *<(?P<offset>[0-9A-F]+)>[ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$char_to = hexdec($matches['to'][$key]);
$offset = hexdec($matches['offset'][$key]);
// Every code in [from, to] maps to offset + (code - from); the destination
// is treated as one number, not split into 4-digit groups.
for ($char = $char_from; $char <= $char_to; ++$char) {
$this->table[$char] = self::uchr($char - $char_from + $offset);
}
}
// Support for : <srcCode1> <srcCodeN> [<dstString1> <dstString2> ... <dstStringN>]
// Some PDF file has 2-byte Unicode values on new lines > added \r\n
$regexp = '/<(?P<from>[0-9A-F]+)> *<(?P<to>[0-9A-F]+)> *\[(?P<strings>[\r\n<>0-9A-F ]+)\][ \r\n]+/is';
preg_match_all($regexp, $section, $matches);
foreach ($matches['from'] as $key => $from) {
$char_from = hexdec($from);
$strings = [];
preg_match_all('/<(?P<string>[0-9A-F]+)> */is', $matches['strings'][$key], $strings);
// The n-th listed string is assigned to code from + n; the <to> bound is not consulted.
foreach ($strings['string'] as $position => $string) {
$parts = preg_split(
'/([0-9A-F]{4})/i',
$string,
0,
\PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
);
$text = '';
foreach ($parts as $part) {
$text .= self::uchr(hexdec($part));
}
$this->table[$char_from + $position] = $text;
}
}
}
}
}
return $this->table;
}
/**
 * Replaces the translation table.
 *
 * Because loadTranslateTable() returns early when the table is not null,
 * a table assigned here is not rebuilt from the ToUnicode content afterwards.
 *
 * @param array $table
 */
public function setTable($table)
{
$this->table = $table;
}
/**
 * Replaces every <hex> string found in the input by the bytes it encodes.
 *
 * Input containing "<?xml" is returned untouched. Text between hex strings is kept as is.
 *
 * @param string $hexa       text that may contain hexadecimal strings such as <48656C6C6F>
 * @param bool   $add_braces when true, each decoded string is wrapped in parentheses and
 *                           its backslashes are doubled
 *
 * @return string
 */
public static function decodeHexadecimal($hexa, $add_braces = false)
{
    // Special shortcut for XML content.
    if (false !== stripos($hexa, '<?xml')) {
        return $hexa;
    }

    $chunks = preg_split('/(<[a-f0-9]+>)/si', $hexa, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE);
    $decoded = '';

    foreach ($chunks as $chunk) {
        $isBracketed = preg_match('/^<.*>$/s', $chunk) && false === stripos($chunk, '<?xml');
        if (!$isBracketed) {
            // Plain text between hex strings passes through unchanged.
            $decoded .= $chunk;
            continue;
        }

        // strip line breaks, then the surrounding angle brackets
        $digits = trim(preg_replace("/[\r\n]/", '', $chunk), '<>');
        $bytes = pack('H*', $digits);

        if ($add_braces) {
            $decoded .= '('.preg_replace('/\\\/s', '\\\\\\', $bytes).')';
        } else {
            $decoded .= $bytes;
        }
    }

    return $decoded;
}
/**
 * Replaces every three-digit octal escape (backslash + 3 octal digits) by the byte it denotes.
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeOctal($text)
{
    return preg_replace_callback(
        '/\\\\[0-7]{3}/s',
        static function (array $escape) {
            // Drop the leading backslash and read the remaining digits as octal.
            return \chr(octdec(substr($escape[0], 1)));
        },
        $text
    );
}
/**
 * Decodes "#XX" escapes, where XX are two hexadecimal digits, into the byte they denote.
 *
 * The previous pattern only accepted decimal digits after the '#', so escapes
 * containing A-F (for example "#2F" or "#C3") were left undecoded even though
 * the value was already interpreted as hexadecimal.
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeEntities($text)
{
    return (string) preg_replace_callback(
        '/#([0-9A-Fa-f]{2})/',
        static function (array $escape) {
            return \chr(hexdec($escape[1]));
        },
        $text
    );
}
/**
 * Decodes text that starts with the FE FF byte order mark, two bytes per character.
 *
 * Text without that leading mark is returned unchanged.
 *
 * @param string $text
 *
 * @return string
 */
public static function decodeUnicode($text)
{
    if (!preg_match('/^\xFE\xFF/i', $text)) {
        return $text;
    }

    // Strip U+FEFF byte order marker.
    $payload = substr($text, 2);
    $size = \strlen($payload);
    $decoded = '';

    for ($pos = 0; $pos < $size; $pos += 2) {
        $unit = substr($payload, $pos, 2);
        $decoded .= self::uchr(hexdec(bin2hex($unit)));
    }

    return $decoded;
}
/**
 * Returns the font space limit taken from the configuration object.
 *
 * decodeText() compares numeric commands against this value to decide
 * whether a new word starts.
 *
 * @return int
 *
 * @todo Deprecated, use $this->config->getFontSpaceLimit() instead.
 */
protected function getFontSpaceLimit()
{
return $this->config->getFontSpaceLimit();
}
/**
 * Decodes the operands of a text-showing operator into a string.
 *
 * String operands are concatenated into words. A numeric operand ('n' type)
 * smaller than the font space limit starts a new word. Each finished word is
 * run through decodeContent() and the words are joined with single spaces.
 *
 * @param array $commands list of command arrays indexed by PDFObject::TYPE and PDFObject::COMMAND
 *
 * @return string
 */
public function decodeText($commands)
{
    $word_position = 0;
    $words = [];
    $font_space = $this->getFontSpaceLimit();

    // Escape sequences are resolved in one left-to-right pass with strtr().
    // A sequential str_replace() re-scans its own output, so an escaped
    // backslash followed by "n" (\\n) would wrongly turn into a newline.
    $escapes = [
        '\\\\' => '\\',
        '\(' => '(',
        '\)' => ')',
        '\n' => "\n",
        '\r' => "\r",
        '\t' => "\t",
        '\f' => "\f",
        '\ ' => ' ',
    ];

    foreach ($commands as $command) {
        switch ($command[PDFObject::TYPE]) {
            case 'n':
                if ((float) (trim($command[PDFObject::COMMAND])) < $font_space) {
                    $word_position = \count($words);
                }
                continue 2;
            case '<':
                // Decode hexadecimal.
                $text = self::decodeHexadecimal('<'.$command[PDFObject::COMMAND].'>');
                break;
            default:
                // Decode octal (if necessary).
                $text = self::decodeOctal($command[PDFObject::COMMAND]);
        }

        // replace escaped chars
        $text = strtr($text, $escapes);

        // add content to result string
        if (isset($words[$word_position])) {
            $words[$word_position] .= $text;
        } else {
            $words[$word_position] = $text;
        }
    }

    // Index-based write-back avoids leaving a dangling foreach reference.
    foreach ($words as $position => $word) {
        $words[$position] = $this->decodeContent($word);
    }

    return implode(' ', $words);
}
/**
 * Converts raw string bytes into text using whichever mapping the font provides.
 *
 * Branches, in order: the ToUnicode table; an Encoding object; the
 * MacRomanEncoding element; otherwise a Windows-1252 to UTF-8 conversion
 * unless the text already validates as UTF-8.
 *
 * @param string $text
 * @param bool $unicode This parameter is deprecated and might be removed in a future release.
 *                      Passed by reference; only the Encoding branch writes to it
 *                      (the result of mb_check_encoding()).
 *
 * @return string
 */
public function decodeContent($text, &$unicode = null)
{
if ($this->has('ToUnicode')) {
// Width in bytes of one source code, as determined by loadTranslateTable().
$bytes = $this->tableSizes['from'];
if ($bytes) {
$result = '';
$length = \strlen($text);
for ($i = 0; $i < $length; $i += $bytes) {
$char = substr($text, $i, $bytes);
// NOTE(review): translateChar() as written in this class returns a table entry
// or a string, so the branches below run only if a table entry is false or a
// subclass overrides translateChar() — confirm.
if (false !== ($decoded = $this->translateChar($char, false))) {
$char = $decoded;
} elseif ($this->has('DescendantFonts')) {
if ($this->get('DescendantFonts') instanceof PDFObject) {
$fonts = $this->get('DescendantFonts')->getHeader()->getElements();
} else {
$fonts = $this->get('DescendantFonts')->getContent();
}
$decoded = false;
// Ask each descendant font in turn; the first one that yields a value wins.
foreach ($fonts as $font) {
if ($font instanceof self) {
if (false !== ($decoded = $font->translateChar($char, false))) {
$decoded = mb_convert_encoding($decoded, 'UTF-8', 'Windows-1252');
break;
}
}
}
if (false !== $decoded) {
$char = $decoded;
} else {
$char = mb_convert_encoding($char, 'UTF-8', 'Windows-1252');
}
} else {
$char = self::MISSING;
}
$result .= $char;
}
$text = $result;
}
} elseif ($this->has('Encoding') && $this->get('Encoding') instanceof Encoding) {
/** @var Encoding $encoding */
$encoding = $this->get('Encoding');
$unicode = mb_check_encoding($text, 'UTF-8');
$result = '';
if ($unicode) {
// Valid UTF-8: split into characters (the 'u' modifier is always added here,
// since this branch only runs when $unicode is true).
$chars = preg_split(
'//s'.($unicode ? 'u' : ''),
$text,
-1,
\PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY
);
foreach ($chars as $char) {
// Numeric value of the character's bytes, looked up through the encoding.
$dec_av = hexdec(bin2hex($char));
$dec_ap = $encoding->translateChar($dec_av);
$result .= self::uchr($dec_ap);
}
} else {
// Not valid UTF-8: translate byte by byte.
$length = \strlen($text);
for ($i = 0; $i < $length; ++$i) {
$dec_av = hexdec(bin2hex($text[$i]));
$dec_ap = $encoding->translateChar($dec_av);
$result .= self::uchr($dec_ap);
}
}
$text = $result;
} elseif ($this->get('Encoding') instanceof Element &&
$this->get('Encoding')->equals('MacRomanEncoding')) {
// mb_convert_encoding does not support MacRoman/macintosh,
// so we use iconv() here
$text = iconv('macintosh', 'UTF-8', $text);
} elseif (!mb_check_encoding($text, 'UTF-8')) {
// don't double-encode strings already in UTF-8
$text = mb_convert_encoding($text, 'UTF-8', 'Windows-1252');
}
return $text;
}
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontCIDFontType0
 *
 * Declares no members of its own; all behavior is inherited from Font.
 * Presumably chosen for fonts whose Subtype is CIDFontType0 — TODO confirm
 * against the code that instantiates font objects.
 */
class FontCIDFontType0 extends Font
{
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontCIDFontType2
 *
 * Declares no members of its own; all behavior is inherited from Font.
 * Presumably chosen for fonts whose Subtype is CIDFontType2 — TODO confirm
 * against the code that instantiates font objects.
 */
class FontCIDFontType2 extends Font
{
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontTrueType
 *
 * Declares no members of its own; all behavior is inherited from Font.
 * Presumably chosen for fonts whose Subtype is TrueType — TODO confirm
 * against the code that instantiates font objects.
 */
class FontTrueType extends Font
{
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontType0
 *
 * Declares no members of its own; all behavior is inherited from Font.
 * Presumably chosen for fonts whose Subtype is Type0 — TODO confirm
 * against the code that instantiates font objects.
 */
class FontType0 extends Font
{
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontType1
 *
 * Declares no members of its own; all behavior is inherited from Font.
 * Presumably chosen for fonts whose Subtype is Type1 — TODO confirm
 * against the code that instantiates font objects.
 */
class FontType1 extends Font
{
}

View File

@@ -0,0 +1,40 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\Font;
use Smalot\PdfParser\Font;
/**
 * Class FontType3
 *
 * Declares no members of its own; all behavior is inherited from Font.
 * Presumably chosen for fonts whose Subtype is Type3 — TODO confirm
 * against the code that instantiates font objects.
 */
class FontType3 extends Font
{
}

View File

@@ -0,0 +1,207 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementStruct;
use Smalot\PdfParser\Element\ElementXRef;
/**
* Class Header
*/
class Header
{
/**
 * Document used to resolve cross references; null disables resolution.
 *
 * @var Document|null
 */
protected $document = null;
/**
 * Header entries indexed by name.
 *
 * @var Element[]
 */
protected $elements = null;
/**
 * The document parameter is declared explicitly nullable: relying on the
 * "= null" default to make a typed parameter nullable is deprecated as of PHP 8.4.
 *
 * @param Element[]     $elements list of elements
 * @param Document|null $document document
 */
public function __construct($elements = [], ?Document $document = null)
{
    $this->elements = $elements;
    $this->document = $document;
}
/**
 * Calls init() on every entry that is an Element; other entries are skipped.
 */
public function init()
{
    foreach ($this->elements as $entry) {
        if (!($entry instanceof Element)) {
            continue;
        }

        $entry->init();
    }
}
/**
 * Returns all elements after attempting to resolve each cross reference.
 */
public function getElements()
{
    foreach (array_keys($this->elements) as $name) {
        $this->resolveXRef($name);
    }

    return $this->elements;
}
/**
 * Used only for debug: maps each element name to the class name of its value.
 *
 * @return array
 */
public function getElementTypes()
{
    // array_map() with a single array keeps the original keys.
    return array_map('get_class', $this->elements);
}
/**
 * Builds a name => value summary of the header elements.
 *
 * Nested headers, PDF objects and element arrays are only expanded when $deep
 * is true; plain elements are always reported through their string cast.
 *
 * @param bool $deep
 *
 * @return array
 */
public function getDetails($deep = true)
{
    $summary = [];

    foreach ($this->getElements() as $name => $item) {
        if ($deep && $item instanceof self) {
            $summary[$name] = $item->getDetails($deep);
            continue;
        }

        if ($deep && $item instanceof PDFObject) {
            // Nested objects are described without descending any further.
            $summary[$name] = $item->getDetails(false);
            continue;
        }

        if ($item instanceof ElementArray) {
            // Arrays are left out entirely in shallow mode.
            if ($deep) {
                $summary[$name] = $item->getDetails();
            }
            continue;
        }

        if ($item instanceof Element) {
            $summary[$name] = (string) $item;
        }
    }

    return $summary;
}
/**
 * Indicate if an element name is available in header.
 *
 * Only the key is tested (array_key_exists), so a name whose stored value
 * is null still counts as present.
 *
 * @param string $name The name of the element
 *
 * @return bool
 */
public function has($name)
{
return \array_key_exists($name, $this->elements);
}
/**
 * Returns the element stored under a name, resolving a cross reference if needed.
 *
 * @param string $name
 *
 * @return Element|PDFObject an ElementMissing instance when the name is unknown
 */
public function get($name)
{
    if (!\array_key_exists($name, $this->elements)) {
        return new ElementMissing();
    }

    return $this->resolveXRef($name);
}
/**
 * Resolve XRef to object.
 *
 * A stored ElementXRef is looked up in the document and, when found, replaces
 * the reference in the elements list so later calls return the object directly.
 * Without a document, or for any other kind of entry, the stored value is returned as is.
 *
 * @param string $name
 *
 * @return Element|PDFObject an ElementMissing instance when the referenced object is not found
 *
 * @throws \Exception
 */
protected function resolveXRef($name)
{
    $stored = $this->elements[$name];

    if (!($stored instanceof ElementXRef) || null === $this->document) {
        return $stored;
    }

    /** @var ElementXRef $stored */
    $target = $this->document->getObjectById($stored->getId());
    if (null === $target) {
        return new ElementMissing();
    }

    // Update elements list for future calls.
    $this->elements[$name] = $target;

    return $target;
}
/**
 * Parses a header from raw content.
 *
 * Content starting with "<<" (after trimming) is parsed as a struct; anything else is
 * parsed as an array whose raw content becomes the elements of a header created
 * without a document. When nothing could be parsed, an empty header bound to the
 * document is returned.
 *
 * @param string $content The content to parse
 * @param Document $document The document
 * @param int $position The new position of the cursor after parsing
 *
 * @return Header
 */
public static function parse($content, Document $document, &$position = 0)
{
    if ('<<' === substr(trim($content), 0, 2)) {
        /** @var Header $parsed */
        $parsed = ElementStruct::parse($content, $document, $position);
    } else {
        $list = ElementArray::parse($content, $document, $position);
        $parsed = $list
            ? new self($list->getRawContent(), null)
            : new self([], $document);
    }

    // Build an empty header when parsing produced nothing usable.
    return $parsed ?: new self([], $document);
}
}

View File

@@ -0,0 +1,820 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\XObject\Form;
use Smalot\PdfParser\XObject\Image;
/**
* Class PDFObject
*/
class PDFObject
{
/** Key of a parsed command array holding the operand type. */
const TYPE = 't';
/** Key of a parsed command array holding the operator. */
const OPERATOR = 'o';
/** Key of a parsed command array holding the operand(s). */
const COMMAND = 'c';
/**
 * The recursion stack: unique ids of the objects whose getText() is currently running.
 *
 * @var array
 */
public static $recursionStack = [];
/**
 * @var Document
 */
protected $document = null;
/**
 * Never null after construction: the constructor substitutes an empty Header.
 *
 * @var Header
 */
protected $header = null;
/**
 * @var string|null
 */
protected $content = null;
/**
 * @var Config|null
 */
protected $config;
/**
 * Optional typed parameters are declared explicitly nullable: relying on the
 * "= null" default to make a typed parameter nullable is deprecated as of PHP 8.4.
 *
 * @param Document    $document
 * @param Header|null $header   an empty Header is used when null
 * @param string|null $content
 * @param Config|null $config
 */
public function __construct(
    Document $document,
    ?Header $header = null,
    $content = null,
    ?Config $config = null
) {
    $this->document = $document;
    $this->header = null !== $header ? $header : new Header();
    $this->content = $content;
    $this->config = $config;
}
/**
 * Initialization hook; does nothing in this class.
 */
public function init()
{
}
/**
 * Returns the header of this object.
 *
 * The constructor stores an empty Header when none is given, so null is only
 * possible if a subclass resets the property.
 *
 * @return Header|null
 */
public function getHeader()
{
return $this->header;
}
/**
 * Returns the header element stored under the given name (delegates to Header::get()).
 *
 * @param string $name
 *
 * @return Element|PDFObject
 */
public function get($name)
{
return $this->header->get($name);
}
/**
 * Tells whether the header contains an element with the given name (delegates to Header::has()).
 *
 * @param string $name
 *
 * @return bool
 */
public function has($name)
{
return $this->header->has($name);
}
/**
 * Returns the details of the header (delegates to Header::getDetails()).
 *
 * @param bool $deep forwarded unchanged to the header
 *
 * @return array
 */
public function getDetails($deep = true)
{
return $this->header->getDetails($deep);
}
/**
 * Returns the content passed to the constructor, unmodified.
 *
 * @return string|null
 */
public function getContent()
{
return $this->content;
}
/**
 * Returns a copy of the content in which text that could confuse operator
 * detection is overwritten with a fill character.
 *
 * Every replacement has the same byte length as the text it replaces, so offsets
 * found in the result can be applied to the original content. Masked parts are:
 * escaped backslashes/parentheses, inline image blocks (BI ... ID ... EI), the
 * inside of [...] arrays and (...) strings, everything between '<' and '>',
 * and BDC / EMC markers.
 *
 * @param string $content
 * @param string $char    fill character; only its first byte is used
 *
 * @return string
 */
public function cleanContent($content, $char = 'X')
{
    $char = $char[0];
    $content = str_replace(['\\\\', '\\)', '\\('], $char.$char, $content);

    // Remove image bloc with binary content
    preg_match_all('/\s(BI\s.*?(\sID\s).*?(\sEI))\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    foreach ($matches[0] as $part) {
        $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
    }

    // Clean content in square brackets [.....]
    preg_match_all('/\[((\(.*?\)|[0-9\.\-\s]*)*)\]/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
    }

    // Clean content in round brackets (.....)
    preg_match_all('/\((.*?)\)/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
    }

    // Clean structure
    if ($parts = preg_split('/(<|>)/s', $content, -1, \PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE)) {
        $content = '';
        $level = 0;
        foreach ($parts as $part) {
            if ('<' == $part) {
                ++$level;
            }

            // Only text outside every <...> pair survives unmasked.
            $content .= (0 == $level ? $part : str_repeat($char, \strlen($part)));

            if ('>' == $part) {
                --$level;
            }
        }
    }

    // Clean BDC and EMC markup
    // The delimiter is passed to preg_quote() so that a '/' fill character
    // cannot terminate the pattern early.
    preg_match_all(
        '/(\/[A-Za-z0-9\_]*\s*'.preg_quote($char, '/').'*BDC)/s',
        $content,
        $matches,
        \PREG_OFFSET_CAPTURE
    );
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
    }

    preg_match_all('/\s(EMC)\s/s', $content, $matches, \PREG_OFFSET_CAPTURE);
    foreach ($matches[1] as $part) {
        $content = substr_replace($content, str_repeat($char, \strlen($part[0])), $part[1], \strlen($part[0]));
    }

    return $content;
}
/**
 * Splits a content stream into the sections relevant for text extraction.
 *
 * Sections are located in a masked copy of the content (see cleanContent())
 * and then cut out of the original at the same offsets. All BT ... ET text
 * blocks come first, followed by all "/Name Do" commands.
 *
 * @param string $content
 *
 * @return array
 */
public function getSectionsText($content)
{
    $found = [];
    $content = ' '.$content.' ';
    $masked = $this->cleanContent($content, '_');

    // Extract text blocks.
    $hasBlocks = preg_match_all(
        '/(\sQ)?\s+BT[\s|\(|\[]+(.*?)\s*ET(\sq)?/s',
        $masked,
        $blocks,
        \PREG_OFFSET_CAPTURE
    );
    if ($hasBlocks) {
        foreach ($blocks[2] as $index => $hit) {
            $maskedText = $hit[0];
            if ('' === $maskedText) {
                continue;
            }

            $raw = substr($content, $hit[1], \strlen($maskedText));

            // Removes BDC and EMC markup.
            $raw = preg_replace('/(\/[A-Za-z0-9]+\s*<<.*?)(>>\s*BDC)(.*?)(EMC\s+)/s', '${3}', $raw.' ');

            // Add Q and q flags if detected around BT/ET.
            // @see: https://github.com/smalot/pdfparser/issues/387
            $prefix = !empty($blocks[1][$index][0]) ? "Q\n" : '';
            $suffix = !empty($blocks[3][$index][0]) ? "\nq" : '';

            $found[] = trim($prefix.$raw).$suffix;
        }
    }

    // Extract 'do' commands.
    if (preg_match_all('/(\/[A-Za-z0-9\.\-_]+\s+Do)\s/s', $masked, $doHits, \PREG_OFFSET_CAPTURE)) {
        foreach ($doHits[1] as $hit) {
            $found[] = substr($content, $hit[1], \strlen($hit[0]));
        }
    }

    return $found;
}
/**
 * Picks the font to start text decoding with.
 *
 * The page's fonts are tried first, then the document's; when neither has
 * any, a bare Font built from this object's document and config is returned.
 *
 * The parameter is declared explicitly nullable: relying on the "= null"
 * default to make a typed parameter nullable is deprecated as of PHP 8.4.
 *
 * @param Page|null $page
 *
 * @return Font
 */
private function getDefaultFont(?Page $page = null)
{
    $fonts = [];
    if (null !== $page) {
        $fonts = $page->getFonts();
    }

    $fonts = array_merge($fonts, array_values($this->document->getFonts()));

    if (\count($fonts) > 0) {
        return reset($fonts);
    }

    return new Font($this->document, null, null, $this->config);
}
/**
 * Extracts the text of this object's content stream.
 *
 * Each section returned by getSectionsText() is parsed into commands; the text
 * operators are interpreted to rebuild strings, line breaks and spacing, and
 * "Do" commands pull in the text of referenced XObjects. The unique id of the
 * object is kept on a static stack while it is processed so that circular
 * XObject references are skipped.
 *
 * @param Page|null $page page used to look up fonts and XObjects; declared explicitly
 *                        nullable because implicit nullability is deprecated as of PHP 8.4
 *
 * @return string the extracted text followed by a single space
 *
 * @throws \Exception
 */
public function getText(?Page $page = null)
{
    $result = '';
    $sections = $this->getSectionsText($this->content);
    $current_font = $this->getDefaultFont($page);
    $clipped_font = $current_font;

    $current_position_td = ['x' => false, 'y' => false];
    $current_position_tm = ['x' => false, 'y' => false];

    self::$recursionStack[] = $this->getUniqueId();

    // try/finally guarantees the static stack is restored even when decoding
    // throws; otherwise a stale id would make later calls treat this object
    // as a circular reference.
    try {
        foreach ($sections as $section) {
            $commands = $this->getCommandsText($section);
            $reverse_text = false;
            $text = '';

            foreach ($commands as $command) {
                // Operators without a case below (Tc, Ts, Tw, Da, rg, RG, re, co, cs,
                // gs, en, sc, SC, g, G, V, vo, Vo, ...) have no effect on the text.
                switch ($command[self::OPERATOR]) {
                    case 'BMC':
                        if ('ReversedChars' == $command[self::COMMAND]) {
                            $reverse_text = true;
                        }
                        break;

                    // move text current point
                    case 'Td':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if (((float) $x <= 0) ||
                            (false !== $current_position_td['y'] && (float) $y < (float) ($current_position_td['y']))
                        ) {
                            // vertical offset
                            $text .= "\n";
                        } elseif (false !== $current_position_td['x'] && (float) $x > (float) (
                                $current_position_td['x']
                            )
                        ) {
                            // horizontal offset
                            $text .= ' ';
                        }
                        $current_position_td = ['x' => $x, 'y' => $y];
                        break;

                    // move text current point and set leading
                    case 'TD':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if ((float) $y < 0) {
                            $text .= "\n";
                        } elseif ((float) $x <= 0) {
                            $text .= ' ';
                        }
                        break;

                    case 'Tf':
                        list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        if (null !== $page) {
                            $new_font = $page->getFont($id);
                            // If an invalid font ID is given, do not update the font.
                            // This should theoretically never happen, as the PDF spec states for the Tf operator:
                            // "The specified font value shall match a resource name in the Font entry of the default resource dictionary"
                            // (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 435)
                            // But we want to make sure that malformed PDFs do not simply crash.
                            if (null !== $new_font) {
                                $current_font = $new_font;
                            }
                        }
                        break;

                    case 'Q':
                        // Use clip: restore font.
                        $current_font = $clipped_font;
                        break;

                    case 'q':
                        // Use clip: save font.
                        $clipped_font = $current_font;
                        break;

                    case "'":
                    case 'Tj':
                        // Wrap the single string so it can be decoded like a TJ array.
                        $command[self::COMMAND] = [$command];
                        // no break
                    case 'TJ':
                        $sub_text = $current_font->decodeText($command[self::COMMAND]);
                        $text .= $sub_text;
                        break;

                    // set leading
                    case 'TL':
                        $text .= ' ';
                        break;

                    case 'Tm':
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $y = array_pop($args);
                        $x = array_pop($args);
                        if (false !== $current_position_tm['x']) {
                            $delta = abs((float) $x - (float) ($current_position_tm['x']));
                            if ($delta > 10) {
                                $text .= "\t";
                            }
                        }
                        if (false !== $current_position_tm['y']) {
                            $delta = abs((float) $y - (float) ($current_position_tm['y']));
                            if ($delta > 10) {
                                $text .= "\n";
                            }
                        }
                        $current_position_tm = ['x' => $x, 'y' => $y];
                        break;

                    // set horizontal scaling
                    case 'Tz':
                        $text .= "\n";
                        break;

                    // move to start of next line
                    case 'T*':
                        $text .= "\n";
                        break;

                    case 'Do':
                        if (null !== $page) {
                            $args = preg_split('/\s/s', $command[self::COMMAND]);
                            $id = trim(array_pop($args), '/ ');
                            $xobject = $page->getXObject($id);

                            // @todo $xobject could be a ElementXRef object, which would then throw an error
                            if (
                                \is_object($xobject)
                                && $xobject instanceof self
                                && !\in_array($xobject->getUniqueId(), self::$recursionStack, true)
                            ) {
                                // Not a circular reference.
                                $text .= $xobject->getText($page);
                            }
                        }
                        break;

                    default:
                }
            }

            // Fix Hebrew and other reverse text oriented languages.
            // @see: https://github.com/smalot/pdfparser/issues/398
            if ($reverse_text) {
                $chars = mb_str_split($text, 1, mb_internal_encoding());
                $text = implode('', array_reverse($chars));
            }

            $result .= $text;
        }
    } finally {
        array_pop(self::$recursionStack);
    }

    return $result.' ';
}
/**
 * Extracts the text of this object's content stream as a list of fragments.
 *
 * One entry is added per text-showing operator (', Tj, TJ) and one per "Do"
 * command whose XObject is a PDFObject. Positioning operators are ignored.
 *
 * @param Page|null $page page used to look up fonts and XObjects; declared explicitly
 *                        nullable because implicit nullability is deprecated as of PHP 8.4
 *
 * @return array
 *
 * @throws \Exception
 */
public function getTextArray(?Page $page = null)
{
    $text = [];
    $sections = $this->getSectionsText($this->content);
    $current_font = new Font($this->document, null, null, $this->config);

    foreach ($sections as $section) {
        $commands = $this->getCommandsText($section);

        foreach ($commands as $command) {
            // Operators without a case below (Tc, Td, TD, TL, Tm, Ts, Tw, Tz, T*, Da,
            // rg, RG, re, co, cs, gs, en, sc, SC, g, G, V, vo, Vo, ...) add no text.
            switch ($command[self::OPERATOR]) {
                case 'Tf':
                    if (null !== $page) {
                        list($id) = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim($id, '/');
                        $new_font = $page->getFont($id);
                        // Same guard as getText(): an unknown font id must not replace
                        // the current font with null, which would crash on the next
                        // decodeText() call.
                        if (null !== $new_font) {
                            $current_font = $new_font;
                        }
                    }
                    break;

                case "'":
                case 'Tj':
                    // Wrap the single string so it can be decoded like a TJ array.
                    $command[self::COMMAND] = [$command];
                    // no break
                case 'TJ':
                    $sub_text = $current_font->decodeText($command[self::COMMAND]);
                    $text[] = $sub_text;
                    break;

                case 'Do':
                    if (null !== $page) {
                        $args = preg_split('/\s/s', $command[self::COMMAND]);
                        $id = trim(array_pop($args), '/ ');
                        $xobject = $page->getXObject($id);
                        // Same guard as getText(): only PDFObject instances expose
                        // getText(); anything else (e.g. an unresolved reference) is skipped.
                        if ($xobject instanceof self) {
                            $text[] = $xobject->getText($page);
                        }
                    }
                    break;

                default:
            }
        }
    }

    return $text;
}
/**
 * Splits a text section of a content stream into a list of commands.
 *
 * Scanning starts at $offset and ends at the end of the string, at an "ET"
 * marker, after a closing delimiter (']', '>' or ')') or at the first token
 * that cannot be interpreted. A '[' token is parsed recursively, its command
 * value is the list of commands found inside the array.
 *
 * @param string $text_part text to scan
 * @param int    $offset    position to start at; passed by reference and
 *                          left at the position where scanning stopped
 *
 * @return array list of arrays with the keys self::TYPE, self::OPERATOR
 *               and self::COMMAND
 */
public function getCommandsText($text_part, &$offset = 0)
{
    $commands = $matches = [];
    $length = \strlen($text_part);

    while ($offset < $length) {
        $offset += strspn($text_part, "\x00\x09\x0a\x0c\x0d\x20", $offset);
        if ($offset >= $length) {
            // Nothing but trailing whitespace was left; reading
            // $text_part[$offset] would be an out-of-range string offset.
            break;
        }
        $char = $text_part[$offset];

        $operator = '';
        $type = '';
        $command = false;

        switch ($char) {
            case '/':
                $type = $char;
                if (preg_match(
                    '/^\/([A-Z0-9\._,\+]+\s+[0-9.\-]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += \strlen($matches[0]);
                } elseif (preg_match(
                    '/^\/([A-Z0-9\._,\+]+)\s+([A-Z]+)\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    $operator = $matches[2];
                    $command = $matches[1];
                    $offset += \strlen($matches[0]);
                }
                break;

            case '[':
            case ']':
                // array object
                $type = $char;
                if ('[' == $char) {
                    ++$offset;
                    // get elements
                    $command = $this->getCommandsText($text_part, $offset);

                    if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = trim($matches[0]);
                        $offset += \strlen($matches[0]);
                    }
                } else {
                    // ']' ends the current (nested) scan: $command stays false.
                    ++$offset;
                    break;
                }
                break;

            case '<':
            case '>':
                // hexadecimal string
                $type = $char;
                ++$offset;
                if ('<' == $char) {
                    $strpos = strpos($text_part, '>', $offset);
                    if (false === $strpos) {
                        // Unterminated string: take the remainder. Using the
                        // false result in arithmetic would move $offset back
                        // to 1 and could make this loop run forever.
                        $command = (string) substr($text_part, $offset);
                        $offset = $length;
                    } else {
                        $command = substr($text_part, $offset, ($strpos - $offset));
                        $offset = $strpos + 1;
                    }
                }

                if (preg_match('/^\s*[A-Z]{1,2}\s*/si', substr($text_part, $offset), $matches)) {
                    $operator = trim($matches[0]);
                    $offset += \strlen($matches[0]);
                }
                break;

            case '(':
            case ')':
                ++$offset;
                $type = $char;
                $strpos = $offset;
                if ('(' == $char) {
                    // Find the matching ')' while honouring nested
                    // parentheses and backslash escapes.
                    $open_bracket = 1;
                    while ($open_bracket > 0) {
                        if (!isset($text_part[$strpos])) {
                            break;
                        }
                        $ch = $text_part[$strpos];
                        switch ($ch) {
                            case '\\':
                                // REVERSE SOLIDUS (5Ch) (Backslash)
                                // skip next character
                                ++$strpos;
                                break;

                            case '(':
                                // LEFT PARENTHESIS (28h)
                                ++$open_bracket;
                                break;

                            case ')':
                                // RIGHT PARENTHESIS (29h)
                                --$open_bracket;
                                break;
                        }
                        ++$strpos;
                    }
                    $command = substr($text_part, $offset, ($strpos - $offset - 1));
                    $offset = $strpos;

                    if (preg_match('/^\s*([A-Z\']{1,2})\s*/si', substr($text_part, $offset), $matches)) {
                        $operator = $matches[1];
                        $offset += \strlen($matches[0]);
                    }
                }
                break;

            default:
                if ('ET' == substr($text_part, $offset, 2)) {
                    // End of the text object: $command stays false.
                    break;
                } elseif (preg_match(
                    '/^\s*(?P<data>([0-9\.\-]+\s*?)+)\s+(?P<id>[A-Z]{1,3})\s*/si',
                    substr($text_part, $offset),
                    $matches
                )
                ) {
                    // numeric operands followed by an operator
                    $operator = trim($matches['id']);
                    $command = trim($matches['data']);
                    $offset += \strlen($matches[0]);
                } elseif (preg_match('/^\s*([0-9\.\-]+\s*?)+\s*/si', substr($text_part, $offset), $matches)) {
                    // numbers without an operator
                    $type = 'n';
                    $command = trim($matches[0]);
                    $offset += \strlen($matches[0]);
                } elseif (preg_match('/^\s*([A-Z\*]+)\s*/si', substr($text_part, $offset), $matches)) {
                    // operator without operands
                    $type = '';
                    $operator = $matches[1];
                    $command = '';
                    $offset += \strlen($matches[0]);
                }
        }

        if (false !== $command) {
            $commands[] = [
                self::TYPE => $type,
                self::OPERATOR => $operator,
                self::COMMAND => $command,
            ];
        } else {
            break;
        }
    }

    return $commands;
}
/**
 * Creates the object class matching the "Type" (and, for XObject and Font,
 * the "Subtype") entry of the given header.
 *
 * Unknown types, and XObjects with an unknown subtype, yield a plain
 * instance of this class. For fonts, a class named after the subtype is
 * used when it exists, otherwise the generic Font class.
 *
 * @param Document $document
 * @param Header   $header
 * @param string   $content
 * @param Config   $config   optional configuration; may be null
 *
 * @return PDFObject
 */
public static function factory(
    Document $document,
    Header $header,
    $content,
    Config $config = null
) {
    switch ($header->get('Type')->getContent()) {
        case 'XObject':
            switch ($header->get('Subtype')->getContent()) {
                case 'Image':
                    // $config is optional, so it must not be dereferenced
                    // blindly. Without a configuration nothing asks for the
                    // image data to be dropped, hence it is kept.
                    $retain_content = null === $config || $config->getRetainImageContent();

                    return new Image($document, $header, $retain_content ? $content : null, $config);

                case 'Form':
                    return new Form($document, $header, $content, $config);
            }

            return new self($document, $header, $content, $config);

        case 'Pages':
            return new Pages($document, $header, $content, $config);

        case 'Page':
            return new Page($document, $header, $content, $config);

        case 'Encoding':
            return new Encoding($document, $header, $content, $config);

        case 'Font':
            $subtype = $header->get('Subtype')->getContent();
            $classname = '\Smalot\PdfParser\Font\Font'.$subtype;

            if (class_exists($classname)) {
                return new $classname($document, $header, $content, $config);
            }

            return new Font($document, $header, $content, $config);

        default:
            return new self($document, $header, $content, $config);
    }
}
/**
 * Returns an identifier for this object instance, as produced by
 * spl_object_hash().
 *
 * @return string
 */
protected function getUniqueId()
{
    return \spl_object_hash($this);
}
}

View File

@@ -0,0 +1,804 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementMissing;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementXRef;
class Page extends PDFObject
{
/**
* @var Font[]
*/
protected $fonts = null;
/**
* @var PDFObject[]
*/
protected $xobjects = null;
/**
* @var array
*/
protected $dataTm = null;
/**
 * Returns the fonts declared in the page resources, indexed by resource name.
 *
 * Each font is additionally indexed by its name reduced to the characters
 * 0-9, '.', '-' and '_' (when that reduced name is not empty). A non-empty
 * lookup result is cached in $this->fonts.
 *
 * @return Font[]
 */
public function getFonts()
{
    if (null !== $this->fonts) {
        return $this->fonts;
    }

    $resources = $this->get('Resources');
    if (!method_exists($resources, 'has') || !$resources->has('Font')) {
        return [];
    }

    $fontEntry = $resources->get('Font');
    if ($fontEntry instanceof ElementMissing) {
        return [];
    }

    $elements = $fontEntry instanceof Header
        ? $fontEntry->getElements()
        : $fontEntry->getHeader()->getElements();

    $lookup = [];
    foreach ($elements as $name => $candidate) {
        if (!($candidate instanceof Font)) {
            continue;
        }

        $lookup[$name] = $candidate;

        // Second entry under the reduced name.
        $reducedName = preg_replace('/[^0-9\.\-_]/', '', $name);
        if ('' != $reducedName) {
            $lookup[$reducedName] = $candidate;
        }
    }

    $this->fonts = $lookup;

    return $lookup;
}
/**
 * Looks up a font of this page by resource name.
 *
 * According to the PDF specs (https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf, page 238)
 * "The font resource name presented to the Tf operator is arbitrary, as are the names for all kinds of resources".
 * The unfiltered name is therefore tried first; the name reduced to the
 * characters 0-9, '.', '-' and '_' is only used as a fallback.
 *
 * @param string $id
 *
 * @return Font|null the font, or null when neither name is known
 */
public function getFont($id)
{
    $fonts = $this->getFonts();

    if (isset($fonts[$id])) {
        return $fonts[$id];
    }

    $cleaned_id = preg_replace('/[^0-9\.\-_]/', '', $id);
    if (isset($fonts[$cleaned_id])) {
        return $fonts[$cleaned_id];
    }

    return null;
}
/**
 * Returns the XObjects declared in the page resources, indexed by resource name.
 *
 * Each XObject is additionally indexed by its name reduced to the characters
 * 0-9, '.', '-' and '_' (when that reduced name is not empty). A non-empty
 * lookup result is cached in $this->xobjects.
 *
 * @return PDFObject[]
 */
public function getXObjects()
{
    if (null !== $this->xobjects) {
        return $this->xobjects;
    }

    $resources = $this->get('Resources');

    if (method_exists($resources, 'has') && $resources->has('XObject')) {
        // Same guard as in getFonts(): a missing element offers neither
        // getElements() nor a header to read the entries from.
        if ($resources->get('XObject') instanceof ElementMissing) {
            return [];
        }

        if ($resources->get('XObject') instanceof Header) {
            $xobjects = $resources->get('XObject')->getElements();
        } else {
            $xobjects = $resources->get('XObject')->getHeader()->getElements();
        }

        $table = [];

        foreach ($xobjects as $id => $xobject) {
            $table[$id] = $xobject;

            // Store too on cleaned id value (only numeric)
            $id = preg_replace('/[^0-9\.\-_]/', '', $id);
            if ('' != $id) {
                $table[$id] = $xobject;
            }
        }

        return $this->xobjects = $table;
    }

    return [];
}
/**
 * Looks up an XObject of this page by the exact key used in getXObjects().
 *
 * @param string $id
 *
 * @return PDFObject|null the XObject, or null when the key is unknown
 */
public function getXObject($id)
{
    $lookup = $this->getXObjects();

    return isset($lookup[$id]) ? $lookup[$id] : null;
}
/**
 * Returns the text of the page.
 *
 * @param Page $page not used; the page itself is passed to its contents
 *
 * @return string the extracted text, or '' when the page has no usable contents
 */
public function getText(self $page = null)
{
    $contents = $this->buildContentsObject();

    return null === $contents ? '' : $contents->getText($this);
}

/**
 * Returns the text chunks of the page.
 *
 * @param Page $page not used; the page itself is passed to its contents
 *
 * @return array the extracted text chunks, or [] when the page has no usable contents
 */
public function getTextArray(self $page = null)
{
    $contents = $this->buildContentsObject();

    return null === $contents ? [] : $contents->getTextArray($this);
}

/**
 * Resolves the "Contents" entry of the page into one object to extract text from.
 *
 * - missing, null or empty entry: null
 * - PDFObject whose header elements are numerically keyed: a virtual
 *   PDFObject holding the concatenated content of those elements
 * - ElementArray: a virtual PDFObject holding the content of every item,
 *   each followed by a newline
 * - anything else: the entry itself
 *
 * @return PDFObject|mixed|null
 */
private function buildContentsObject()
{
    $contents = $this->get('Contents');

    if (!$contents || $contents instanceof ElementMissing || $contents instanceof ElementNull) {
        return null;
    }

    if ($contents instanceof PDFObject) {
        $elements = $contents->getHeader()->getElements();

        if (!is_numeric(key($elements))) {
            return $contents;
        }

        $new_content = '';

        /** @var PDFObject $element */
        foreach ($elements as $element) {
            if ($element instanceof ElementXRef) {
                $new_content .= $element->getObject()->getContent();
            } else {
                $new_content .= $element->getContent();
            }
        }
    } elseif ($contents instanceof ElementArray) {
        // Create a virtual global content.
        $new_content = '';

        /** @var PDFObject $content */
        foreach ($contents->getContent() as $content) {
            $new_content .= $content->getContent()."\n";
        }
    } else {
        return $contents;
    }

    $header = new Header([], $this->document);

    return new PDFObject($this->document, $header, $new_content, $this->config);
}
/**
 * Returns every command of the page's text sections in its raw, undecoded form.
 *
 * When the content of the "Contents" entry is an array, the content of all
 * its items is concatenated and parsed by the page itself. Otherwise the
 * "Contents" object parses its own content, and a synthetic BT command is
 * emitted in front of the commands of each section.
 *
 * @return array list of commands (arrays with the keys 't', 'o' and 'c')
 */
public function extractRawData()
{
    $commands = [];
    $contents = $this->get('Contents');
    $parts = $contents->getContent();

    if (\is_array($parts)) {
        $stream = '';
        foreach ($parts as $part) {
            $stream .= $part->getContent();
        }

        foreach ($this->getSectionsText($stream) as $section) {
            foreach ($this->getCommandsText($section) as $command) {
                $commands[] = $command;
            }
        }

        return $commands;
    }

    foreach ($contents->getSectionsText($contents->getContent()) as $section) {
        $commands[] = ['t' => '', 'o' => 'BT', 'c' => ''];

        foreach ($contents->getCommandsText($section) as $command) {
            $commands[] = $command;
        }
    }

    return $commands;
}
/**
 * Returns the raw commands of the page with the operands of the Tj and TJ
 * operators decoded.
 *
 * The font used for decoding follows the Tf operators of the stream; the
 * q and Q operators save and restore it. While no font is selected, the
 * decoded text is an empty string.
 *
 * @param array $extractedRawData the data returned by extractRawData(), or
 *                                null/empty to have extractRawData() called
 *
 * @return array the commands, with decoded text operands
 */
public function extractDecodedRawData($extractedRawData = null)
{
    if (!isset($extractedRawData) || !$extractedRawData) {
        $extractedRawData = $this->extractRawData();
    }

    $currentFont = null;
    $clippedFont = null;

    // Decodes a single string operand with the given font (which may be null).
    $decode = static function ($raw, $font) {
        $decoded = '';
        if (null !== $font) {
            $decoded = $font->decodeOctal($raw);
        }

        $decoded = str_replace(
            ['\\\\', '\(', '\)', '\n', '\r', '\t', '\ '],
            ['\\', '(', ')', "\n", "\r", "\t", ' '],
            $decoded
        );

        // ISO-8859-1 to UTF-8. utf8_encode() is deprecated as of PHP 8.2 and
        // is only kept as a fallback for installations without mbstring.
        $decoded = \function_exists('mb_convert_encoding')
            ? mb_convert_encoding($decoded, 'UTF-8', 'ISO-8859-1')
            : utf8_encode($decoded);

        if (null !== $font) {
            $decoded = $font->decodeContent($decoded);
        }

        return $decoded;
    };

    foreach ($extractedRawData as &$command) {
        $operator = $command['o'];

        if ('Tj' == $operator || 'TJ' == $operator) {
            if (!\is_array($command['c'])) {
                $command['c'] = $decode($command['c'], $currentFont);
                continue;
            }

            // Only the entries at even positions are decoded.
            $numText = \count($command['c']);
            for ($i = 0; $i < $numText; $i += 2) {
                $command['c'][$i]['c'] = $decode($command['c'][$i]['c'], $currentFont);
            }
        } elseif ('Tf' == $operator || 'TF' == $operator) {
            $fontId = explode(' ', $command['c'])[0];
            $currentFont = $this->getFont($fontId);
        } elseif ('Q' == $operator) {
            $currentFont = $clippedFont;
        } elseif ('q' == $operator) {
            $clippedFont = $currentFont;
        }
    }
    // Break the reference so that later writes to $command cannot alter the
    // last element of the array.
    unset($command);

    return $extractedRawData;
}
/**
 * Keeps only the commands that take part in text positioning and text showing.
 *
 * The operators kept are:
 *  - BT, ET: begin / end of a text object
 *  - TL:     set the text leading used by T*, ' and "
 *  - Td, TD: move to the start of the next line by an offset (TD also sets the leading)
 *  - Tm:     set the text matrix and the text line matrix
 *  - T*:     move to the start of the next line
 *  - Tj, TJ: show a string / show strings with individual glyph positioning
 *  - ', ":   move to the next line and show a string (" also sets word and
 *            character spacing)
 *
 * @param array $extractedDecodedRawData the data returned by extractDecodedRawData(),
 *                                       or null/empty to have that method called
 *
 * @return array the retained commands, in their original order
 */
public function getDataCommands($extractedDecodedRawData = null)
{
    if (!isset($extractedDecodedRawData) || !$extractedDecodedRawData) {
        $extractedDecodedRawData = $this->extractDecodedRawData();
    }

    $textOperators = ['BT', 'ET', 'TL', 'Td', 'TD', 'Tm', 'T*', 'Tj', "'", '"', 'TJ'];

    $extractedData = [];
    foreach ($extractedDecodedRawData as $command) {
        // The comparison is intentionally non-strict.
        if (\in_array($command['o'], $textOperators)) {
            $extractedData[] = $command;
        }
    }

    return $extractedData;
}
/**
 * Gets the text of the page together with the text matrix (Tm) in effect
 * when it is shown.
 *
 * Every item of the result is an array whose first element is the text
 * matrix and whose second element is the text. The text matrix is an array
 * of 6 numbers: the last two are the X and Y coordinates of the text, the
 * first four describe scaling, rotation and skew. Only the two coordinates
 * are updated by the positioning operators handled here.
 *
 * The result is also stored in $this->dataTm.
 *
 * @param array $dataCommands the data returned by getDataCommands(), or
 *                            null/empty to have getDataCommands() called
 *
 * @return array list of [text matrix, text] pairs
 */
public function getDataTm($dataCommands = null)
{
    if (!isset($dataCommands) || !$dataCommands) {
        $dataCommands = $this->getDataCommands();
    }

    // At the beginning of a text object Tm is the identity matrix.
    $defaultTm = ['1', '0', '0', '1', '0', '0'];

    // Text leading used by the T*, ' and " operators.
    $defaultTl = 0;

    // Positions of the X and Y coordinates inside the matrix.
    $x = 4;
    $y = 5;
    $Tx = 0;
    $Ty = 0;

    $Tm = $defaultTm;
    $Tl = $defaultTl;

    $extractedTexts = $this->getTextArray();
    $extractedData = [];
    foreach ($dataCommands as $command) {
        // The next text to emit is the one following those already emitted.
        // There may be fewer texts than commands, hence the isset() check.
        $textIndex = \count($extractedData);
        $currentText = isset($extractedTexts[$textIndex]) ? $extractedTexts[$textIndex] : null;

        switch ($command['o']) {
            // BT: begin a text object, Tm and Tlm become the identity matrix.
            // ET: end a text object, the text matrix is discarded.
            case 'BT':
            case 'ET':
                $Tm = $defaultTm;
                $Tl = $defaultTl; //review this.
                $Tx = 0;
                $Ty = 0;
                break;

            // leading TL: set the text leading.
            case 'TL':
                $Tl = (float) $command['c'];
                break;

            // tx ty Td: move to the start of the next line, offset from the
            // start of the current line by (tx, ty).
            case 'Td':
                $coord = explode(' ', $command['c']);
                $Tx += (float) $coord[0];
                $Ty += (float) $coord[1];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            // tx ty TD: same effect as "-ty TL" followed by "tx ty Td".
            // The leading is therefore the NEGATED ty and the offset is
            // added exactly like for Td.
            case 'TD':
                $coord = explode(' ', $command['c']);
                $Tl = -((float) $coord[1]);
                $Tx += (float) $coord[0];
                $Ty += (float) $coord[1];
                $Tm[$x] = (string) $Tx;
                $Tm[$y] = (string) $Ty;
                break;

            // a b c d e f Tm: set the text matrix and the text line matrix.
            case 'Tm':
                $Tm = explode(' ', $command['c']);
                $Tx = (float) $Tm[$x];
                $Ty = (float) $Tm[$y];
                break;

            // T*: move to the start of the next line, using the current leading.
            case 'T*':
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                break;

            // string Tj: show a text string.
            // array TJ: show text strings with individual glyph positioning.
            case 'Tj':
            case 'TJ':
                $extractedData[] = [$Tm, $currentText];
                break;

            // string ': same effect as T* followed by "string Tj".
            case "'":
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                $extractedData[] = [$Tm, $currentText];
                break;

            // aw ac string ": same effect as "aw Tw", "ac Tc", "string '".
            case '"':
                $data = explode(' ', (string) $currentText);
                $Ty -= $Tl;
                $Tm[$y] = (string) $Ty;
                // The third part is taken as the string; null when absent.
                $extractedData[] = [$Tm, isset($data[2]) ? $data[2] : null]; //Verify
                break;

            default:
        }
    }
    $this->dataTm = $extractedData;

    return $extractedData;
}
/**
 * Gets the texts positioned at, or near, the given coordinates.
 *
 * The position of a text is taken from the last two numbers of its text
 * matrix, as computed by getDataTm(); getDataTm() is called first when no
 * data is available yet.
 *
 * @param float $x      the X coordinate to search for; when null only the Y
 *                      coordinate is considered (same row)
 * @param float $y      the Y coordinate to search for; when null only the X
 *                      coordinate is considered (same column)
 * @param float $xError tolerance, in both directions, for X to count as "near"
 * @param float $yError tolerance, in both directions, for Y to count as "near"
 *
 * @return array list of [text matrix, text] pairs near the coordinates; an
 *               empty array when nothing matches or when both $x and $y are null
 */
public function getTextXY($x = null, $y = null, $xError = 0, $yError = 0)
{
    if (!isset($this->dataTm) || !$this->dataTm) {
        $this->getDataTm();
    }

    $x = null !== $x ? (float) $x : null;
    $y = null !== $y ? (float) $y : null;

    if (null === $x && null === $y) {
        return [];
    }

    $xError = (float) $xError;
    $yError = (float) $yError;

    $matches = [];
    foreach ($this->dataTm as $entry) {
        list($tm, $text) = $entry;
        $xTm = (float) $tm[4];
        $yTm = (float) $tm[5];

        // A coordinate that was not given places no constraint.
        $xMatches = null === $x
            || (($xTm >= ($x - $xError)) && ($xTm <= ($x + $xError)));
        $yMatches = null === $y
            || (($yTm >= ($y - $yError)) && ($yTm <= ($y + $yError)));

        if ($xMatches && $yMatches) {
            $matches[] = [$tm, $text];
        }
    }

    return $matches;
}
}

View File

@@ -0,0 +1,69 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
/**
 * Node of the page tree: its "Kids" entry lists further Pages nodes and Page objects.
 */
class Pages extends PDFObject
{
    /**
     * Returns the children of this node.
     *
     * @param bool $deep false: return the content of "Kids" as is;
     *                   true: walk the tree and return only the Page objects
     *                   found, nested Pages nodes being expanded recursively
     *
     * @todo Objects other than Pages or Page might need to be treated specifically in order to get Page objects out of them,
     *       see https://github.com/smalot/pdfparser/issues/331
     *
     * @return array
     */
    public function getPages($deep = false)
    {
        if (!$this->has('Kids')) {
            return [];
        }

        $kids = $this->get('Kids')->getContent();
        if (!$deep) {
            return $kids;
        }

        $pages = [];
        foreach ($kids as $kid) {
            if ($kid instanceof self) {
                $pages = array_merge($pages, $kid->getPages(true));
                continue;
            }

            if ($kid instanceof Page) {
                $pages[] = $kid;
            }
        }

        return $pages;
    }
}

View File

@@ -0,0 +1,342 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser;
use Smalot\PdfParser\Element\ElementArray;
use Smalot\PdfParser\Element\ElementBoolean;
use Smalot\PdfParser\Element\ElementDate;
use Smalot\PdfParser\Element\ElementHexa;
use Smalot\PdfParser\Element\ElementName;
use Smalot\PdfParser\Element\ElementNull;
use Smalot\PdfParser\Element\ElementNumeric;
use Smalot\PdfParser\Element\ElementString;
use Smalot\PdfParser\Element\ElementXRef;
use Smalot\PdfParser\RawData\RawDataParser;
/**
* Class Parser
*/
class Parser
{
/**
* @var Config
*/
private $config;
/**
* @var PDFObject[]
*/
protected $objects = [];
protected $rawDataParser;
/**
 * @param array  $cfg    passed through to the raw data parser
 * @param Config $config configuration to use; a new Config is created when omitted
 */
public function __construct($cfg = [], Config $config = null)
{
    if (!$config) {
        $config = new Config();
    }

    $this->config = $config;
    $this->rawDataParser = new RawDataParser($cfg, $config);
}
/**
 * Returns the configuration this parser was constructed with (either the
 * one passed to the constructor or the Config created there).
 *
 * @return Config
 */
public function getConfig()
{
return $this->config;
}
/**
 * Reads a PDF file and parses its content.
 *
 * @param string $filename path of the file to read
 *
 * @return Document
 *
 * @throws \Exception if the file cannot be read or its content cannot be parsed
 */
public function parseFile($filename)
{
    /*
     * 2018/06/20 @doganoo: users complained several times that parseFile()
     * died silently, so the error control operator (@) was removed from
     * this call. See https://github.com/smalot/pdfparser/issues/204
     */
    $content = file_get_contents($filename);

    // file_get_contents() reports a failure by returning false, it does not
    // throw. Raise the documented exception here instead of handing false
    // to the parser.
    if (false === $content) {
        throw new \Exception('Unable to read file "'.$filename.'".');
    }

    return $this->parseContent($content);
}
/**
 * Builds a Document from the raw content of a PDF.
 *
 * @param string $content PDF content to parse
 *
 * @return Document
 *
 * @throws \Exception if secured PDF file was detected
 * @throws \Exception if no object list was found
 */
public function parseContent($content)
{
    // Create structure from raw data.
    list($xref, $data) = $this->rawDataParser->parseData($content);

    $isSecured = isset($xref['trailer']['encrypt']);
    if ($isSecured) {
        throw new \Exception('Secured pdf file are currently not supported.');
    }

    if (empty($data)) {
        throw new \Exception('Object list not found. Possible secured file.');
    }

    // Create destination object.
    $document = new Document();
    $this->objects = [];

    foreach (array_keys($data) as $id) {
        $this->parseObject($id, $data[$id], $document);
        // Drop each raw structure as soon as it has been converted.
        unset($data[$id]);
    }

    $trailer = $this->parseTrailer($xref['trailer'], $document);
    $document->setTrailer($trailer);
    $document->setObjects($this->objects);

    return $document;
}
/**
 * Converts a trailer structure into a Header.
 *
 * Entry names get their first character upper-cased. Values are mapped as follows:
 *  - numeric value: ElementNumeric
 *  - array: ElementArray wrapping the recursively parsed array (no document attached)
 *  - string containing '_': ElementXRef
 *  - any other value: parsed like a '(' header element
 *
 * @param array         $structure
 * @param Document|null $document
 *
 * @return Header
 */
protected function parseTrailer($structure, $document)
{
    $elements = [];

    foreach ($structure as $key => $raw) {
        $label = ucfirst($key);

        if (is_numeric($raw)) {
            $element = new ElementNumeric($raw);
        } elseif (\is_array($raw)) {
            $nested = $this->parseTrailer($raw, null);
            $element = new ElementArray($nested, null);
        } elseif (false !== strpos($raw, '_')) {
            $element = new ElementXRef($raw, $document);
        } else {
            $element = $this->parseHeaderElement('(', $raw, $document);
        }

        $elements[$label] = $element;
    }

    return new Header($elements, $document);
}
/**
 * Converts the raw structure of one object into a PDFObject and registers
 * it in $this->objects under $id (unless that id is already registered).
 *
 * The parts of the structure are processed in order; each one either
 * replaces the header built so far or sets the content. A stream whose
 * header type is "ObjStm" is handled differently: the objects packed
 * inside it are registered individually and the method returns without
 * registering the stream itself.
 *
 * @param string $id
 * @param array $structure
 * @param Document $document
 */
protected function parseObject($id, $structure, $document)
{
$header = new Header([], $document);
$content = '';
foreach ($structure as $position => $part) {
// An integer part has no type/value pair; replace it with an empty pair.
if (\is_int($part)) {
$part = [null, null];
}
switch ($part[0]) {
// Array: every item becomes a header element.
case '[':
$elements = [];
foreach ($part[1] as $sub_element) {
$sub_type = $sub_element[0];
$sub_value = $sub_element[1];
$elements[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
}
$header = new Header($elements, $document);
break;
// Dictionary: becomes the header of the object.
case '<<':
$header = $this->parseHeader($part[1], $document);
break;
case 'stream':
// NOTE(review): $part[3][0] is presumably the decoded stream and
// $part[1] the raw one - confirm against RawDataParser.
$content = isset($part[3][0]) ? $part[3][0] : $part[1];
if ($header->get('Type')->equals('ObjStm')) {
$match = [];
// Split xrefs and contents.
preg_match('/^((\d+\s+\d+\s*)*)(.*)$/s', $content, $match);
$content = $match[3];
// Extract xrefs.
$xrefs = preg_split(
'/(\d+\s+\d+\s*)/s',
$match[1],
-1,
\PREG_SPLIT_NO_EMPTY | \PREG_SPLIT_DELIM_CAPTURE
);
// Map every position inside the content to the id of the object
// starting there. Note that $id and $position are overwritten
// from here on; this is harmless because this branch returns.
$table = [];
foreach ($xrefs as $xref) {
list($id, $position) = preg_split("/\s+/", trim($xref));
$table[$position] = $id;
}
ksort($table);
$ids = array_values($table);
$positions = array_keys($table);
// Each object spans from its position to the next position
// (or to the end of the content for the last one).
foreach ($positions as $index => $position) {
$id = $ids[$index].'_0';
$next_position = isset($positions[$index + 1]) ? $positions[$index + 1] : \strlen($content);
$sub_content = substr($content, $position, (int) $next_position - (int) $position);
$sub_header = Header::parse($sub_content, $document);
$object = PDFObject::factory($document, $sub_header, '', $this->config);
$this->objects[$id] = $object;
}
// It is not necessary to store this content.
$content = '';
return;
}
break;
// Any other part: a single element that becomes the whole header,
// unless parseHeaderElement() yields nothing for it.
default:
if ('null' != $part) {
$element = $this->parseHeaderElement($part[0], $part[1], $document);
if ($element) {
$header = new Header([$element], $document);
}
}
break;
}
}
if (!isset($this->objects[$id])) {
$this->objects[$id] = PDFObject::factory($document, $header, $content, $this->config);
}
}
/**
 * Converts a flat list of alternating name and value tokens into a Header.
 *
 * The token at an even position provides the element name (its index 1);
 * the token right after it provides the type (index 0) and the value
 * (index 1) of the element.
 *
 * @param array    $structure
 * @param Document $document
 *
 * @return Header
 *
 * @throws \Exception
 */
protected function parseHeader($structure, $document)
{
    $elements = [];
    $total = \count($structure);
    $index = 0;

    while ($index < $total) {
        $name = $structure[$index][1];
        $valueToken = $structure[$index + 1];

        $elements[$name] = $this->parseHeaderElement($valueToken[0], $valueToken[1], $document);

        $index += 2;
    }

    return new Header($elements, $document);
}
/**
 * Converts one raw token, given as type and value, into the matching
 * element object.
 *
 * The switch compares loosely and in the order written, which is why a
 * null $type ends up in the '' case and yields null.
 *
 * @param string $type
 * @param string|array $value
 * @param Document $document
 *
 * @return Element|Header|null null for the types that carry no element
 *
 * @throws \Exception if the type is not one of the handled types
 */
protected function parseHeaderElement($type, $value, $document)
{
switch ($type) {
// Dictionary: parsed into a Header.
case '<<':
case '>>':
$header = $this->parseHeader($value, $document);
// NOTE(review): the object built here is discarded; presumably the call
// is kept for its side effects - confirm before removing.
PDFObject::factory($document, $header, null, $this->config);
return $header;
case 'numeric':
return new ElementNumeric($value);
case 'boolean':
return new ElementBoolean($value);
case 'null':
return new ElementNull();
// Literal string: a date when it parses as one, a plain string otherwise.
case '(':
if ($date = ElementDate::parse('('.$value.')', $document)) {
return $date;
}
return ElementString::parse('('.$value.')', $document);
// Hexadecimal string: decoded, then handled like a literal string.
case '<':
return $this->parseHeaderElement('(', ElementHexa::decode($value, $document), $document);
// Name.
case '/':
return ElementName::parse('/'.$value, $document);
// Reference to another object.
case 'ojbref': // old mistake in tcpdf parser
case 'objref':
return new ElementXRef($value, $document);
// Array: every item is converted recursively; a non-array value gives
// an empty array.
case '[':
$values = [];
if (\is_array($value)) {
foreach ($value as $sub_element) {
$sub_type = $sub_element[0];
$sub_value = $sub_element[1];
$values[] = $this->parseHeaderElement($sub_type, $sub_value, $document);
}
}
return new ElementArray($values, $document);
case 'endstream':
case 'obj': //I don't know what it means but got my project fixed.
case '':
// Nothing to do with.
return null;
default:
throw new \Exception('Invalid type: "'.$type.'".');
}
}
}

View File

@@ -0,0 +1,389 @@
<?php
/**
* This file is based on code of tecnickcom/TCPDF PDF library.
*
* Original author Nicola Asuni (info@tecnick.com) and
* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
*
* @see https://github.com/tecnickcom/TCPDF
*
* Original code was licensed on the terms of the LGPL v3.
*
* ------------------------------------------------------------------------------
*
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
* @date 2020-01-06
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\RawData;
use Exception;
/**
 * Decoders for the PDF stream filters that can be handled in pure PHP.
 */
class FilterHelper
{
    /**
     * Names of the filters decodeFilter() is able to decode.
     *
     * @var string[]
     */
    protected $availableFilters = ['ASCIIHexDecode', 'ASCII85Decode', 'LZWDecode', 'FlateDecode', 'RunLengthDecode'];

    /**
     * Decode data using the specified filter type.
     *
     * Unknown filter names leave the data untouched.
     *
     * @param string $filter Filter name
     * @param string $data   Data to decode
     *
     * @return string Decoded data string
     *
     * @throws Exception if a certain decode function is not implemented yet
     *                   or the data is invalid for the chosen filter
     */
    public function decodeFilter($filter, $data)
    {
        switch ($filter) {
            case 'ASCIIHexDecode':
                return $this->decodeFilterASCIIHexDecode($data);

            case 'ASCII85Decode':
                return $this->decodeFilterASCII85Decode($data);

            case 'LZWDecode':
                return $this->decodeFilterLZWDecode($data);

            case 'FlateDecode':
                return $this->decodeFilterFlateDecode($data);

            case 'RunLengthDecode':
                return $this->decodeFilterRunLengthDecode($data);

            case 'CCITTFaxDecode':
                throw new Exception('Decode CCITTFaxDecode not implemented yet.');

            case 'JBIG2Decode':
                throw new Exception('Decode JBIG2Decode not implemented yet.');

            case 'DCTDecode':
                throw new Exception('Decode DCTDecode not implemented yet.');

            case 'JPXDecode':
                throw new Exception('Decode JPXDecode not implemented yet.');

            case 'Crypt':
                throw new Exception('Decode Crypt not implemented yet.');

            default:
                return $data;
        }
    }

    /**
     * ASCIIHexDecode
     *
     * Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception on non-hexadecimal characters, or on an odd number of
     *                   digits that is not terminated by the EOD marker
     */
    protected function decodeFilterASCIIHexDecode($data)
    {
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);
        // check for EOD character: GREATER-THAN SIGN (3Eh)
        $eodPos = strpos($data, '>');
        $hasEod = (false !== $eodPos);
        if ($hasEod) {
            // remove EOD and extra data (if any)
            $data = substr($data, 0, $eodPos);
        }
        if (0 !== (\strlen($data) % 2)) {
            // odd number of hexadecimal digits
            if (!$hasEod) {
                throw new Exception('decodeFilterASCIIHexDecode: invalid code');
            }
            // EOD shall behave as if a 0 (zero) followed the last digit,
            // i.e. "7>" decodes to 0x70 (the padding goes AFTER the digit)
            $data .= '0';
        }
        // check for invalid characters
        if (preg_match('/[^a-fA-F\d]/', $data) > 0) {
            throw new Exception('decodeFilterASCIIHexDecode: invalid code');
        }

        // get one byte of binary data for each pair of ASCII hexadecimal digits
        return pack('H*', $data);
    }

    /**
     * ASCII85Decode
     *
     * Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception on characters outside the base-85 alphabet, on a 'z'
     *                   inside a group, or on a final group of a single character
     */
    protected function decodeFilterASCII85Decode($data)
    {
        // initialize string to return
        $decoded = '';
        // all white-space characters shall be ignored
        $data = preg_replace('/[\s]/', '', $data);
        // remove the optional start sequence <~ (3Ch)(7Eh), but only when it
        // really is at the start: '<' is a valid data character, so data ending
        // in '<' followed by the EOD marker also contains the bytes "<~"
        if (0 === strpos($data, '<~')) {
            $data = substr($data, 2);
        }
        // check for EOD: 2-character sequence ~> (7Eh)(3Eh)
        $eod = strpos($data, '~>');
        if (false !== $eod) {
            // remove EOD and extra data (if any)
            $data = substr($data, 0, $eod);
        }
        // data length
        $data_length = \strlen($data);
        // check for invalid characters: only '!'..'u' and the 'z' shortcut are allowed
        if (preg_match('/[^\x21-\x75\x7a]/', $data) > 0) {
            throw new Exception('decodeFilterASCII85Decode: invalid code');
        }
        // z sequence
        $zseq = \chr(0).\chr(0).\chr(0).\chr(0);
        // position inside a group of 5 characters (0-4)
        $group_pos = 0;
        $tuple = 0;
        $pow85 = [(85 * 85 * 85 * 85), (85 * 85 * 85), (85 * 85), 85, 1];
        // for each byte
        for ($i = 0; $i < $data_length; ++$i) {
            // get char value
            $char = \ord($data[$i]);
            if (122 == $char) { // 'z'
                if (0 == $group_pos) {
                    $decoded .= $zseq;
                } else {
                    throw new Exception('decodeFilterASCII85Decode: invalid code');
                }
            } else {
                // the value represented by a group of 5 characters should never be greater than 2^32 - 1
                $tuple += (($char - 33) * $pow85[$group_pos]);
                if (4 == $group_pos) {
                    $decoded .= \chr($tuple >> 24).\chr($tuple >> 16).\chr($tuple >> 8).\chr($tuple);
                    $tuple = 0;
                    $group_pos = 0;
                } else {
                    ++$group_pos;
                }
            }
        }
        // a partial last group is rounded up so the truncated digits do not
        // change the leading bytes that are kept below
        if ($group_pos > 1) {
            $tuple += $pow85[($group_pos - 1)];
        }
        // last tuple (if any)
        switch ($group_pos) {
            case 4:
                $decoded .= \chr($tuple >> 24).\chr($tuple >> 16).\chr($tuple >> 8);
                break;

            case 3:
                $decoded .= \chr($tuple >> 24).\chr($tuple >> 16);
                break;

            case 2:
                $decoded .= \chr($tuple >> 24);
                break;

            case 1:
                throw new Exception('decodeFilterASCII85Decode: invalid code');
        }

        return $decoded;
    }

    /**
     * FlateDecode
     *
     * Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string data string
     *
     * @throws Exception if the data cannot be decompressed
     */
    protected function decodeFilterFlateDecode($data)
    {
        /*
         * gzuncompress reports failures (e.g. empty $data) as an E_WARNING.
         * The temporary handler below turns that warning into an Exception
         * so callers can catch it.
         */
        set_error_handler(static function ($errNo, $errStr) {
            if (\E_WARNING === $errNo) {
                throw new Exception($errStr);
            }

            // fallback to default php error handler
            return false;
        });

        try {
            $decoded = gzuncompress($data);
            if (false === $decoded) {
                throw new Exception('decodeFilterFlateDecode: invalid code');
            }
        } finally {
            // Restore old handler just in case it was customized outside of PDFParser.
            restore_error_handler();
        }

        return $decoded;
    }

    /**
     * LZWDecode
     *
     * Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data.
     *
     * @param string $data Data to decode
     *
     * @return string Data string
     */
    protected function decodeFilterLZWDecode($data)
    {
        // initialize string to return
        $decoded = '';
        // convert string to a string of '0'/'1' characters
        $bitstring = '';
        $byte_count = \strlen($data);
        for ($i = 0; $i < $byte_count; ++$i) {
            $bitstring .= sprintf('%08b', \ord($data[$i]));
        }
        // total number of bits and read position (a cursor avoids re-copying
        // the remaining bit string after every code)
        $bit_count = \strlen($bitstring);
        $bit_pos = 0;
        // initialize code length in bits
        $bitlen = 9;
        // initialize dictionary index
        $dix = 258;
        // initialize the dictionary (with the first 256 entries).
        $dictionary = [];
        for ($i = 0; $i < 256; ++$i) {
            $dictionary[$i] = \chr($i);
        }
        // previous val
        $prev_index = 0;
        // read $bitlen bits at a time until the data ends or the EOD marker (257) shows up
        while ($bit_pos < $bit_count) {
            $index = bindec(substr($bitstring, $bit_pos, $bitlen));
            if (257 == $index) {
                break;
            }
            $bit_pos += $bitlen;
            if (256 == $index) { // clear-table marker
                // reset code length in bits
                $bitlen = 9;
                // reset dictionary index
                $dix = 258;
                $prev_index = 256;
                // reset the dictionary (with the first 256 entries).
                $dictionary = [];
                for ($i = 0; $i < 256; ++$i) {
                    $dictionary[$i] = \chr($i);
                }
            } elseif (256 == $prev_index) {
                // first entry
                $decoded .= $dictionary[$index];
                $prev_index = $index;
            } else {
                // check if index exist in the dictionary
                if ($index < $dix) {
                    // index exist on dictionary
                    $decoded .= $dictionary[$index];
                    $dic_val = $dictionary[$prev_index].$dictionary[$index][0];
                    // store current index
                    $prev_index = $index;
                } else {
                    // index do not exist on dictionary yet: it can only be the
                    // entry that is about to be created (previous + its first char)
                    $dic_val = $dictionary[$prev_index].$dictionary[$prev_index][0];
                    $decoded .= $dic_val;
                    // that new entry (stored at $dix below) is now the previous
                    // sequence; without this the following codes decode wrongly
                    $prev_index = $dix;
                }
                // update dictionary
                $dictionary[$dix] = $dic_val;
                ++$dix;
                // change bit length by case
                if (2047 == $dix) {
                    $bitlen = 12;
                } elseif (1023 == $dix) {
                    $bitlen = 11;
                } elseif (511 == $dix) {
                    $bitlen = 10;
                }
            }
        }

        return $decoded;
    }

    /**
     * RunLengthDecode
     *
     * Decompresses data encoded using a byte-oriented run-length encoding algorithm.
     *
     * @param string $data Data to decode
     *
     * @return string
     */
    protected function decodeFilterRunLengthDecode($data)
    {
        // initialize string to return
        $decoded = '';
        // data length
        $data_length = \strlen($data);
        $i = 0;
        while ($i < $data_length) {
            // get current byte value
            $byte = \ord($data[$i]);
            if (128 == $byte) {
                // a length value of 128 denote EOD
                break;
            }
            if ($byte < 128) {
                // if the length byte is in the range 0 to 127
                // the following length + 1 (1 to 128) bytes shall be copied literally during decompression
                $decoded .= substr($data, ($i + 1), ($byte + 1));
                // move to next block
                $i += ($byte + 2);
            } else {
                if (!isset($data[($i + 1)])) {
                    // truncated input: the byte to repeat is missing
                    break;
                }
                // if length is in the range 129 to 255,
                // the following single byte shall be copied 257 - length (2 to 128) times during decompression
                $decoded .= str_repeat($data[($i + 1)], (257 - $byte));
                // move to next block
                $i += 2;
            }
        }

        return $decoded;
    }

    /**
     * @return array list of available filters
     */
    public function getAvailableFilters()
    {
        return $this->availableFilters;
    }
}

View File

@@ -0,0 +1,883 @@
<?php
/**
* This file is based on code of tecnickcom/TCPDF PDF library.
*
* Original author Nicola Asuni (info@tecnick.com) and
* contributors (https://github.com/tecnickcom/TCPDF/graphs/contributors).
*
* @see https://github.com/tecnickcom/TCPDF
*
* Original code was licensed on the terms of the LGPL v3.
*
* ------------------------------------------------------------------------------
*
* @file This file is part of the PdfParser library.
*
* @author Konrad Abicht <k.abicht@gmail.com>
* @date 2020-01-06
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\RawData;
use Exception;
use Smalot\PdfParser\Config;
class RawDataParser
{
/**
* Parser configuration object; the methods of this class read the PDF
* whitespace definitions from it.
*
* @var \Smalot\PdfParser\Config
*/
private $config;
/**
* Configuration array. The defaults below are merged with the array passed
* to the constructor.
*/
protected $cfg = [
// if `true` ignore filter decoding errors
'ignore_filter_decoding_errors' => true,
// if `true` ignore missing filter decoding errors
'ignore_missing_filter_decoders' => true,
];
/**
* Helper used to decode stream filters; set to a new FilterHelper in the constructor.
*/
protected $filterHelper;
/**
* Cache of already parsed indirect objects, keyed by object reference
* (see getObjectVal()).
*/
protected $objects;
/**
 * @param array       $cfg    Configuration array, default is []; merged over the default values
 * @param Config|null $config Parser configuration; a default Config is created when omitted
 */
public function __construct($cfg = [], ?Config $config = null)
{
    // merge given array with default values
    $this->cfg = array_merge($this->cfg, $cfg);
    $this->filterHelper = new FilterHelper();
    // explicit nullable type above: relying on "= null" to make the type
    // nullable is deprecated in recent PHP versions
    $this->config = $config ?: new Config();
}
/**
 * Decode the specified stream.
 *
 * The stream is first cut to the declared /Length (when that is shorter than
 * the raw data), then every filter listed under /Filter that the filter helper
 * supports is applied in order. Unsupported filters are returned to the caller.
 *
 * @param string $pdfData PDF data
 * @param array  $xref
 * @param array  $sdic    Stream's dictionary array
 * @param string $stream  Stream to decode
 *
 * @return array containing decoded stream data and remaining filters
 *
 * @throws Exception when a filter fails to decode and the matching
 *                   `ignore_*` configuration flag is disabled
 */
protected function decodeStream($pdfData, $xref, $sdic, $stream)
{
    // get stream length and filters
    $slength = \strlen($stream);
    if ($slength <= 0) {
        return ['', []];
    }
    $filters = [];
    foreach ($sdic as $k => $v) {
        // only name tokens that are followed by a value are of interest
        if ('/' != $v[0] || !isset($sdic[($k + 1)])) {
            continue;
        }
        if (('Length' == $v[1]) && ('numeric' == $sdic[($k + 1)][0])) {
            // get declared stream length
            $declength = (int) ($sdic[($k + 1)][1]);
            if ($declength < $slength) {
                $stream = substr($stream, 0, $declength);
                $slength = $declength;
            }
        } elseif ('Filter' == $v[1]) {
            // resolve indirect object
            $objval = $this->getObjectVal($pdfData, $xref, $sdic[($k + 1)]);
            if ('/' == $objval[0]) {
                // single filter
                $filters[] = $objval[1];
            } elseif ('[' == $objval[0]) {
                // array of filters
                foreach ($objval[1] as $flt) {
                    if ('/' == $flt[0]) {
                        $filters[] = $flt[1];
                    }
                }
            }
        }
    }
    // decode the stream
    $remaining_filters = [];
    $available_filters = $this->filterHelper->getAvailableFilters();
    foreach ($filters as $filter) {
        if (!\in_array($filter, $available_filters, true)) {
            // add missing filter to array
            $remaining_filters[] = $filter;
            continue;
        }
        try {
            $stream = $this->filterHelper->decodeFilter($filter, $stream);
        } catch (Exception $e) {
            $emsg = $e->getMessage();
            // a message starting with '~' is treated as "decoder missing";
            // anything else (including an empty message) as a decoding error
            $is_missing_decoder = ('' !== $emsg) && ('~' === $emsg[0]);
            $ignore = $is_missing_decoder
                ? $this->cfg['ignore_missing_filter_decoders']
                : $this->cfg['ignore_filter_decoding_errors'];
            if (!$ignore) {
                // keep the original exception attached for debugging
                throw new Exception($emsg, 0, $e);
            }
        }
    }

    return [$stream, $remaining_filters];
}
/**
 * Decode a classic (table based) Cross-Reference section and its trailer.
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts (position of the 'xref' keyword)
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws Exception if no trailer follows the table
 */
protected function decodeXref($pdfData, $startxref, $xref = [])
{
    // step over the 4 characters of the 'xref' keyword, then over white space
    $startxref += 4;
    $offset = $startxref + strspn($pdfData, $this->config->getPdfWhitespaces(), $startxref);
    // object number the next table entry belongs to
    $obj_num = 0;

    // walk the entries / subsection headers as long as they directly follow each other
    while (true) {
        $found = preg_match('/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset);
        if (!($found > 0)) {
            break;
        }
        if ($matches[0][1] != $offset) {
            // the next match is not adjacent: we are on another section
            break;
        }
        $offset += \strlen($matches[0][0]);

        switch ($matches[3][0]) {
            case 'n':
                // in-use entry, indexed as [object number]_[generation number];
                // an offset that is already known is never overwritten
                $index = $obj_num.'_'.(int) ($matches[2][0]);
                if (!isset($xref['xref'][$index])) {
                    $xref['xref'][$index] = (int) ($matches[1][0]);
                }
                ++$obj_num;
                break;

            case 'f':
                // free entry: only consumes an object number
                ++$obj_num;
                break;

            default:
                // subsection header: first number is the starting object number
                $obj_num = (int) ($matches[1][0]);
                break;
        }
    }

    // the trailer dictionary is mandatory
    if (!(preg_match('/trailer[\s]*<<(.*)>>/isU', $pdfData, $matches, \PREG_OFFSET_CAPTURE, $offset) > 0)) {
        throw new Exception('Unable to find trailer');
    }
    $trailer_data = $matches[1][0];

    if (empty($xref['trailer'])) {
        // only the first trailer seen (the last updated version) is recorded
        $xref['trailer'] = [];
        if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['size'] = (int) ($matches[1]);
        }
        if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['root'] = (int) ($matches[1]).'_'.(int) ($matches[2]);
        }
        if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['encrypt'] = (int) ($matches[1]).'_'.(int) ($matches[2]);
        }
        if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['info'] = (int) ($matches[1]).'_'.(int) ($matches[2]);
        }
        if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
            $xref['trailer']['id'] = [$matches[1], $matches[2]];
        }
    }

    // follow the link to the previous cross-reference section, if any
    if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
        $xref = $this->getXrefData($pdfData, (int) ($matches[1]), $xref);
    }

    return $xref;
}
/**
 * Decode the Cross-Reference Stream section
 *
 * Reads the stream object found at $startxref, collects the entries of its
 * dictionary (/Type, /Index, /Prev, /W, /DecodeParms and, for the first
 * trailer seen, the trailer keys), undoes the PNG predictor when one is
 * declared, and turns the decoded rows into xref entries.
 *
 * @param string $pdfData   PDF data
 * @param int    $startxref Offset at which the xref section starts
 * @param array  $xref      Previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws Exception if unknown PNG predictor detected
 */
protected function decodeXrefStream($pdfData, $startxref, $xref = [])
{
    // try to read Cross-Reference Stream
    $xrefobj = $this->getRawObject($pdfData, $startxref);
    $xrefcrs = $this->getIndirectObject($pdfData, $xref, $xrefobj[1], $startxref, true);
    if (!isset($xref['trailer']) || empty($xref['trailer'])) {
        // get only the last updated version
        $xref['trailer'] = [];
        $filltrailer = true;
    } else {
        $filltrailer = false;
    }
    if (!isset($xref['xref'])) {
        $xref['xref'] = [];
    }
    $valid_crs = false;
    $columns = 0;
    $predictor = null;
    $sarr = $xrefcrs[0][1];
    if (!\is_array($sarr)) {
        $sarr = [];
    }
    $wb = [];
    foreach ($sarr as $k => $v) {
        if (
            ('/' == $v[0])
            && ('Type' == $v[1])
            && (
                isset($sarr[($k + 1)])
                && '/' == $sarr[($k + 1)][0]
                && 'XRef' == $sarr[($k + 1)][1]
            )
        ) {
            $valid_crs = true;
        } elseif (('/' == $v[0]) && ('Index' == $v[1]) && (isset($sarr[($k + 1)]))) {
            // first object number in the subsection
            $index_first = (int) ($sarr[($k + 1)][1][0][1]);
        } elseif (('/' == $v[0]) && ('Prev' == $v[1]) && (isset($sarr[($k + 1)]) && ('numeric' == $sarr[($k + 1)][0]))) {
            // get previous xref offset
            $prevxref = (int) ($sarr[($k + 1)][1]);
        } elseif (('/' == $v[0]) && ('W' == $v[1]) && (isset($sarr[($k + 1)]))) {
            // number of bytes (in the decoded stream) of the corresponding field
            $wb[0] = (int) ($sarr[($k + 1)][1][0][1]);
            $wb[1] = (int) ($sarr[($k + 1)][1][1][1]);
            $wb[2] = (int) ($sarr[($k + 1)][1][2][1]);
        } elseif (('/' == $v[0]) && ('DecodeParms' == $v[1]) && (isset($sarr[($k + 1)][1]))) {
            $decpar = $sarr[($k + 1)][1];
            foreach ($decpar as $kdc => $vdc) {
                if (
                    '/' == $vdc[0]
                    && 'Columns' == $vdc[1]
                    && (
                        isset($decpar[($kdc + 1)])
                        && 'numeric' == $decpar[($kdc + 1)][0]
                    )
                ) {
                    $columns = (int) ($decpar[($kdc + 1)][1]);
                } elseif (
                    '/' == $vdc[0]
                    && 'Predictor' == $vdc[1]
                    && (
                        isset($decpar[($kdc + 1)])
                        && 'numeric' == $decpar[($kdc + 1)][0]
                    )
                ) {
                    $predictor = (int) ($decpar[($kdc + 1)][1]);
                }
            }
        } elseif ($filltrailer) {
            if (('/' == $v[0]) && ('Size' == $v[1]) && (isset($sarr[($k + 1)]) && ('numeric' == $sarr[($k + 1)][0]))) {
                $xref['trailer']['size'] = $sarr[($k + 1)][1];
            } elseif (('/' == $v[0]) && ('Root' == $v[1]) && (isset($sarr[($k + 1)]) && ('objref' == $sarr[($k + 1)][0]))) {
                $xref['trailer']['root'] = $sarr[($k + 1)][1];
            } elseif (('/' == $v[0]) && ('Info' == $v[1]) && (isset($sarr[($k + 1)]) && ('objref' == $sarr[($k + 1)][0]))) {
                $xref['trailer']['info'] = $sarr[($k + 1)][1];
            } elseif (('/' == $v[0]) && ('Encrypt' == $v[1]) && (isset($sarr[($k + 1)]) && ('objref' == $sarr[($k + 1)][0]))) {
                $xref['trailer']['encrypt'] = $sarr[($k + 1)][1];
            } elseif (('/' == $v[0]) && ('ID' == $v[1]) && (isset($sarr[($k + 1)]))) {
                $xref['trailer']['id'] = [];
                $xref['trailer']['id'][0] = $sarr[($k + 1)][1][0][1];
                $xref['trailer']['id'][1] = $sarr[($k + 1)][1][1][1];
            }
        }
    }
    // decode data
    if ($valid_crs && isset($xrefcrs[1][3][0])) {
        if (null !== $predictor) {
            // number of bytes in a row (one predictor tag byte + the data columns)
            $rowlen = ($columns + 1);
            // convert the stream into an array of integers
            $sdata = unpack('C*', $xrefcrs[1][3][0]);
            // split the rows
            $sdata = array_chunk($sdata, $rowlen);
            // initialize decoded array
            $ddata = [];
            // initialize first row with zeros
            $prev_row = array_fill(0, $rowlen, 0);
            // for each row apply PNG unpredictor
            foreach ($sdata as $k => $row) {
                // initialize new row
                $ddata[$k] = [];
                // get PNG predictor value (first byte of every row selects the filter)
                $predictor = (10 + $row[0]);
                // for each byte on the row
                for ($i = 1; $i <= $columns; ++$i) {
                    // new index
                    $j = ($i - 1);
                    $row_up = $prev_row[$j];
                    if (1 == $i) {
                        $row_left = 0;
                        $row_upleft = 0;
                    } else {
                        // PNG filters work on RECONSTRUCTED neighbours, so the
                        // left byte comes from the decoded row, not from the
                        // still-encoded input row
                        $row_left = $ddata[$k][($j - 1)];
                        $row_upleft = $prev_row[($j - 1)];
                    }
                    switch ($predictor) {
                        case 10: // PNG prediction (on encoding, PNG None on all rows)
                            $ddata[$k][$j] = $row[$i];
                            break;

                        case 11: // PNG prediction (on encoding, PNG Sub on all rows)
                            $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
                            break;

                        case 12: // PNG prediction (on encoding, PNG Up on all rows)
                            $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
                            break;

                        case 13: // PNG prediction (on encoding, PNG Average on all rows)
                            // integer (floored) average; avoids feeding a float to '&'
                            $ddata[$k][$j] = (($row[$i] + (($row_left + $row_up) >> 1)) & 0xff);
                            break;

                        case 14: // PNG prediction (on encoding, PNG Paeth on all rows)
                            // initial estimate
                            $p = ($row_left + $row_up - $row_upleft);
                            // distances
                            $pa = abs($p - $row_left);
                            $pb = abs($p - $row_up);
                            $pc = abs($p - $row_upleft);
                            $pmin = min($pa, $pb, $pc);
                            // return minimum distance (ties resolved left, up, up-left)
                            switch ($pmin) {
                                case $pa:
                                    $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
                                    break;

                                case $pb:
                                    $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
                                    break;

                                case $pc:
                                    $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xff);
                                    break;
                            }
                            break;

                        default: // PNG prediction (on encoding, PNG optimum)
                            throw new Exception('Unknown PNG predictor: '.$predictor);
                    }
                }
                $prev_row = $ddata[$k];
            } // end for each row
            // complete decoding
        } else {
            // number of bytes in a row
            $rowlen = array_sum($wb);
            // convert the stream into an array of integers
            $sdata = unpack('C*', $xrefcrs[1][3][0]);
            // split the rows
            $ddata = array_chunk($sdata, $rowlen);
        }
        $sdata = [];
        // for every row
        foreach ($ddata as $k => $row) {
            // initialize new row
            $sdata[$k] = [0, 0, 0];
            if (0 == $wb[0]) {
                // default type field
                $sdata[$k][0] = 1;
            }
            $i = 0; // count bytes in the row
            // for every column
            for ($c = 0; $c < 3; ++$c) {
                // for every byte on the column (big-endian field of $wb[$c] bytes)
                for ($b = 0; $b < $wb[$c]; ++$b) {
                    if (isset($row[$i])) {
                        $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
                    }
                    ++$i;
                }
            }
        }
        $ddata = [];
        // fill xref
        if (isset($index_first)) {
            $obj_num = $index_first;
        } else {
            $obj_num = 0;
        }
        foreach ($sdata as $k => $row) {
            switch ($row[0]) {
                case 0: // (f) linked list of free objects
                    break;

                case 1: // (n) objects that are in use but are not compressed
                    // create unique object index: [object number]_[generation number]
                    $index = $obj_num.'_'.$row[2];
                    // check if object already exist
                    if (!isset($xref['xref'][$index])) {
                        // store object offset position
                        $xref['xref'][$index] = $row[1];
                    }
                    break;

                case 2: // compressed objects
                    // $row[1] = object number of the object stream in which this object is stored
                    // $row[2] = index of this object within the object stream
                    $index = $row[1].'_0_'.$row[2];
                    $xref['xref'][$index] = -1;
                    break;

                default: // null objects
                    break;
            }
            ++$obj_num;
        }
    } // end decoding data
    if (isset($prevxref)) {
        // get previous xref
        $xref = $this->getXrefData($pdfData, $prevxref, $xref);
    }

    return $xref;
}
/**
 * Build the regular expression matching the header of an indirect object
 * ("<number> <generation> obj"), using the configured PDF whitespace pattern
 * as separator.
 *
 * @param array $objRefArr object number at index 0, generation number at index 1
 */
protected function getObjectHeaderPattern($objRefArr): string
{
    // consider all whitespace character (PDF specifications)
    $separator = $this->config->getPdfWhitespacesRegex();

    return '/'.$objRefArr[0].$separator.$objRefArr[1].$separator.'obj'.'/';
}
/**
 * Length in bytes of an object header such as "4 0 obj".
 *
 * @param array $objRefArr object number at index 0, generation number at index 1
 */
protected function getObjectHeaderLen($objRefArr): int
{
    $numberLen = \strlen($objRefArr[0]);
    $generationLen = \strlen($objRefArr[1]);

    // the constant covers the 2 separating whitespaces plus strlen("obj")
    return 5 + $numberLen + $generationLen;
}
/**
 * Get content of indirect object.
 *
 * @param string $pdfData  PDF data
 * @param array  $xref
 * @param string $objRef   Object number and generation number separated by underscore character
 * @param int    $offset   Object offset
 * @param bool   $decoding If true decode streams
 *
 * @return array containing object data; ['null', 'null', $offset] when no
 *               matching object header is found at the offset
 *
 * @throws Exception if invalid object reference found
 */
protected function getIndirectObject($pdfData, $xref, $objRef, $offset = 0, $decoding = true)
{
    // the reference must split into exactly [object number, generation number]
    $refParts = explode('_', $objRef);
    if (2 !== \count($refParts)) {
        throw new Exception('Invalid object reference for $obj.');
    }
    $headerLength = $this->getObjectHeaderLen($refParts);

    // move over whitespace and over leading zeros of the object number
    $offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
    $offset += strspn($pdfData, '0', $offset);

    // make sure "<number> <generation> obj" really sits at this position
    $headerPattern = $this->getObjectHeaderPattern($refParts);
    if (0 == preg_match($headerPattern, substr($pdfData, $offset, $headerLength))) {
        // an indirect reference to an undefined object shall be considered a reference to the null object
        return ['null', 'null', $offset];
    }

    // the object content starts right after the header
    $offset += $headerLength;
    $collected = [];
    $previous = null; // element read just before the current one
    do {
        $offsetBefore = $offset;
        $current = $this->getRawObject($pdfData, $offset);
        $offset = $current[2];
        // a stream directly preceded by a dictionary is decoded with that dictionary
        $followsDictionary = isset($previous[0]) && ('<<' === $previous[0]);
        if ($decoding && ('stream' === $current[0]) && $followsDictionary) {
            $current[3] = $this->decodeStream($pdfData, $xref, $previous[1], $current[1]);
        }
        $collected[] = $current;
        $previous = $current;
        // stop at the end of the object, or when the parser makes no progress
    } while (('endobj' !== $current[0]) && ($offset !== $offsetBefore));

    // remove closing delimiter
    array_pop($collected);

    return $collected;
}
/**
 * Get the content of object, resolving indirect object reference if necessary.
 *
 * Anything that is not an 'objref' token, or whose reference cannot be
 * looked up, is returned unchanged. Resolved objects are cached in
 * $this->objects.
 *
 * @param string $pdfData PDF data
 * @param array  $xref    looked up directly by object reference
 * @param array  $obj     Object value
 *
 * @return array containing object data
 *
 * @throws Exception
 */
protected function getObjectVal($pdfData, $xref, $obj)
{
    if ('objref' != $obj[0]) {
        // direct value: nothing to resolve
        return $obj;
    }

    $reference = $obj[1];
    if (isset($this->objects[$reference])) {
        // this object has been already parsed
        return $this->objects[$reference];
    }

    // NOTE(review): the lookup uses $xref[<ref>], whereas the xref decoders in
    // this class store offsets under $xref['xref'][<ref>] - confirm which
    // shape callers are expected to pass.
    if (!isset($xref[$reference])) {
        return $obj;
    }

    // parse new object (streams are left undecoded) and remember it
    $resolved = $this->getIndirectObject($pdfData, $xref, $reference, $xref[$reference], false);
    $this->objects[$reference] = $resolved;

    return $resolved;
}
/**
 * Get object type, raw value and offset to next object
 *
 * Reads exactly one token starting at $offset (after skipping whitespace).
 * Arrays and dictionaries are read recursively and returned as lists of
 * tokens. When nothing can be read, the type and value are empty strings.
 *
 * @param string $pdfData PDF data
 * @param int    $offset  Object offset
 *
 * @return array containing object type, raw value and offset to next object
 */
protected function getRawObject($pdfData, $offset = 0)
{
    $objtype = ''; // object type to be returned
    $objval = ''; // object value to be returned
    // skip initial white space chars
    $offset += strspn($pdfData, $this->config->getPdfWhitespaces(), $offset);
    if (!isset($pdfData[$offset])) {
        // end of data: report an empty token instead of reading past the end
        return [$objtype, $objval, $offset];
    }
    // get first char
    $char = $pdfData[$offset];
    // get object type
    switch ($char) {
        case '%': // \x25 PERCENT SIGN
            // skip comment and search for next token
            $next = strcspn($pdfData, "\r\n", $offset);
            if ($next > 0) {
                $offset += $next;

                return $this->getRawObject($pdfData, $offset);
            }
            break;

        case '/': // \x2F SOLIDUS
            // name object
            $objtype = $char;
            ++$offset;
            $pregResult = preg_match(
                '/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/',
                substr($pdfData, $offset, 256),
                $matches
            );
            if (1 == $pregResult) {
                $objval = $matches[1]; // unescaped value
                $offset += \strlen($objval);
            }
            break;

        case '(': // \x28 LEFT PARENTHESIS
        case ')': // \x29 RIGHT PARENTHESIS
            // literal string object
            $objtype = $char;
            ++$offset;
            $strpos = $offset;
            if ('(' == $char) {
                // scan to the matching closing parenthesis, honouring nesting and escapes
                $open_bracket = 1;
                while ($open_bracket > 0) {
                    if (!isset($pdfData[$strpos])) {
                        break;
                    }
                    $ch = $pdfData[$strpos];
                    switch ($ch) {
                        case '\\': // REVERSE SOLIDUS (5Ch) (Backslash)
                            // skip next character
                            ++$strpos;
                            break;

                        case '(': // LEFT PARENHESIS (28h)
                            ++$open_bracket;
                            break;

                        case ')': // RIGHT PARENTHESIS (29h)
                            --$open_bracket;
                            break;
                    }
                    ++$strpos;
                }
                $objval = substr($pdfData, $offset, ($strpos - $offset - 1));
                $offset = $strpos;
            }
            break;

        case '[': // \x5B LEFT SQUARE BRACKET
        case ']': // \x5D RIGHT SQUARE BRACKET
            // array object
            $objtype = $char;
            ++$offset;
            if ('[' == $char) {
                // get array content
                $objval = [];
                do {
                    $oldOffset = $offset;
                    // get element
                    $element = $this->getRawObject($pdfData, $offset);
                    $offset = $element[2];
                    $objval[] = $element;
                } while ((']' != $element[0]) && ($offset != $oldOffset));
                // remove closing delimiter
                array_pop($objval);
            }
            break;

        case '<': // \x3C LESS-THAN SIGN
        case '>': // \x3E GREATER-THAN SIGN
            if (isset($pdfData[($offset + 1)]) && ($pdfData[($offset + 1)] == $char)) {
                // dictionary object
                $objtype = $char.$char;
                $offset += 2;
                if ('<' == $char) {
                    // get array content
                    $objval = [];
                    do {
                        $oldOffset = $offset;
                        // get element
                        $element = $this->getRawObject($pdfData, $offset);
                        $offset = $element[2];
                        $objval[] = $element;
                    } while (('>>' != $element[0]) && ($offset != $oldOffset));
                    // remove closing delimiter
                    array_pop($objval);
                }
            } else {
                // hexadecimal string object
                $objtype = $char;
                ++$offset;
                $pregResult = preg_match(
                    '/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU',
                    substr($pdfData, $offset),
                    $matches
                );
                if (('<' == $char) && 1 == $pregResult) {
                    // remove white space characters; strtr() with an empty
                    // replacement string leaves its input unchanged, so the
                    // characters are removed one by one with str_replace()
                    $objval = str_replace(str_split($this->config->getPdfWhitespaces()), '', $matches[1]);
                    $offset += \strlen($matches[0]);
                } elseif (false !== ($endpos = strpos($pdfData, '>', $offset))) {
                    $offset = $endpos + 1;
                }
            }
            break;

        default:
            if ('endobj' == substr($pdfData, $offset, 6)) {
                // indirect object
                $objtype = 'endobj';
                $offset += 6;
            } elseif ('null' == substr($pdfData, $offset, 4)) {
                // null object
                $objtype = 'null';
                $offset += 4;
                $objval = 'null';
            } elseif ('true' == substr($pdfData, $offset, 4)) {
                // boolean true object
                $objtype = 'boolean';
                $offset += 4;
                $objval = 'true';
            } elseif ('false' == substr($pdfData, $offset, 5)) {
                // boolean false object
                $objtype = 'boolean';
                $offset += 5;
                $objval = 'false';
            } elseif ('stream' == substr($pdfData, $offset, 6)) {
                // start stream object
                $objtype = 'stream';
                $offset += 6;
                if (1 == preg_match('/^([\r]?[\n])/isU', substr($pdfData, $offset), $matches)) {
                    $offset += \strlen($matches[0]);
                    $pregResult = preg_match(
                        '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU',
                        substr($pdfData, $offset),
                        $matches,
                        \PREG_OFFSET_CAPTURE
                    );
                    if (1 == $pregResult) {
                        // everything up to the 'endstream' keyword is the stream body
                        $objval = substr($pdfData, $offset, $matches[0][1]);
                        $offset += $matches[1][1];
                    }
                }
            } elseif ('endstream' == substr($pdfData, $offset, 9)) {
                // end stream object
                $objtype = 'endstream';
                $offset += 9;
            } elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU', substr($pdfData, $offset, 33), $matches)) {
                // indirect object reference
                $objtype = 'objref';
                $offset += \strlen($matches[0]);
                $objval = (int) ($matches[1]).'_'.(int) ($matches[2]);
            } elseif (1 == preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU', substr($pdfData, $offset, 33), $matches)) {
                // object start
                $objtype = 'obj';
                $objval = (int) ($matches[1]).'_'.(int) ($matches[2]);
                $offset += \strlen($matches[0]);
            } elseif (($numlen = strspn($pdfData, '+-.0123456789', $offset)) > 0) {
                // numeric object
                $objtype = 'numeric';
                $objval = substr($pdfData, $offset, $numlen);
                $offset += $numlen;
            }
            break;
    }

    return [$objtype, $objval, $offset];
}
/**
 * Get Cross-Reference (xref) table and trailer data from PDF document data.
 *
 * The xref position is resolved in this order:
 *  1. when $offset is 0, the value of the LAST "startxref" marker in the data;
 *  2. $offset itself, when the "xref" keyword is located exactly at $offset;
 *  3. $offset itself, when an "N N obj" header is found at or after $offset
 *     (treated as a cross-reference stream object);
 *  4. the value of the first "startxref" marker found from $offset onwards.
 *
 * @param string $pdfData
 * @param int    $offset  xref offset (if known)
 * @param array  $xref    previous xref array (if any)
 *
 * @return array containing xref and trailer data
 *
 * @throws Exception if it was unable to find startxref
 * @throws Exception if it was unable to find xref
 */
protected function getXrefData($pdfData, $offset = 0, $xref = [])
{
    $startxrefPattern = '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i';

    if (0 == $offset) {
        // find last startxref
        $pregResult = preg_match_all(
            $startxrefPattern,
            $pdfData,
            $allMatches,
            \PREG_SET_ORDER,
            $offset
        );
        if (0 == $pregResult) {
            throw new Exception('Unable to find startxref');
        }
        $lastMatch = array_pop($allMatches);
        $startxref = $lastMatch[1];
    } elseif (strpos($pdfData, 'xref', $offset) == $offset) {
        // Already pointing at the xref table
        $startxref = $offset;
    } elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $pdfData, $objMatches, \PREG_OFFSET_CAPTURE, $offset)) {
        // Cross-Reference Stream object
        $startxref = $offset;
    } elseif (1 === preg_match($startxrefPattern, $pdfData, $startxrefMatches, \PREG_OFFSET_CAPTURE, $offset)) {
        // startxref found.
        // Each preg_match() above writes into its own variable: a failed
        // preg_match() resets its $matches argument, so sharing one variable
        // between the "obj" probe and this fallback would discard the captured
        // startxref value.
        $startxref = $startxrefMatches[1][0];
    } else {
        throw new Exception('Unable to find startxref');
    }

    if ($startxref > \strlen($pdfData)) {
        throw new Exception('Unable to find xref (PDF corrupted?)');
    }

    // check xref position
    if (strpos($pdfData, 'xref', $startxref) == $startxref) {
        // Cross-Reference
        $xref = $this->decodeXref($pdfData, $startxref, $xref);
    } else {
        // Cross-Reference Stream
        $xref = $this->decodeXrefStream($pdfData, $startxref, $xref);
    }

    if (empty($xref)) {
        throw new Exception('Unable to find xref');
    }

    return $xref;
}
/**
 * Parses raw PDF data into its cross-reference data and indirect objects.
 *
 * Anything preceding the "%PDF-" marker is discarded before parsing. Only
 * xref entries with a positive offset are decoded.
 *
 * @param string $data PDF data to parse
 *
 * @return array two-element array: [xref data, decoded objects keyed by xref entry key]
 *
 * @throws Exception if empty PDF data given
 * @throws Exception if PDF data missing %PDF header
 */
public function parseData($data)
{
    if (empty($data)) {
        throw new Exception('Empty PDF data given.');
    }

    // locate the PDF header; everything before it is ignored
    $headerPos = strpos($data, '%PDF-');
    if (false === $headerPos) {
        throw new Exception('Invalid PDF data: missing %PDF header.');
    }
    $pdfData = substr($data, $headerPos);

    // xref and trailer data drive the object lookup below
    $xref = $this->getXrefData($pdfData);

    $objects = [];
    foreach ($xref['xref'] as $obj => $offset) {
        // skip entries already decoded and entries without a positive offset
        if (isset($objects[$obj]) || !($offset > 0)) {
            continue;
        }
        $objects[$obj] = $this->getIndirectObject($pdfData, $xref, $obj, $offset, true);
    }

    return [$xref, $objects];
}
}

View File

@@ -0,0 +1,54 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Header;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
/**
 * Class Form
 *
 * A Page subclass whose text is extracted from its own content.
 */
class Form extends Page
{
    /**
     * Extracts the text of this form.
     *
     * The form's content is wrapped in a new PDFObject (with an empty Header)
     * and parsed using this form itself as the page context.
     *
     * @param Page|null $page not used by this implementation
     *
     * @return string
     */
    public function getText(?Page $page = null)
    {
        // Explicit "?Page": implicitly nullable parameter types are deprecated as of PHP 8.4.
        $header = new Header([], $this->document);
        $contents = new PDFObject($this->document, $header, $this->content, $this->config);

        return $contents->getText($this);
    }
}

View File

@@ -0,0 +1,50 @@
<?php
/**
* @file
* This file is part of the PdfParser library.
*
* @author Sébastien MALOT <sebastien@malot.fr>
* @date 2017-01-03
*
* @license LGPLv3
* @url <https://github.com/smalot/pdfparser>
*
* PdfParser is a pdf library written in PHP, extraction oriented.
* Copyright (C) 2017 - Sébastien MALOT <sebastien@malot.fr>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program.
* If not, see <http://www.pdfparser.org/sites/default/LICENSE.txt>.
*/
namespace Smalot\PdfParser\XObject;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\PDFObject;
/**
 * Class Image
 *
 * PDF object for which text extraction always yields an empty string.
 */
class Image extends PDFObject
{
    /**
     * Returns the text of this object, which is always empty.
     *
     * @param Page|null $page not used by this implementation
     *
     * @return string always ''
     */
    public function getText(?Page $page = null)
    {
        // Explicit "?Page": implicitly nullable parameter types are deprecated as of PHP 8.4.
        return '';
    }
}

View File

@@ -0,0 +1,19 @@
Copyright (c) 2015-2019 Fabien Potencier
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -0,0 +1,870 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Symfony\Polyfill\Mbstring;
/**
* Partial mbstring implementation in PHP, iconv based, UTF-8 centric.
*
* Implemented:
* - mb_chr - Returns a specific character from its Unicode code point
* - mb_convert_encoding - Convert character encoding
* - mb_convert_variables - Convert character code in variable(s)
* - mb_decode_mimeheader - Decode string in MIME header field
* - mb_encode_mimeheader - Encode string for MIME header XXX NATIVE IMPLEMENTATION IS REALLY BUGGED
* - mb_decode_numericentity - Decode HTML numeric string reference to character
* - mb_encode_numericentity - Encode character to HTML numeric string reference
* - mb_convert_case - Perform case folding on a string
* - mb_detect_encoding - Detect character encoding
* - mb_get_info - Get internal settings of mbstring
* - mb_http_input - Detect HTTP input character encoding
* - mb_http_output - Set/Get HTTP output character encoding
* - mb_internal_encoding - Set/Get internal character encoding
* - mb_list_encodings - Returns an array of all supported encodings
* - mb_ord - Returns the Unicode code point of a character
* - mb_output_handler - Callback function converts character encoding in output buffer
* - mb_scrub - Replaces ill-formed byte sequences with substitute characters
* - mb_strlen - Get string length
* - mb_strpos - Find position of first occurrence of string in a string
* - mb_strrpos - Find position of last occurrence of a string in a string
* - mb_str_split - Convert a string to an array
* - mb_strtolower - Make a string lowercase
* - mb_strtoupper - Make a string uppercase
* - mb_substitute_character - Set/Get substitution character
* - mb_substr - Get part of string
* - mb_stripos - Finds position of first occurrence of a string within another, case insensitive
* - mb_stristr - Finds first occurrence of a string within another, case insensitive
* - mb_strrchr - Finds the last occurrence of a character in a string within another
* - mb_strrichr - Finds the last occurrence of a character in a string within another, case insensitive
* - mb_strripos - Finds position of last occurrence of a string within another, case insensitive
* - mb_strstr - Finds first occurrence of a string within another
* - mb_strwidth - Return width of string
* - mb_substr_count - Count the number of substring occurrences
*
* Not implemented:
* - mb_convert_kana - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
* - mb_ereg_* - Regular expression with multibyte support
* - mb_parse_str - Parse GET/POST/COOKIE data and set global variable
* - mb_preferred_mime_name - Get MIME charset string
* - mb_regex_encoding - Returns current encoding for multibyte regex as string
* - mb_regex_set_options - Set/Get the default options for mbregex functions
* - mb_send_mail - Send encoded mail
* - mb_split - Split multibyte string using regular expression
* - mb_strcut - Get part of string
* - mb_strimwidth - Get truncated string with specified width
*
* @author Nicolas Grekas <p@tchwork.com>
*
* @internal
*/
final class Mbstring
{
public const MB_CASE_FOLD = \PHP_INT_MAX;
private const CASE_FOLD = [
['µ', 'ſ', "\xCD\x85", 'ς', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE"],
['μ', 's', 'ι', 'σ', 'β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1", 'ι'],
];
private static $encodingList = ['ASCII', 'UTF-8'];
private static $language = 'neutral';
private static $internalEncoding = 'UTF-8';
/**
 * Converts a string from one character encoding to another.
 *
 * Besides iconv-supported encodings, the pseudo-encodings "BASE64" and
 * "HTML-ENTITIES"/"HTML" are handled explicitly.
 *
 * @param string            $s            string to convert
 * @param string            $toEncoding   target encoding
 * @param string|array|null $fromEncoding source encoding; an array or a comma-separated
 *                                        list triggers detection via mb_detect_encoding(),
 *                                        null means the internal encoding
 *
 * @return string|false converted string, or false when iconv fails
 */
public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
{
    // Check for null before calling strpos(): passing null to a string
    // parameter of an internal function is deprecated since PHP 8.1.
    if (\is_array($fromEncoding) || (null !== $fromEncoding && false !== strpos($fromEncoding, ','))) {
        $fromEncoding = self::mb_detect_encoding($s, $fromEncoding);
    } else {
        $fromEncoding = self::getEncoding($fromEncoding);
    }

    $toEncoding = self::getEncoding($toEncoding);

    if ('BASE64' === $fromEncoding) {
        $s = base64_decode($s);
        // the decoded payload is taken to already be in the target encoding
        $fromEncoding = $toEncoding;
    }

    if ('BASE64' === $toEncoding) {
        return base64_encode($s);
    }

    if ('HTML-ENTITIES' === $toEncoding || 'HTML' === $toEncoding) {
        if ('HTML-ENTITIES' === $fromEncoding || 'HTML' === $fromEncoding) {
            $fromEncoding = 'Windows-1252';
        }
        if ('UTF-8' !== $fromEncoding) {
            $s = \iconv($fromEncoding, 'UTF-8//IGNORE', $s);
        }

        // only runs of non-ASCII bytes need to be turned into entities
        return preg_replace_callback('/[\x80-\xFF]+/', [__CLASS__, 'html_encoding_callback'], $s);
    }

    if ('HTML-ENTITIES' === $fromEncoding) {
        $s = html_entity_decode($s, \ENT_COMPAT, 'UTF-8');
        $fromEncoding = 'UTF-8';
    }

    return \iconv($fromEncoding, $toEncoding.'//IGNORE', $s);
}
/**
 * Converts, in place, every leaf value of the given variables between encodings.
 *
 * @param string $toEncoding   target encoding
 * @param string $fromEncoding source encoding
 * @param mixed  ...$vars      variables (scalars or nested arrays) converted by reference
 *
 * @return string|false $fromEncoding when every conversion succeeded, false otherwise
 */
public static function mb_convert_variables($toEncoding, $fromEncoding, &...$vars)
{
    $allConverted = true;

    $convertLeaf = function (&$value) use (&$allConverted, $toEncoding, $fromEncoding) {
        $value = self::mb_convert_encoding($value, $toEncoding, $fromEncoding);
        if (false === $value) {
            $allConverted = false;
        }
    };
    array_walk_recursive($vars, $convertLeaf);

    if (!$allConverted) {
        return false;
    }

    return $fromEncoding;
}
/**
 * Decodes a MIME header field into the current internal encoding.
 *
 * Delegates to iconv_mime_decode() with mode 2
 * (the value of ICONV_MIME_DECODE_CONTINUE_ON_ERROR).
 *
 * @param string $s encoded header value
 *
 * @return string|false whatever iconv_mime_decode() returns
 */
public static function mb_decode_mimeheader($s)
{
return \iconv_mime_decode($s, 2, self::$internalEncoding);
}
/**
 * Placeholder for mb_encode_mimeheader(): performs no encoding.
 *
 * All arguments are ignored; the method only raises an E_USER_WARNING pointing
 * callers to iconv_mime_encode() and then returns null implicitly.
 */
public static function mb_encode_mimeheader($s, $charset = null, $transferEncoding = null, $linefeed = null, $indent = null)
{
trigger_error('mb_encode_mimeheader() is bugged. Please use iconv_mime_encode() instead', \E_USER_WARNING);
}
/**
 * Decodes HTML numeric character references (&#123; / &#x7B;) into characters.
 *
 * $convmap is read in groups of four integers [start, end, offset, mask];
 * the mask entry is ignored when decoding. A reference is decoded when its
 * code falls inside [start + offset, end + offset], yielding code - offset.
 *
 * @param string      $s        input string
 * @param array       $convmap  conversion map, see above
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return string|false|null decoded string; null or '' (after an E_USER_WARNING)
 *                           on invalid argument types; false on an invalid $convmap
 */
public static function mb_decode_numericentity($s, $convmap, $encoding = null)
{
    if (null !== $s && !is_scalar($s) && !(\is_object($s) && method_exists($s, '__toString'))) {
        trigger_error('mb_decode_numericentity() expects parameter 1 to be string, '.\gettype($s).' given', \E_USER_WARNING);

        return null;
    }

    if (!\is_array($convmap) || (80000 > \PHP_VERSION_ID && !$convmap)) {
        return false;
    }

    if (null !== $encoding && !is_scalar($encoding)) {
        // Report the type of the offending argument ($encoding), not of $s.
        trigger_error('mb_decode_numericentity() expects parameter 3 to be string, '.\gettype($encoding).' given', \E_USER_WARNING);

        return ''; // Instead of null (cf. mb_encode_numericentity).
    }

    $s = (string) $s;
    if ('' === $s) {
        return '';
    }

    $encoding = self::getEncoding($encoding);

    if ('UTF-8' === $encoding) {
        // null marks "no conversion back needed" for the final step
        $encoding = null;
        if (!preg_match('//u', $s)) {
            // drop invalid UTF-8 sequences
            $s = @\iconv('UTF-8', 'UTF-8//IGNORE', $s);
        }
    } else {
        $s = \iconv($encoding, 'UTF-8//IGNORE', $s);
    }

    // only complete groups of four entries are used
    $cnt = floor(\count($convmap) / 4) * 4;

    for ($i = 0; $i < $cnt; $i += 4) {
        // collector_decode_htmlnumericentity ignores $convmap[$i + 3]
        $convmap[$i] += $convmap[$i + 2];
        $convmap[$i + 1] += $convmap[$i + 2];
    }

    $s = preg_replace_callback('/&#(?:0*([0-9]+)|x0*([0-9a-fA-F]+))(?!&);?/', function (array $m) use ($cnt, $convmap) {
        $c = isset($m[2]) ? (int) hexdec($m[2]) : $m[1];
        for ($i = 0; $i < $cnt; $i += 4) {
            if ($c >= $convmap[$i] && $c <= $convmap[$i + 1]) {
                return self::mb_chr($c - $convmap[$i + 2]);
            }
        }

        // not covered by the map: leave the reference untouched
        return $m[0];
    }, $s);

    if (null === $encoding) {
        return $s;
    }

    return \iconv('UTF-8', $encoding.'//IGNORE', $s);
}
/**
 * Encodes characters as HTML numeric character references.
 *
 * $convmap is read in groups of four integers [start, end, offset, mask]:
 * a character whose code point lies in [start, end] is emitted as the
 * reference for (code + offset) & mask.
 *
 * @param string      $s        input string
 * @param array       $convmap  conversion map, see above
 * @param string|null $encoding encoding of $s (null = internal encoding)
 * @param bool        $is_hex   emit hexadecimal (&#x..;) instead of decimal references
 *
 * @return string|false|null encoded string; null (after an E_USER_WARNING) on
 *                           invalid argument types; false on an invalid $convmap
 */
public static function mb_encode_numericentity($s, $convmap, $encoding = null, $is_hex = false)
{
    if (null !== $s && !is_scalar($s) && !(\is_object($s) && method_exists($s, '__toString'))) {
        trigger_error('mb_encode_numericentity() expects parameter 1 to be string, '.\gettype($s).' given', \E_USER_WARNING);

        return null;
    }

    if (!\is_array($convmap) || (80000 > \PHP_VERSION_ID && !$convmap)) {
        return false;
    }

    if (null !== $encoding && !is_scalar($encoding)) {
        // Report the type of the offending argument ($encoding), not of $s.
        trigger_error('mb_encode_numericentity() expects parameter 3 to be string, '.\gettype($encoding).' given', \E_USER_WARNING);

        return null; // Instead of '' (cf. mb_decode_numericentity).
    }

    if (null !== $is_hex && !is_scalar($is_hex)) {
        // Report the type of the offending argument ($is_hex), not of $s.
        trigger_error('mb_encode_numericentity() expects parameter 4 to be boolean, '.\gettype($is_hex).' given', \E_USER_WARNING);

        return null;
    }

    $s = (string) $s;
    if ('' === $s) {
        return '';
    }

    $encoding = self::getEncoding($encoding);

    if ('UTF-8' === $encoding) {
        // null marks "no conversion back needed" for the final step
        $encoding = null;
        if (!preg_match('//u', $s)) {
            // drop invalid UTF-8 sequences
            $s = @\iconv('UTF-8', 'UTF-8//IGNORE', $s);
        }
    } else {
        $s = \iconv($encoding, 'UTF-8//IGNORE', $s);
    }

    // high nibble of a UTF-8 lead byte => length of the sequence in bytes
    static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];

    // only complete groups of four entries are used
    $cnt = floor(\count($convmap) / 4) * 4;
    $i = 0;
    $len = \strlen($s);
    $result = '';

    while ($i < $len) {
        $ulen = $s[$i] < "\x80" ? 1 : $ulenMask[$s[$i] & "\xF0"];
        $uchr = substr($s, $i, $ulen);
        $i += $ulen;
        $c = self::mb_ord($uchr);

        for ($j = 0; $j < $cnt; $j += 4) {
            if ($c >= $convmap[$j] && $c <= $convmap[$j + 1]) {
                $cOffset = ($c + $convmap[$j + 2]) & $convmap[$j + 3];
                $result .= $is_hex ? sprintf('&#x%X;', $cOffset) : '&#'.$cOffset.';';
                continue 2;
            }
        }

        // not covered by the map: copy the character unchanged
        $result .= $uchr;
    }

    if (null === $encoding) {
        return $result;
    }

    return \iconv('UTF-8', $encoding.'//IGNORE', $result);
}
/**
 * Performs case conversion (upper, lower, title or case folding) on a string.
 *
 * The string is converted to UTF-8 first, mapped character by character using
 * the tables loaded through getData(), then converted back to $encoding.
 *
 * @param string      $s        input string
 * @param int         $mode     MB_CASE_UPPER, MB_CASE_TITLE or self::MB_CASE_FOLD;
 *                              any other value uses the lower-case table
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return string|false converted string (iconv result when $encoding is not UTF-8)
 */
public static function mb_convert_case($s, $mode, $encoding = null)
{
$s = (string) $s;
if ('' === $s) {
return '';
}
$encoding = self::getEncoding($encoding);
if ('UTF-8' === $encoding) {
// null marks "no conversion back needed" for the final step
$encoding = null;
if (!preg_match('//u', $s)) {
// drop invalid UTF-8 sequences
$s = @\iconv('UTF-8', 'UTF-8//IGNORE', $s);
}
} else {
$s = \iconv($encoding, 'UTF-8//IGNORE', $s);
}
if (\MB_CASE_TITLE == $mode) {
// title case: the regexp splits matches into two groups handled by title_case()
static $titleRegexp = null;
if (null === $titleRegexp) {
$titleRegexp = self::getData('titleCaseRegexp');
}
$s = preg_replace_callback($titleRegexp, [__CLASS__, 'title_case'], $s);
} else {
if (\MB_CASE_UPPER == $mode) {
// mapping tables are loaded once and cached in static locals
static $upper = null;
if (null === $upper) {
$upper = self::getData('upperCase');
}
$map = $upper;
} else {
if (self::MB_CASE_FOLD === $mode) {
// apply the special case-folding replacements before lower-casing
$s = str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $s);
}
static $lower = null;
if (null === $lower) {
$lower = self::getData('lowerCase');
}
$map = $lower;
}
// high nibble of a UTF-8 lead byte => length of the sequence in bytes
static $ulenMask = ["\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4];
$i = 0;
$len = \strlen($s);
while ($i < $len) {
$ulen = $s[$i] < "\x80" ? 1 : $ulenMask[$s[$i] & "\xF0"];
$uchr = substr($s, $i, $ulen);
$i += $ulen;
if (isset($map[$uchr])) {
$uchr = $map[$uchr];
$nlen = \strlen($uchr);
if ($nlen == $ulen) {
// same byte length: overwrite the bytes in place, walking backwards from $i
$nlen = $i;
do {
$s[--$nlen] = $uchr[--$ulen];
} while ($ulen);
} else {
// different byte length: splice the replacement in and shift the cursor and total length
$s = substr_replace($s, $uchr, $i - $ulen, $ulen);
$len += $nlen - $ulen;
$i += $nlen - $ulen;
}
}
}
}
if (null === $encoding) {
return $s;
}
return \iconv('UTF-8', $encoding.'//IGNORE', $s);
}
/**
 * Gets or sets the internal character encoding.
 *
 * @param string|null $encoding new encoding, or null to read the current one
 *
 * @return string|bool current encoding when called without argument; true when
 *                     the encoding was accepted; false on PHP < 8 when it was not
 *
 * @throws \ValueError on PHP >= 8 when the encoding is not usable
 */
public static function mb_internal_encoding($encoding = null)
{
    if (null === $encoding) {
        return self::$internalEncoding;
    }

    $candidate = self::getEncoding($encoding);

    // UTF-8 is always accepted; anything else must be usable by iconv
    $isUsable = 'UTF-8' === $candidate || false !== @\iconv($candidate, $candidate, ' ');

    if (!$isUsable) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError(sprintf('Argument #1 ($encoding) must be a valid encoding, "%s" given', $encoding));
    }

    self::$internalEncoding = $candidate;

    return true;
}
/**
 * Gets or sets the current language; only "uni" and "neutral" are accepted.
 *
 * @param string|null $lang new language (case-insensitive), or null to read the current one
 *
 * @return string|bool current language when called without argument; true when
 *                     the language was accepted; false on PHP < 8 when it was not
 *
 * @throws \ValueError on PHP >= 8 when the language is not supported
 */
public static function mb_language($lang = null)
{
    if (null === $lang) {
        return self::$language;
    }

    $lowered = strtolower($lang);

    if ('uni' === $lowered || 'neutral' === $lowered) {
        self::$language = $lowered;

        return true;
    }

    if (80000 > \PHP_VERSION_ID) {
        return false;
    }

    throw new \ValueError(sprintf('Argument #1 ($language) must be a valid language, "%s" given', $lang));
}
/**
 * Lists the encodings reported as supported; this polyfill only reports UTF-8.
 *
 * @return string[]
 */
public static function mb_list_encodings()
{
return ['UTF-8'];
}
/**
 * Returns the known aliases of an encoding; only UTF-8 is recognised.
 *
 * @param string $encoding encoding name (case-insensitive)
 *
 * @return string[]|false ['utf8'] for "UTF8"/"UTF-8", false for anything else
 */
public static function mb_encoding_aliases($encoding)
{
    $upper = strtoupper($encoding);

    if (\in_array($upper, ['UTF8', 'UTF-8'], true)) {
        return ['utf8'];
    }

    return false;
}
/**
 * Checks whether a value is valid for the given encoding.
 *
 * @param string|null $var      value to check
 * @param string|null $encoding encoding to check against (null = internal encoding)
 *
 * @return bool false when both arguments are null; otherwise true when
 *              mb_detect_encoding() accepts the value or iconv can round-trip it
 */
public static function mb_check_encoding($var = null, $encoding = null)
{
    if (null === $encoding) {
        if (null === $var) {
            return false;
        }
        $encoding = self::$internalEncoding;
    }

    if (self::mb_detect_encoding($var, [$encoding])) {
        return true;
    }

    // fall back to asking iconv whether the value converts onto itself
    return false !== @\iconv($encoding, $encoding, $var);
}
/**
 * Detects the encoding of a string among a list of candidates.
 *
 * Candidates are tried in order: ASCII matches when no byte >= 0x80 is present,
 * UTF-8 when the string is valid UTF-8, and any "ISO-8859-*" name matches
 * unconditionally. Other names never match.
 *
 * @param string            $str          string to inspect
 * @param string|array|null $encodingList candidates (array or comma-separated list);
 *                                        null uses the configured detection order
 * @param bool              $strict       accepted but not used by this implementation
 *
 * @return string|false the first matching encoding name, or false
 */
public static function mb_detect_encoding($str, $encodingList = null, $strict = false)
{
    if (null === $encodingList) {
        $candidates = self::$encodingList;
    } else {
        $candidates = \is_array($encodingList)
            ? $encodingList
            : array_map('trim', explode(',', $encodingList));
        $candidates = array_map('strtoupper', $candidates);
    }

    foreach ($candidates as $enc) {
        if ('ASCII' === $enc) {
            if (!preg_match('/[\x80-\xFF]/', $str)) {
                return $enc;
            }
        } elseif ('UTF8' === $enc || 'UTF-8' === $enc) {
            if (preg_match('//u', $str)) {
                return 'UTF-8';
            }
        } elseif (0 === strncmp($enc, 'ISO-8859-', 9)) {
            return $enc;
        }
    }

    return false;
}
/**
 * Gets or sets the encoding detection order.
 *
 * Only "ASCII", "UTF8", "UTF-8" and names starting with "ISO-8859-" are
 * accepted (case-insensitive); the list is stored upper-cased.
 *
 * @param string|array|null $encodingList new order (array or comma-separated list),
 *                                        or null to read the current one
 *
 * @return array|bool current order when called without argument; true when the
 *                    new order was stored; false when it contains an unsupported name
 */
public static function mb_detect_order($encodingList = null)
{
    if (null === $encodingList) {
        return self::$encodingList;
    }

    $requested = \is_array($encodingList)
        ? $encodingList
        : array_map('trim', explode(',', $encodingList));
    $requested = array_map('strtoupper', $requested);

    foreach ($requested as $enc) {
        $isBuiltin = \in_array($enc, ['ASCII', 'UTF8', 'UTF-8'], true);
        if (!$isBuiltin && 0 !== strncmp($enc, 'ISO-8859-', 9)) {
            return false;
        }
    }

    self::$encodingList = $requested;

    return true;
}
/**
 * Returns the length of a string in characters.
 *
 * For CP850 (which getEncoding() also returns for "8BIT"/"BINARY") and ASCII the
 * byte length is returned directly; otherwise iconv_strlen() is used with its
 * errors suppressed.
 *
 * @param string      $s        input string
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return int|false
 */
public static function mb_strlen($s, $encoding = null)
{
$encoding = self::getEncoding($encoding);
if ('CP850' === $encoding || 'ASCII' === $encoding) {
return \strlen($s);
}
return @\iconv_strlen($s, $encoding);
}
/**
 * Finds the character position of the first occurrence of $needle in $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param int         $offset   search start offset
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return int|false position, or false when not found
 */
public static function mb_strpos($haystack, $needle, $offset = 0, $encoding = null)
{
$encoding = self::getEncoding($encoding);
if ('CP850' === $encoding || 'ASCII' === $encoding) {
// single-byte encodings: byte offsets and character offsets coincide
return strpos($haystack, $needle, $offset);
}
$needle = (string) $needle;
if ('' === $needle) {
// empty needle: warning + false before PHP 8, position 0 from PHP 8 on
if (80000 > \PHP_VERSION_ID) {
trigger_error(__METHOD__.': Empty delimiter', \E_USER_WARNING);
return false;
}
return 0;
}
return \iconv_strpos($haystack, $needle, $offset, $encoding);
}
/**
 * Finds the character position of the last occurrence of $needle in $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param int         $offset   search offset; may be negative
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return int|false position, or false when not found
 */
public static function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null)
{
$encoding = self::getEncoding($encoding);
if ('CP850' === $encoding || 'ASCII' === $encoding) {
return strrpos($haystack, $needle, $offset);
}
// a non-integral offset is treated as 0
if ($offset != (int) $offset) {
$offset = 0;
} elseif ($offset = (int) $offset) {
if ($offset < 0) {
// negative offset: add the needle length (measured by mb_strlen() without an
// explicit encoding); if still negative, cut that many characters off the end
if (0 > $offset += self::mb_strlen($needle)) {
$haystack = self::mb_substr($haystack, 0, $offset, $encoding);
}
$offset = 0;
} else {
// positive offset: search only the tail; $offset is added back to the result below
$haystack = self::mb_substr($haystack, $offset, 2147483647, $encoding);
}
}
// from PHP 8 on, an empty needle yields the haystack length instead of calling iconv_strrpos()
$pos = '' !== $needle || 80000 > \PHP_VERSION_ID
? \iconv_strrpos($haystack, $needle, $encoding)
: self::mb_strlen($haystack, $encoding);
return false !== $pos ? $offset + $pos : false;
}
/**
 * Splits a string into an array of chunks of $split_length characters.
 *
 * @param string      $string       string to split
 * @param int         $split_length number of characters per chunk (>= 1)
 * @param string|null $encoding     encoding of $string (null = internal encoding)
 *
 * @return string[]|false|null chunks; null (after an E_USER_WARNING) when $string is
 *                             not string-like; false (after an E_USER_WARNING) on
 *                             PHP < 8 when $split_length is below 1
 *
 * @throws \ValueError on PHP >= 8 when $split_length is below 1
 */
public static function mb_str_split($string, $split_length = 1, $encoding = null)
{
    if (null !== $string && !is_scalar($string) && !(\is_object($string) && method_exists($string, '__toString'))) {
        trigger_error('mb_str_split() expects parameter 1 to be string, '.\gettype($string).' given', \E_USER_WARNING);

        return null;
    }

    if (1 > $split_length = (int) $split_length) {
        if (80000 > \PHP_VERSION_ID) {
            trigger_error('The length of each segment must be greater than zero', \E_USER_WARNING);

            return false;
        }

        throw new \ValueError('Argument #2 ($length) must be greater than 0');
    }

    if (null === $encoding) {
        $encoding = mb_internal_encoding();
    }

    if ('UTF-8' === $encoding = self::getEncoding($encoding)) {
        // Build "/(.{n})/us"; counts above 65535 are split into several
        // quantifiers of at most 65535 each.
        $rx = '/(';
        while (65535 < $split_length) {
            $rx .= '.{65535}';
            $split_length -= 65535;
        }
        $rx .= '.{'.$split_length.'})/us';

        // -1 = no limit. Passing null here is deprecated since PHP 8.1 because
        // preg_split()'s $limit is a non-nullable int.
        return preg_split($rx, $string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY);
    }

    $result = [];
    $length = mb_strlen($string, $encoding);

    for ($i = 0; $i < $length; $i += $split_length) {
        $result[] = mb_substr($string, $i, $split_length, $encoding);
    }

    return $result;
}
/**
 * Lower-cases a string; shorthand for mb_convert_case() with MB_CASE_LOWER.
 *
 * @param string      $s        input string
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return string|false
 */
public static function mb_strtolower($s, $encoding = null)
{
return self::mb_convert_case($s, \MB_CASE_LOWER, $encoding);
}
/**
 * Upper-cases a string; shorthand for mb_convert_case() with MB_CASE_UPPER.
 *
 * @param string      $s        input string
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return string|false
 */
public static function mb_strtoupper($s, $encoding = null)
{
return self::mb_convert_case($s, \MB_CASE_UPPER, $encoding);
}
/**
 * Gets or sets the substitution character; only "none" is supported.
 *
 * @param string|null $c new substitution character, or null to read the current one
 *
 * @return string|bool 'none' when called without argument; true when $c is "none"
 *                     (case-insensitive); false on PHP < 8 for any other value
 *
 * @throws \ValueError on PHP >= 8 for any value other than "none"
 */
public static function mb_substitute_character($c = null)
{
    if (null === $c) {
        return 'none';
    }

    if (0 !== strcasecmp($c, 'none')) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError('Argument #1 ($substitute_character) must be "none", "long", "entity" or a valid codepoint');
    }

    return true;
}
/**
 * Returns part of a string, counted in characters.
 *
 * @param string      $s        input string
 * @param int         $start    start position; negative values count from the end
 * @param int|null    $length   maximum length; null means "to the end", negative
 *                              values stop that many characters before the end
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return string
 */
public static function mb_substr($s, $start, $length = null, $encoding = null)
{
$encoding = self::getEncoding($encoding);
if ('CP850' === $encoding || 'ASCII' === $encoding) {
return (string) substr($s, $start, null === $length ? 2147483647 : $length);
}
if ($start < 0) {
// translate a negative start into an absolute position, clamped at 0
$start = \iconv_strlen($s, $encoding) + $start;
if ($start < 0) {
$start = 0;
}
}
if (null === $length) {
$length = 2147483647;
} elseif ($length < 0) {
// translate a negative length into a positive one relative to $start
$length = \iconv_strlen($s, $encoding) + $length - $start;
if ($length < 0) {
return '';
}
}
return (string) \iconv_substr($s, $start, $length, $encoding);
}
/**
 * Case-insensitive mb_strpos(): both strings are case-folded before searching.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param int         $offset   search start offset
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return int|false
 */
public static function mb_stripos($haystack, $needle, $offset = 0, $encoding = null)
{
$haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
$needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);
return self::mb_strpos($haystack, $needle, $offset, $encoding);
}
/**
 * Case-insensitive search for the first occurrence of $needle, returning a part of $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param bool        $part     true returns the part before the match, false the part from the match on
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return string|false false when $needle is not found
 */
public static function mb_stristr($haystack, $needle, $part = false, $encoding = null)
{
$pos = self::mb_stripos($haystack, $needle, 0, $encoding);
return self::getSubpart($pos, $part, $haystack, $encoding);
}
/**
 * Searches for the last occurrence of $needle, returning a part of $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param bool        $part     true returns the part before the match, false the part from the match on
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return string|false false when $needle is not found
 */
public static function mb_strrchr($haystack, $needle, $part = false, $encoding = null)
{
$encoding = self::getEncoding($encoding);
if ('CP850' === $encoding || 'ASCII' === $encoding) {
$pos = strrpos($haystack, $needle);
} else {
// only the first character of the needle is searched for on this path
$needle = self::mb_substr($needle, 0, 1, $encoding);
$pos = \iconv_strrpos($haystack, $needle, $encoding);
}
return self::getSubpart($pos, $part, $haystack, $encoding);
}
/**
 * Case-insensitive search for the last occurrence of the first character of
 * $needle, returning a part of $haystack.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string whose first character is searched for
 * @param bool        $part     true returns the part before the match, false the part from the match on
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return string|false false when the character is not found
 */
public static function mb_strrichr($haystack, $needle, $part = false, $encoding = null)
{
    $needle = self::mb_substr($needle, 0, 1, $encoding);

    // mb_strripos() takes ($haystack, $needle, $offset, $encoding): pass an
    // explicit offset of 0 so $encoding is not consumed as the offset argument
    // (which made the search silently fall back to the internal encoding).
    $pos = self::mb_strripos($haystack, $needle, 0, $encoding);

    return self::getSubpart($pos, $part, $haystack, $encoding);
}
/**
 * Case-insensitive mb_strrpos(): both strings are case-folded before searching.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param int         $offset   search offset
 * @param string|null $encoding encoding of the strings (null = internal encoding)
 *
 * @return int|false
 */
public static function mb_strripos($haystack, $needle, $offset = 0, $encoding = null)
{
$haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
$needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);
return self::mb_strrpos($haystack, $needle, $offset, $encoding);
}
/**
 * Searches for the first occurrence of $needle, returning a part of $haystack.
 *
 * The search and the slicing are done byte-wise with strpos()/substr();
 * $encoding is accepted but not used.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to search for
 * @param bool        $part     true returns the part before the match, false the part from the match on
 * @param string|null $encoding unused
 *
 * @return string|false false when $needle is not found
 */
public static function mb_strstr($haystack, $needle, $part = false, $encoding = null)
{
    $at = strpos($haystack, $needle);

    if (false === $at) {
        return false;
    }

    return $part ? substr($haystack, 0, $at) : substr($haystack, $at);
}
/**
 * Returns the polyfill's mbstring settings.
 *
 * Most values are fixed; internal_encoding, language and detect_order reflect
 * the current static state of this class.
 *
 * @param string $type 'all' for the whole array, or the name of a single setting
 *
 * @return array|mixed|false the full array, a single value, or false for an unknown setting
 */
public static function mb_get_info($type = 'all')
{
    $settings = [
        'internal_encoding' => self::$internalEncoding,
        'http_output' => 'pass',
        'http_output_conv_mimetypes' => '^(text/|application/xhtml\+xml)',
        'func_overload' => 0,
        'func_overload_list' => 'no overload',
        'mail_charset' => 'UTF-8',
        'mail_header_encoding' => 'BASE64',
        'mail_body_encoding' => 'BASE64',
        'illegal_chars' => 0,
        'encoding_translation' => 'Off',
        'language' => self::$language,
        'detect_order' => self::$encodingList,
        'substitute_character' => 'none',
        'strict_detection' => 'Off',
    ];

    if ('all' === $type) {
        return $settings;
    }

    return isset($settings[$type]) ? $settings[$type] : false;
}
/**
 * HTTP input encoding detection is not performed by this polyfill.
 *
 * @param string $type ignored
 *
 * @return false always
 */
public static function mb_http_input($type = '')
{
return false;
}
/**
 * Gets or "sets" the HTTP output encoding; only "pass" is supported.
 *
 * @param string|null $encoding encoding to set, or null to read the current one
 *
 * @return string|bool 'pass' when called without argument; otherwise whether
 *                     $encoding is exactly 'pass'
 */
public static function mb_http_output($encoding = null)
{
    if (null === $encoding) {
        return 'pass';
    }

    return 'pass' === $encoding;
}
/**
 * Returns the display width of a string.
 *
 * Characters matched by the ranges in the regular expression below count as
 * width 2; every remaining character counts as width 1.
 *
 * @param string      $s        input string
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return int
 */
public static function mb_strwidth($s, $encoding = null)
{
$encoding = self::getEncoding($encoding);
if ('UTF-8' !== $encoding) {
$s = \iconv($encoding, 'UTF-8//IGNORE', $s);
}
// strip the double-width characters; $wide receives how many were removed
$s = preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);
// 2 per removed character plus 1 per remaining character
return ($wide << 1) + \iconv_strlen($s, 'UTF-8');
}
/**
 * Counts the occurrences of $needle in $haystack.
 *
 * Delegates to the byte-wise substr_count(); $encoding is accepted but not used.
 *
 * @param string      $haystack string to search in
 * @param string      $needle   string to count
 * @param string|null $encoding unused
 *
 * @return int
 */
public static function mb_substr_count($haystack, $needle, $encoding = null)
{
return substr_count($haystack, $needle);
}
/**
 * Output-buffer callback that returns the buffer unchanged.
 *
 * @param string $contents buffer contents
 * @param int    $status   ignored
 *
 * @return string $contents, unmodified
 */
public static function mb_output_handler($contents, $status)
{
return $contents;
}
/**
 * Returns the character for a Unicode code point.
 *
 * The code point is first reduced modulo 0x200000, then encoded as a 1- to
 * 4-byte UTF-8 sequence and finally converted to $encoding when that is not UTF-8.
 *
 * @param int         $code     Unicode code point
 * @param string|null $encoding target encoding (null = internal encoding)
 *
 * @return string|false
 */
public static function mb_chr($code, $encoding = null)
{
if (0x80 > $code %= 0x200000) {
// 1 byte: 0xxxxxxx
$s = \chr($code);
} elseif (0x800 > $code) {
// 2 bytes: 110xxxxx 10xxxxxx
$s = \chr(0xC0 | $code >> 6).\chr(0x80 | $code & 0x3F);
} elseif (0x10000 > $code) {
// 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
$s = \chr(0xE0 | $code >> 12).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
} else {
// 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
$s = \chr(0xF0 | $code >> 18).\chr(0x80 | $code >> 12 & 0x3F).\chr(0x80 | $code >> 6 & 0x3F).\chr(0x80 | $code & 0x3F);
}
if ('UTF-8' !== $encoding = self::getEncoding($encoding)) {
$s = mb_convert_encoding($s, $encoding, 'UTF-8');
}
return $s;
}
/**
 * Returns the Unicode code point of the first character of a string.
 *
 * The string is converted to UTF-8 when $encoding is something else, then at
 * most its first four bytes are decoded according to the lead byte.
 *
 * @param string      $s        input string
 * @param string|null $encoding encoding of $s (null = internal encoding)
 *
 * @return int code point; 0 when unpack() yields no bytes
 */
public static function mb_ord($s, $encoding = null)
{
if ('UTF-8' !== $encoding = self::getEncoding($encoding)) {
$s = mb_convert_encoding($s, 'UTF-8', $encoding);
}
if (1 === \strlen($s)) {
return \ord($s);
}
// from here on $s is the 1-indexed array of byte values produced by unpack()
$code = ($s = unpack('C*', substr($s, 0, 4))) ? $s[1] : 0;
if (0xF0 <= $code) {
// 4-byte sequence
return (($code - 0xF0) << 18) + (($s[2] - 0x80) << 12) + (($s[3] - 0x80) << 6) + $s[4] - 0x80;
}
if (0xE0 <= $code) {
// 3-byte sequence
return (($code - 0xE0) << 12) + (($s[2] - 0x80) << 6) + $s[3] - 0x80;
}
if (0xC0 <= $code) {
// 2-byte sequence
return (($code - 0xC0) << 6) + $s[2] - 0x80;
}
return $code;
}
/**
 * Slices $haystack around a previously found position.
 *
 * @param int|false   $pos      match position, or false when nothing was found
 * @param bool        $part     true returns the part before $pos, false the part from $pos on
 * @param string      $haystack string to slice
 * @param string|null $encoding encoding of $haystack
 *
 * @return string|false false when $pos is false
 */
private static function getSubpart($pos, $part, $haystack, $encoding)
{
    if (false === $pos) {
        return false;
    }

    return $part
        ? self::mb_substr($haystack, 0, $pos, $encoding)
        : self::mb_substr($haystack, $pos, null, $encoding);
}
/**
 * preg_replace_callback() handler used by mb_convert_encoding() for HTML-ENTITIES output.
 *
 * The matched run is first passed through htmlentities(); every multi-byte
 * UTF-8 sequence that remains is then replaced by a decimal numeric entity.
 *
 * @param array $m regex match; $m[0] is the matched run of non-ASCII bytes
 *
 * @return string
 */
private static function html_encoding_callback(array $m)
{
$i = 1;
$entities = '';
// $m becomes the 1-indexed array of byte values of the htmlentities() result
$m = unpack('C*', htmlentities($m[0], \ENT_COMPAT, 'UTF-8'));
while (isset($m[$i])) {
if (0x80 > $m[$i]) {
// ASCII byte (e.g. part of a named entity): copy as is
$entities .= \chr($m[$i++]);
continue;
}
// decode one 4-, 3- or 2-byte UTF-8 sequence into its code point
if (0xF0 <= $m[$i]) {
$c = (($m[$i++] - 0xF0) << 18) + (($m[$i++] - 0x80) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
} elseif (0xE0 <= $m[$i]) {
$c = (($m[$i++] - 0xE0) << 12) + (($m[$i++] - 0x80) << 6) + $m[$i++] - 0x80;
} else {
$c = (($m[$i++] - 0xC0) << 6) + $m[$i++] - 0x80;
}
$entities .= '&#'.$c.';';
}
return $entities;
}
/**
 * preg_replace_callback() handler used by mb_convert_case() for MB_CASE_TITLE.
 *
 * @param array $s regex match; group 1 is upper-cased and group 2 is lower-cased
 *
 * @return string
 */
private static function title_case(array $s)
{
return self::mb_convert_case($s[1], \MB_CASE_UPPER, 'UTF-8').self::mb_convert_case($s[2], \MB_CASE_LOWER, 'UTF-8');
}
/**
 * Loads a data file from the Resources/unidata directory next to this class.
 *
 * @param string $file base name of the data file, without the ".php" extension
 *
 * @return mixed|false whatever the file returns, or false when it does not exist
 */
private static function getData($file)
{
    $path = __DIR__.'/Resources/unidata/'.$file.'.php';

    if (!file_exists($path)) {
        return false;
    }

    return require $path;
}
/**
 * Normalises an encoding name.
 *
 * null resolves to the internal encoding; names are upper-cased; "8BIT" and
 * "BINARY" map to "CP850" and "UTF8" maps to "UTF-8".
 *
 * @param string|null $encoding encoding name
 *
 * @return string normalised encoding name
 */
private static function getEncoding($encoding)
{
    if (null === $encoding) {
        return self::$internalEncoding;
    }

    // fast path for the most common spelling
    if ('UTF-8' === $encoding) {
        return 'UTF-8';
    }

    static $aliases = [
        '8BIT' => 'CP850',
        'BINARY' => 'CP850',
        'UTF8' => 'UTF-8',
    ];

    $upper = strtoupper($encoding);

    return isset($aliases[$upper]) ? $aliases[$upper] : $upper;
}
}

View File

@@ -0,0 +1,13 @@
Symfony Polyfill / Mbstring
===========================
This component provides a partial, native PHP implementation for the
[Mbstring](https://php.net/mbstring) extension.
More information can be found in the
[main Polyfill README](https://github.com/symfony/polyfill/blob/master/README.md).
License
=======
This library is released under the [MIT license](LICENSE).

View File

@@ -0,0 +1,147 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use Symfony\Polyfill\Mbstring as p;
// On PHP 8 and later, hand over to bootstrap80.php and stop processing this file.
if (\PHP_VERSION_ID >= 80000) {
return require __DIR__.'/bootstrap80.php';
}
// Each mb_* function below is declared only when it does not exist yet and
// forwards its arguments to the static method of the same name on p\Mbstring.
if (!function_exists('mb_convert_encoding')) {
function mb_convert_encoding($string, $to_encoding, $from_encoding = null) { return p\Mbstring::mb_convert_encoding($string, $to_encoding, $from_encoding); }
}
if (!function_exists('mb_decode_mimeheader')) {
function mb_decode_mimeheader($string) { return p\Mbstring::mb_decode_mimeheader($string); }
}
if (!function_exists('mb_encode_mimeheader')) {
function mb_encode_mimeheader($string, $charset = null, $transfer_encoding = null, $newline = "\r\n", $indent = 0) { return p\Mbstring::mb_encode_mimeheader($string, $charset, $transfer_encoding, $newline, $indent); }
}
if (!function_exists('mb_decode_numericentity')) {
function mb_decode_numericentity($string, $map, $encoding = null) { return p\Mbstring::mb_decode_numericentity($string, $map, $encoding); }
}
if (!function_exists('mb_encode_numericentity')) {
function mb_encode_numericentity($string, $map, $encoding = null, $hex = false) { return p\Mbstring::mb_encode_numericentity($string, $map, $encoding, $hex); }
}
if (!function_exists('mb_convert_case')) {
function mb_convert_case($string, $mode, $encoding = null) { return p\Mbstring::mb_convert_case($string, $mode, $encoding); }
}
if (!function_exists('mb_internal_encoding')) {
function mb_internal_encoding($encoding = null) { return p\Mbstring::mb_internal_encoding($encoding); }
}
if (!function_exists('mb_language')) {
function mb_language($language = null) { return p\Mbstring::mb_language($language); }
}
if (!function_exists('mb_list_encodings')) {
function mb_list_encodings() { return p\Mbstring::mb_list_encodings(); }
}
if (!function_exists('mb_encoding_aliases')) {
function mb_encoding_aliases($encoding) { return p\Mbstring::mb_encoding_aliases($encoding); }
}
if (!function_exists('mb_check_encoding')) {
function mb_check_encoding($value = null, $encoding = null) { return p\Mbstring::mb_check_encoding($value, $encoding); }
}
if (!function_exists('mb_detect_encoding')) {
function mb_detect_encoding($string, $encodings = null, $strict = false) { return p\Mbstring::mb_detect_encoding($string, $encodings, $strict); }
}
if (!function_exists('mb_detect_order')) {
function mb_detect_order($encoding = null) { return p\Mbstring::mb_detect_order($encoding); }
}
// mb_parse_str() is implemented inline on top of parse_str(); it reports
// whether the parsed result is non-empty.
if (!function_exists('mb_parse_str')) {
function mb_parse_str($string, &$result = []) { parse_str($string, $result); return (bool) $result; }
}
if (!function_exists('mb_strlen')) {
function mb_strlen($string, $encoding = null) { return p\Mbstring::mb_strlen($string, $encoding); }
}
if (!function_exists('mb_strpos')) {
function mb_strpos($haystack, $needle, $offset = 0, $encoding = null) { return p\Mbstring::mb_strpos($haystack, $needle, $offset, $encoding); }
}
if (!function_exists('mb_strtolower')) {
function mb_strtolower($string, $encoding = null) { return p\Mbstring::mb_strtolower($string, $encoding); }
}
if (!function_exists('mb_strtoupper')) {
function mb_strtoupper($string, $encoding = null) { return p\Mbstring::mb_strtoupper($string, $encoding); }
}
if (!function_exists('mb_substitute_character')) {
function mb_substitute_character($substitute_character = null) { return p\Mbstring::mb_substitute_character($substitute_character); }
}
if (!function_exists('mb_substr')) {
function mb_substr($string, $start, $length = 2147483647, $encoding = null) { return p\Mbstring::mb_substr($string, $start, $length, $encoding); }
}
if (!function_exists('mb_stripos')) {
function mb_stripos($haystack, $needle, $offset = 0, $encoding = null) { return p\Mbstring::mb_stripos($haystack, $needle, $offset, $encoding); }
}
if (!function_exists('mb_stristr')) {
function mb_stristr($haystack, $needle, $before_needle = false, $encoding = null) { return p\Mbstring::mb_stristr($haystack, $needle, $before_needle, $encoding); }
}
if (!function_exists('mb_strrchr')) {
function mb_strrchr($haystack, $needle, $before_needle = false, $encoding = null) { return p\Mbstring::mb_strrchr($haystack, $needle, $before_needle, $encoding); }
}
if (!function_exists('mb_strrichr')) {
function mb_strrichr($haystack, $needle, $before_needle = false, $encoding = null) { return p\Mbstring::mb_strrichr($haystack, $needle, $before_needle, $encoding); }
}
if (!function_exists('mb_strripos')) {
function mb_strripos($haystack, $needle, $offset = 0, $encoding = null) { return p\Mbstring::mb_strripos($haystack, $needle, $offset, $encoding); }
}
if (!function_exists('mb_strrpos')) {
function mb_strrpos($haystack, $needle, $offset = 0, $encoding = null) { return p\Mbstring::mb_strrpos($haystack, $needle, $offset, $encoding); }
}
if (!function_exists('mb_strstr')) {
function mb_strstr($haystack, $needle, $before_needle = false, $encoding = null) { return p\Mbstring::mb_strstr($haystack, $needle, $before_needle, $encoding); }
}
if (!function_exists('mb_get_info')) {
function mb_get_info($type = 'all') { return p\Mbstring::mb_get_info($type); }
}
if (!function_exists('mb_http_output')) {
function mb_http_output($encoding = null) { return p\Mbstring::mb_http_output($encoding); }
}
if (!function_exists('mb_strwidth')) {
function mb_strwidth($string, $encoding = null) { return p\Mbstring::mb_strwidth($string, $encoding); }
}
if (!function_exists('mb_substr_count')) {
function mb_substr_count($haystack, $needle, $encoding = null) { return p\Mbstring::mb_substr_count($haystack, $needle, $encoding); }
}
if (!function_exists('mb_output_handler')) {
function mb_output_handler($string, $status) { return p\Mbstring::mb_output_handler($string, $status); }
}
if (!function_exists('mb_http_input')) {
function mb_http_input($type = null) { return p\Mbstring::mb_http_input($type); }
}
if (!function_exists('mb_convert_variables')) {
function mb_convert_variables($to_encoding, $from_encoding, &...$vars) { return p\Mbstring::mb_convert_variables($to_encoding, $from_encoding, ...$vars); }
}
if (!function_exists('mb_ord')) {
function mb_ord($string, $encoding = null) { return p\Mbstring::mb_ord($string, $encoding); }
}
if (!function_exists('mb_chr')) {
function mb_chr($codepoint, $encoding = null) { return p\Mbstring::mb_chr($codepoint, $encoding); }
}
if (!function_exists('mb_scrub')) {
function mb_scrub($string, $encoding = null) { $encoding = null === $encoding ? mb_internal_encoding() : $encoding; return mb_convert_encoding($string, $encoding, $encoding); }
}
if (!function_exists('mb_str_split')) {
function mb_str_split($string, $length = 1, $encoding = null) { return p\Mbstring::mb_str_split($string, $length, $encoding); }
}
if (extension_loaded('mbstring')) {
return;
}
if (!defined('MB_CASE_UPPER')) {
define('MB_CASE_UPPER', 0);
}
if (!defined('MB_CASE_LOWER')) {
define('MB_CASE_LOWER', 1);
}
if (!defined('MB_CASE_TITLE')) {
define('MB_CASE_TITLE', 2);
}

View File

@@ -0,0 +1,143 @@
<?php
/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
use Symfony\Polyfill\Mbstring as p;
// Typed variant of the polyfill guards (the signatures use union types such as
// array|string|null). Each global mb_* function is declared only when no function of that name
// exists yet. Unless noted otherwise it forwards to the static method of the same name on
// p\Mbstring, casting nullable scalar arguments ((string), (int), (bool)) before the call.
if (!function_exists('mb_convert_encoding')) {
function mb_convert_encoding(array|string|null $string, ?string $to_encoding, array|string|null $from_encoding = null): array|string|false { return p\Mbstring::mb_convert_encoding($string ?? '', (string) $to_encoding, $from_encoding); }
}
if (!function_exists('mb_decode_mimeheader')) {
function mb_decode_mimeheader(?string $string): string { return p\Mbstring::mb_decode_mimeheader((string) $string); }
}
if (!function_exists('mb_encode_mimeheader')) {
function mb_encode_mimeheader(?string $string, ?string $charset = null, ?string $transfer_encoding = null, ?string $newline = "\r\n", ?int $indent = 0): string { return p\Mbstring::mb_encode_mimeheader((string) $string, $charset, $transfer_encoding, (string) $newline, (int) $indent); }
}
if (!function_exists('mb_decode_numericentity')) {
function mb_decode_numericentity(?string $string, array $map, ?string $encoding = null): string { return p\Mbstring::mb_decode_numericentity((string) $string, $map, $encoding); }
}
if (!function_exists('mb_encode_numericentity')) {
function mb_encode_numericentity(?string $string, array $map, ?string $encoding = null, ?bool $hex = false): string { return p\Mbstring::mb_encode_numericentity((string) $string, $map, $encoding, (bool) $hex); }
}
if (!function_exists('mb_convert_case')) {
function mb_convert_case(?string $string, ?int $mode, ?string $encoding = null): string { return p\Mbstring::mb_convert_case((string) $string, (int) $mode, $encoding); }
}
if (!function_exists('mb_internal_encoding')) {
function mb_internal_encoding(?string $encoding = null): string|bool { return p\Mbstring::mb_internal_encoding($encoding); }
}
if (!function_exists('mb_language')) {
function mb_language(?string $language = null): string|bool { return p\Mbstring::mb_language($language); }
}
if (!function_exists('mb_list_encodings')) {
function mb_list_encodings(): array { return p\Mbstring::mb_list_encodings(); }
}
if (!function_exists('mb_encoding_aliases')) {
function mb_encoding_aliases(?string $encoding): array { return p\Mbstring::mb_encoding_aliases((string) $encoding); }
}
if (!function_exists('mb_check_encoding')) {
function mb_check_encoding(array|string|null $value = null, ?string $encoding = null): bool { return p\Mbstring::mb_check_encoding($value, $encoding); }
}
if (!function_exists('mb_detect_encoding')) {
function mb_detect_encoding(?string $string, array|string|null $encodings = null, ?bool $strict = false): string|false { return p\Mbstring::mb_detect_encoding((string) $string, $encodings, (bool) $strict); }
}
if (!function_exists('mb_detect_order')) {
function mb_detect_order(array|string|null $encoding = null): array|bool { return p\Mbstring::mb_detect_order($encoding); }
}
// mb_parse_str() does not go through p\Mbstring: it calls parse_str() directly, fills $result by
// reference and returns whether the parsed array is non-empty.
if (!function_exists('mb_parse_str')) {
function mb_parse_str(?string $string, &$result = []): bool { parse_str((string) $string, $result); return (bool) $result; }
}
if (!function_exists('mb_strlen')) {
function mb_strlen(?string $string, ?string $encoding = null): int { return p\Mbstring::mb_strlen((string) $string, $encoding); }
}
if (!function_exists('mb_strpos')) {
function mb_strpos(?string $haystack, ?string $needle, ?int $offset = 0, ?string $encoding = null): int|false { return p\Mbstring::mb_strpos((string) $haystack, (string) $needle, (int) $offset, $encoding); }
}
if (!function_exists('mb_strtolower')) {
function mb_strtolower(?string $string, ?string $encoding = null): string { return p\Mbstring::mb_strtolower((string) $string, $encoding); }
}
if (!function_exists('mb_strtoupper')) {
function mb_strtoupper(?string $string, ?string $encoding = null): string { return p\Mbstring::mb_strtoupper((string) $string, $encoding); }
}
if (!function_exists('mb_substitute_character')) {
function mb_substitute_character(string|int|null $substitute_character = null): string|int|bool { return p\Mbstring::mb_substitute_character($substitute_character); }
}
// $length is forwarded uncast, so a null length reaches p\Mbstring::mb_substr() as null.
if (!function_exists('mb_substr')) {
function mb_substr(?string $string, ?int $start, ?int $length = null, ?string $encoding = null): string { return p\Mbstring::mb_substr((string) $string, (int) $start, $length, $encoding); }
}
if (!function_exists('mb_stripos')) {
function mb_stripos(?string $haystack, ?string $needle, ?int $offset = 0, ?string $encoding = null): int|false { return p\Mbstring::mb_stripos((string) $haystack, (string) $needle, (int) $offset, $encoding); }
}
if (!function_exists('mb_stristr')) {
function mb_stristr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_stristr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }
}
if (!function_exists('mb_strrchr')) {
function mb_strrchr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_strrchr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }
}
if (!function_exists('mb_strrichr')) {
function mb_strrichr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_strrichr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }
}
if (!function_exists('mb_strripos')) {
function mb_strripos(?string $haystack, ?string $needle, ?int $offset = 0, ?string $encoding = null): int|false { return p\Mbstring::mb_strripos((string) $haystack, (string) $needle, (int) $offset, $encoding); }
}
if (!function_exists('mb_strrpos')) {
function mb_strrpos(?string $haystack, ?string $needle, ?int $offset = 0, ?string $encoding = null): int|false { return p\Mbstring::mb_strrpos((string) $haystack, (string) $needle, (int) $offset, $encoding); }
}
if (!function_exists('mb_strstr')) {
function mb_strstr(?string $haystack, ?string $needle, ?bool $before_needle = false, ?string $encoding = null): string|false { return p\Mbstring::mb_strstr((string) $haystack, (string) $needle, (bool) $before_needle, $encoding); }
}
if (!function_exists('mb_get_info')) {
function mb_get_info(?string $type = 'all'): array|string|int|false { return p\Mbstring::mb_get_info((string) $type); }
}
if (!function_exists('mb_http_output')) {
function mb_http_output(?string $encoding = null): string|bool { return p\Mbstring::mb_http_output($encoding); }
}
if (!function_exists('mb_strwidth')) {
function mb_strwidth(?string $string, ?string $encoding = null): int { return p\Mbstring::mb_strwidth((string) $string, $encoding); }
}
if (!function_exists('mb_substr_count')) {
function mb_substr_count(?string $haystack, ?string $needle, ?string $encoding = null): int { return p\Mbstring::mb_substr_count((string) $haystack, (string) $needle, $encoding); }
}
if (!function_exists('mb_output_handler')) {
function mb_output_handler(?string $string, ?int $status): string { return p\Mbstring::mb_output_handler((string) $string, (int) $status); }
}
if (!function_exists('mb_http_input')) {
function mb_http_input(?string $type = null): array|string|false { return p\Mbstring::mb_http_input($type); }
}
// At least one variable is required here ($var); it and any further ones are taken by reference.
// A null $from_encoding is forwarded as an empty string.
if (!function_exists('mb_convert_variables')) {
function mb_convert_variables(?string $to_encoding, array|string|null $from_encoding, mixed &$var, mixed &...$vars): string|false { return p\Mbstring::mb_convert_variables((string) $to_encoding, $from_encoding ?? '', $var, ...$vars); }
}
if (!function_exists('mb_ord')) {
function mb_ord(?string $string, ?string $encoding = null): int|false { return p\Mbstring::mb_ord((string) $string, $encoding); }
}
if (!function_exists('mb_chr')) {
function mb_chr(?int $codepoint, ?string $encoding = null): string|false { return p\Mbstring::mb_chr((int) $codepoint, $encoding); }
}
// mb_scrub() is built from the other wrappers: it falls back to mb_internal_encoding() when no
// encoding is given and then converts the string from that encoding to the same encoding.
if (!function_exists('mb_scrub')) {
function mb_scrub(?string $string, ?string $encoding = null): string { $encoding ??= mb_internal_encoding(); return mb_convert_encoding((string) $string, $encoding, $encoding); }
}
if (!function_exists('mb_str_split')) {
function mb_str_split(?string $string, ?int $length = 1, ?string $encoding = null): array { return p\Mbstring::mb_str_split((string) $string, (int) $length, $encoding); }
}
// When the mbstring extension is loaded, stop executing this file here; the constant
// definitions below are then never reached.
if (extension_loaded('mbstring')) {
return;
}
// Case-mode constants (0 = upper, 1 = lower, 2 = title), each defined only if still undefined.
if (!defined('MB_CASE_UPPER')) {
define('MB_CASE_UPPER', 0);
}
if (!defined('MB_CASE_LOWER')) {
define('MB_CASE_LOWER', 1);
}
if (!defined('MB_CASE_TITLE')) {
define('MB_CASE_TITLE', 2);
}

View File

@@ -0,0 +1,38 @@
{
"name": "symfony/polyfill-mbstring",
"type": "library",
"description": "Symfony polyfill for the Mbstring extension",
"keywords": ["polyfill", "shim", "compatibility", "portable", "mbstring"],
"homepage": "https://symfony.com",
"license": "MIT",
"authors": [
{
"name": "Nicolas Grekas",
"email": "p@tchwork.com"
},
{
"name": "Symfony Community",
"homepage": "https://symfony.com/contributors"
}
],
"require": {
"php": ">=7.1"
},
"autoload": {
"psr-4": { "Symfony\\Polyfill\\Mbstring\\": "" },
"files": [ "bootstrap.php" ]
},
"suggest": {
"ext-mbstring": "For best performance"
},
"minimum-stability": "dev",
"extra": {
"branch-alias": {
"dev-main": "1.23-dev"
},
"thanks": {
"name": "symfony/polyfill",
"url": "https://github.com/symfony/polyfill"
}
}
}

View File

@@ -0,0 +1,15 @@
<?php
/**
 * Autoload callback: maps a class name onto a file below this directory's "src" folder.
 *
 * Namespace separators become directory separators and the "Asan/PHPExcel/" segment is
 * dropped, so Asan\PHPExcel\Reader\Csv resolves to src/Reader/Csv.php. Nothing happens when
 * the resulting file does not exist.
 *
 * @param string $class Class name handed over by the autoload mechanism
 */
function classLoader($class) {
    $separator = DIRECTORY_SEPARATOR;

    // Step 1: namespace separators -> directory separators.
    $relativePath = str_replace('\\', $separator, $class);
    // Step 2: strip the vendor prefix (now written with directory separators).
    $relativePath = str_replace('Asan' . $separator . 'PHPExcel' . $separator, '', $relativePath);

    $file = __DIR__ . $separator . 'src' . $separator . $relativePath . '.php';
    if (!file_exists($file)) {
        return;
    }

    require_once $file;
}

// Register the callback on the SPL autoload stack.
spl_autoload_register('classLoader');

View File

@@ -0,0 +1,12 @@
<?php
/**
* Reader Interface
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel\Contract;
/**
 * Contract for spreadsheet readers: a reader is countable and can be iterated with seeking.
 * No methods are added beyond those of the two parent interfaces.
 */
interface ReaderInterface extends \Countable, \SeekableIterator
{
}

View File

@@ -0,0 +1,114 @@
<?php
/**
* PHP Excel
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel;
use Asan\PHPExcel\Exception\ReaderException;
class Excel {
    /**
     * Load a file with the reader that matches its extension.
     *
     * Side effect: installs an error handler (for everything except E_ERROR) that stays
     * installed after this method returns. The handler appends a line to
     * "<logPath>/excel-YYYY-MM-DD.log" when $logPath is set and does nothing otherwise.
     *
     * @param string $file Path of the file to load
     * @param \Closure|string|null $callback A closure is called with the new reader; a string is
     *                                       used as the input encoding instead
     * @param string|null $encoding Passed to the reader's setInputEncoding() when that method exists
     * @param string $ext Extension to use; when empty, the lower-cased extension of $file is taken
     * @param string $logPath Log directory (created on demand); empty disables logging
     *
     * @throws ReaderException When no reader format is known for the extension
     * @return \Asan\PHPExcel\Reader\BaseReader
     */
    public static function load($file, $callback = null, $encoding = null, $ext = '', $logPath = '') {
        $errorLogger = function($errorNo, $errorMsg, $errorFile, $errorLine) use ($logPath) {
            if (!$logPath) {
                return;
            }

            if (!file_exists($logPath)) {
                mkdir($logPath, 0755, true);
            }

            $line = sprintf(
                "%s\t%s.%s\t%s\t%s", date("Y-m-d H:i:s"), self::class, 'ERROR',
                "[$errorNo]$errorMsg in $errorFile:$errorLine", PHP_EOL
            );
            file_put_contents("$logPath/excel-" . date('Y-m-d'). '.log', $line, FILE_APPEND);
        };
        set_error_handler($errorLogger, E_ALL ^ E_ERROR);

        if (!$ext) {
            $ext = strtolower(pathinfo($file, PATHINFO_EXTENSION));
        }

        $format = self::getFormatByExtension($ext);
        if (empty($format)) {
            throw new ReaderException("Could not identify file format for file [$file] with extension [$ext]");
        }

        // The format name doubles as the reader class name inside the Reader sub-namespace.
        $readerClass = __NAMESPACE__ . '\\Reader\\' . $format;
        $reader = new $readerClass;

        if ($callback instanceof \Closure) {
            // Let the caller configure the reader
            call_user_func($callback, $reader);
        } elseif ($callback && is_string($callback)) {
            // A (non-empty) string in the callback slot is shorthand for the encoding
            $encoding = $callback;
        }

        if ($encoding && method_exists($reader, 'setInputEncoding')) {
            $reader->setInputEncoding($encoding);
        }

        return $reader->load($file);
    }

    /**
     * Identify the reader format for a file extension.
     *
     * @param string $ext Extension without the dot; matched as given (load() lower-cases it first)
     * @return string "Xlsx", "Xls", "Csv", or an empty string when the extension is unknown
     */
    protected static function getFormatByExtension($ext) {
        $extensionsByFormat = [
            // Excel 2007
            'Xlsx' => ['xlsx', 'xlsm', 'xltx', 'xltm'],
            // Excel5
            'Xls' => ['xls', 'xlt'],
            // CSV
            'Csv' => ['csv', 'txt'],
        ];

        foreach ($extensionsByFormat as $format => $extensions) {
            // Loose comparison on purpose, same as a switch statement
            if (in_array($ext, $extensions)) {
                return $format;
            }
        }

        return '';
    }
}

View File

@@ -0,0 +1,12 @@
<?php
/**
* Parser Exception
*
* @author Janson
* @create 2017-11-27
*/
namespace Asan\PHPExcel\Exception;
/**
 * Exception type for parsing failures; adds nothing to the base \Exception.
 */
class ParserException extends \Exception
{
}

View File

@@ -0,0 +1,12 @@
<?php
/**
* Reader Exception
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel\Exception;
/**
 * Exception type for reader failures; adds nothing to the base \Exception.
 */
class ReaderException extends \Exception
{
}

View File

@@ -0,0 +1,839 @@
<?php
/**
* Excel2007 Parser
*
* @author Janson
* @create 2017-12-02
*/
namespace Asan\PHPExcel\Parser;
use Asan\PHPExcel\Exception\ParserException;
use Asan\PHPExcel\Exception\ReaderException;
class Excel2007 {
const CELL_TYPE_SHARED_STR = 's';
/**
* Temporary directory
*
* @var string
*/
protected $tmpDir;
/**
* ZipArchive reader
*
* @var \ZipArchive
*/
protected $zip;
/**
* Worksheet reader
*
* @var \XMLReader
*/
protected $worksheetXML;
/**
* SharedStrings reader
*
* @var \XMLReader
*/
protected $sharedStringsXML;
/**
* SharedStrings position
*
* @var int
*/
private $sharedStringsPosition = -1;
/**
* The current sheet of the file
*
* @var int
*/
private $sheetIndex = 0;
/**
* Ignore empty row
*
* @var bool
*/
private $ignoreEmpty = false;
/**
* Style xfs
*
* @var array
*/
private $styleXfs;
/**
* Number formats
*
* @var array
*/
private $formats;
/**
* Parsed number formats
*
* @var array
*/
private $parsedFormats;
/**
* Worksheets
*
* @var array
*/
private $sheets;
/**
* Default options for libxml loader
*
* @var int
*/
private static $libXmlLoaderOptions;
/**
* Base date
*
* @var \DateTime
*/
private static $baseDate;
private static $decimalSeparator = '.';
private static $thousandSeparator = ',';
private static $currencyCode = '';
private static $runtimeInfo = ['GMPSupported' => false];
/**
* Use ZipArchive reader to extract the relevant data streams from the ZipArchive file
*
* @throws ParserException|ReaderException
* @param string $file
*/
public function loadZip($file) {
    $this->openFile($file);

    // The base date is shared by all instances and built only once: midnight UTC on
    // "day 0" of January 1900.
    if (!self::$baseDate) {
        $base = new \DateTime;
        $base->setTimezone(new \DateTimeZone('UTC'));
        $base->setDate(1900, 1, 0);
        $base->setTime(0, 0, 0);
        self::$baseDate = $base;
    }

    // Remember whether gmp_gcd() is available so formatValue() can use it for fractions.
    if (function_exists('gmp_gcd')) {
        self::$runtimeInfo['GMPSupported'] = true;
    }
}
/**
* Ignore empty row
*
* @param bool $ignoreEmpty
*
* @return $this
*/
public function ignoreEmptyRow($ignoreEmpty) {
// Store the flag as given (no cast); parseWorksheetInfo() and getRow() read it.
$this->ignoreEmpty = $ignoreEmpty;
// Return the parser itself so calls can be chained.
return $this;
}
/**
* Whether empty rows are ignored
*
* @return bool
*/
public function isIgnoreEmptyRow() {
// Plain getter for the flag set through ignoreEmptyRow() (false by default).
return $this->ignoreEmpty;
}
/**
* Set sheet index
*
* @param int $index
*
* @return $this
*/
public function setSheetIndex($index) {
    // Already on the requested sheet (loose comparison): keep the current reader position.
    if ($index == $this->sheetIndex) {
        return $this;
    }

    // Switch sheets and reopen the worksheet reader on the new sheet's XML file.
    $this->sheetIndex = $index;
    $this->getWorksheetXML();

    return $this;
}
/**
* Get sheet index
*
* @return int
*/
public function getSheetIndex() {
// Zero-based: getWorksheetXML() adds 1 to build the "sheetN.xml" file name.
return $this->sheetIndex;
}
/**
* Return worksheet info (Name, Last Column Letter, Last Column Index, Total Rows, Total Columns)
*
* @throws ReaderException
* @return array
*/
public function parseWorksheetInfo() {
    // Parsed once per instance; later calls return the cached list.
    if ($this->sheets !== null) {
        return $this->sheets;
    }

    $workbookXML = simplexml_load_string(
        $this->securityScan($this->zip->getFromName('xl/workbook.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
    );

    $this->sheets = [];
    if (!(isset($workbookXML->sheets) && $workbookXML->sheets)) {
        return $this->sheets;
    }

    $xml = new \XMLReader();
    $index = 0;
    foreach ($workbookXML->sheets->sheet as $sheet) {
        $info = [
            'name' => (string)$sheet['name'], 'lastColumnLetter' => '', 'lastColumnIndex' => 0,
            'totalRows' => 0, 'totalColumns' => 0
        ];

        // Sheets are looked up by position: the n-th <sheet> entry is read from sheetN.xml,
        // which is extracted into the temp dir (getWorksheetXML() opens the same file later).
        $this->zip->extractTo($this->tmpDir, $file = 'xl/worksheets/sheet' . (++$index) . '.xml');
        $xml->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());
        $xml->setParserProperty(\XMLReader::DEFAULTATTRS, true);

        $nonEmpty = false;
        $columnLetter = '';
        while ($xml->read()) {
            if ($xml->name == 'row') {
                if (!$this->ignoreEmpty && $xml->nodeType == \XMLReader::ELEMENT) {
                    // Counting every row: the "r" attribute of the last <row> wins.
                    $info['totalRows'] = (int)$xml->getAttribute('r');
                } elseif ($xml->nodeType == \XMLReader::END_ELEMENT) {
                    // Counting non-empty rows only: one more if this row held a value.
                    if ($this->ignoreEmpty && $nonEmpty) {
                        $info['totalRows']++;
                        $nonEmpty = false;
                    }

                    // $columnLetter is the last cell seen in this row. Column names must be
                    // ordered by length first ("AA" comes after "Z"); a plain string
                    // comparison would rank "AA" before "Z" and cap the sheet at 26 columns.
                    $lengthDiff = strlen($columnLetter) - strlen($info['lastColumnLetter']);
                    if ($lengthDiff > 0 || ($lengthDiff === 0 && $columnLetter > $info['lastColumnLetter'])) {
                        $info['lastColumnLetter'] = $columnLetter;
                    }
                }
            } elseif ($xml->name == 'c' && $xml->nodeType == \XMLReader::ELEMENT) {
                // Keep only the letters of the cell reference, e.g. "AB12" -> "AB".
                $columnLetter = preg_replace('{[^[:alpha:]]}S', '', $xml->getAttribute('r'));
            } elseif ($this->ignoreEmpty && !$nonEmpty && $xml->name == 'v'
                && $xml->nodeType == \XMLReader::ELEMENT && trim($xml->readString()) !== '') {
                $nonEmpty = true;
            }
        }

        if ($info['lastColumnLetter']) {
            $info['totalColumns'] = Format::columnIndexFromString($info['lastColumnLetter']);
            $info['lastColumnIndex'] = $info['totalColumns'] - 1;
        }

        $this->sheets[] = $info;
    }

    $xml->close();

    return $this->sheets;
}
/**
* Get shared string
*
* @param int $position
* @return string
*/
protected function getSharedString($position) {
    $value = '';
    $file = 'xl/sharedStrings.xml';

    // First use: create the reader and extract the shared strings part into the temp dir.
    if ($this->sharedStringsXML === null) {
        $this->sharedStringsXML = new \XMLReader();
        $this->zip->extractTo($this->tmpDir, $file);
    }

    // The reader only moves forward. (Re)open the file when nothing has been read yet or when
    // the wanted entry is not ahead of the reader any more. That includes the entry that was
    // just returned (hence "<="): the reader already sits past its closing tag, so without a
    // rewind a repeated lookup would scan to the end of the file and return ''.
    if ($this->sharedStringsPosition < 0 || $position <= $this->sharedStringsPosition) {
        $this->sharedStringsXML->open($this->tmpDir . '/' . $file, null, self::getLibXmlLoaderOptions());
        $this->sharedStringsPosition = -1;
    }

    while ($this->sharedStringsXML->read()) {
        $name = $this->sharedStringsXML->name;
        $nodeType = $this->sharedStringsXML->nodeType;

        if ($name == 'si') {
            if ($nodeType == \XMLReader::ELEMENT) {
                // Each opening <si> is the next entry of the table.
                $this->sharedStringsPosition++;
            } elseif ($position == $this->sharedStringsPosition && $nodeType == \XMLReader::END_ELEMENT) {
                // Closing tag of the wanted entry: done.
                break;
            }
        } elseif ($name == 't' && $position == $this->sharedStringsPosition && $nodeType == \XMLReader::ELEMENT) {
            // An entry may hold several <t> runs; their trimmed texts are concatenated.
            $value .= trim($this->sharedStringsXML->readString());
        }
    }

    return $value;
}
/**
* Parse styles info
*
* @throws ReaderException
*/
protected function parseStyles() {
    // Styles are parsed once per instance.
    if ($this->styleXfs !== null) {
        return;
    }

    $stylesXML = simplexml_load_string(
        $this->securityScan($this->zip->getFromName('xl/styles.xml')), 'SimpleXMLElement', self::getLibXmlLoaderOptions()
    );

    $this->styleXfs = $this->formats = [];
    if (!$stylesXML) {
        return;
    }

    // One entry per <xf> under <cellXfs>, in document order: the number format id of that style.
    if (isset($stylesXML->cellXfs->xf) && $stylesXML->cellXfs->xf) {
        foreach ($stylesXML->cellXfs->xf as $xf) {
            $numFmtId = isset($xf['numFmtId']) ? (int)$xf['numFmtId'] : 0;

            // The style's own id is kept when applyNumberFormat is present (or the id is 0);
            // otherwise the style falls back to the "General" format.
            $keepOwnId = isset($xf['applyNumberFormat']) || $numFmtId == 0;
            $this->styleXfs[] = $keepOwnId ? $numFmtId : Format::FORMAT_GENERAL;
        }
    }

    // Format codes declared in the workbook (<numFmts>), keyed by their numeric id.
    if (isset($stylesXML->numFmts->numFmt) && $stylesXML->numFmts->numFmt) {
        foreach ($stylesXML->numFmts->numFmt as $numFmt) {
            if (!isset($numFmt['numFmtId'], $numFmt['formatCode'])) {
                continue;
            }

            $this->formats[(int)$numFmt['numFmtId']] = (string)$numFmt['formatCode'];
        }
    }
}
/**
* Get worksheet XMLReader
*/
protected function getWorksheetXML() {
    // Create the reader on first use; afterwards the same instance is reopened.
    if (null === $this->worksheetXML) {
        $this->worksheetXML = new \XMLReader();
    }

    // Sheet files are numbered from 1 while the sheet index starts at 0.
    $sheetNumber = $this->getSheetIndex() + 1;
    $sheetFile = $this->tmpDir . '/xl/worksheets/sheet' . $sheetNumber . '.xml';

    $this->worksheetXML->open($sheetFile, null, self::getLibXmlLoaderOptions());
}
/**
* Get row data
*
* @param int $rowIndex
* @param int $columnLimit
*
* @throws ReaderException
* @return array|bool
*/
public function getRow($rowIndex, $columnLimit = 0) {
    $this->parseStyles();

    // Asking for the first row (re)opens the worksheet so reading starts from the top.
    $rowIndex === 0 && $this->getWorksheetXML();

    $sharedString = false;
    $index = $styleId = 0;
    // With a column limit the row is pre-filled so it always has $columnLimit entries.
    $row = $columnLimit ? array_fill(0, $columnLimit, '') : [];

    while ($canRead = $this->worksheetXML->read()) {
        $name = $this->worksheetXML->name;
        $type = $this->worksheetXML->nodeType;

        if ($name == 'row') {
            // The next <row> in the file is not the requested one (row numbers have a gap):
            // stop here and return the (empty) row built so far.
            // NOTE(review): this <row> start tag has already been consumed, so the next call
            // continues with its cells without re-checking the row number; confirm the
            // behaviour for gaps of more than one row.
            if (!$this->ignoreEmpty && $type == \XMLReader::ELEMENT
                && $rowIndex+1 != (int)$this->worksheetXML->getAttribute('r')) {
                $this->worksheetXML->moveToElement();
                break;
            }

            // End of row
            if ($type == \XMLReader::END_ELEMENT) {
                break;
            }
        }

        // Past the column limit: skip everything until the row ends.
        if ($columnLimit > 0 && $index >= $columnLimit) {
            continue;
        }

        switch ($name) {
            // Cell
            case 'c':
                if ($type == \XMLReader::END_ELEMENT) {
                    // "continue 2" targets the while loop. A bare "continue" inside switch
                    // acts like "break" and triggers a warning since PHP 7.3.
                    continue 2;
                }

                $styleId = (int)$this->worksheetXML->getAttribute('s');
                $letter = preg_replace('{[^[:alpha:]]}S', '', $this->worksheetXML->getAttribute('r'));
                $index = Format::columnIndexFromString($letter) - 1;

                // Determine cell type
                $sharedString = false;
                if ($this->worksheetXML->getAttribute('t') == self::CELL_TYPE_SHARED_STR) {
                    $sharedString = true;
                }
                break;

            // Cell value
            case 'v':
            case 'is':
                if ($type == \XMLReader::END_ELEMENT) {
                    continue 2;
                }

                $value = $this->worksheetXML->readString();
                if ($sharedString) {
                    // The cell stores an index into the shared strings table.
                    $value = $this->getSharedString($value);
                }

                // Format value if necessary
                if ($value !== '' && $styleId && isset($this->styleXfs[$styleId])) {
                    $value = $this->formatValue($value, $styleId);
                } elseif ($value && is_numeric($value)) {
                    $value = (float)$value;
                }

                $row[$index] = $value;
                break;
        }
    }

    // read() returned false: the end of the sheet was reached without completing a row.
    if ($canRead === false) {
        return false;
    }

    return $row;
}
/**
* Close ZipArchive and XMLReader, and remove the temp dir
*/
public function __destruct() {
    // The archive is closed only when a temp dir was set, i.e. after openFile() opened it.
    if ($this->zip && $this->tmpDir) {
        $this->zip->close();
    }

    // Close whichever XML readers were created (worksheet first, then shared strings).
    foreach ([$this->worksheetXML, $this->sharedStringsXML] as $reader) {
        if ($reader) {
            $reader->close();
        }
    }

    // Delete the extracted files, then drop all references.
    $this->removeDir($this->tmpDir);

    $this->zip = null;
    $this->worksheetXML = null;
    $this->sharedStringsXML = null;
    $this->tmpDir = null;
}
/**
* Remove dir
*
* @param string $dir
*/
protected function removeDir($dir) {
    // Nothing to do for an empty path or something that is not a directory.
    if (!$dir || !is_dir($dir)) {
        return;
    }

    $handle = opendir($dir);
    if ($handle === false) {
        // Directory could not be opened; this runs from the destructor, so give up quietly
        // instead of handing an invalid handle to readdir().
        return;
    }

    // Compare against false explicitly: an entry named "0" is falsy and would otherwise end
    // the loop early, leaving files behind and making rmdir() fail.
    while (false !== ($item = readdir($handle))) {
        if ($item === '.' || $item === '..') {
            continue;
        }

        $path = $dir . '/' . $item;
        // Files are deleted directly, anything else is treated as a sub-directory.
        is_file($path) ? unlink($path) : $this->removeDir($path);
    }

    closedir($handle);
    rmdir($dir);
}
/**
* Formats the value according to the index
*
* @param string $value
* @param int $index Format index
*
* @throws \Exception
* @return string Formatted cell value
*/
private function formatValue($value, $index) {
    // Only numeric cell values are formatted; everything else is returned untouched.
    if (!is_numeric($value)) {
        return $value;
    }

    // $index arrives as a style index; translate it into the number format id of that style.
    if (isset($this->styleXfs[$index]) && $this->styleXfs[$index] !== false) {
        $index = $this->styleXfs[$index];
    } else {
        return $value;
    }

    // A special case for the "General" format
    if ($index == 0) {
        return is_numeric($value) ? (float)$value : $value;
    }

    // Parsed formats are cached per format id.
    // NOTE(review): the negative/zero section of a multi-section code is chosen (and abs()
    // applied) only while the format is parsed for the first value; later values reuse the
    // cached section - confirm this is intended.
    $format = $this->parsedFormats[$index] ?? [];

    if (empty($format)) {
        $format = [
            'code' => false, 'type' => false, 'scale' => 1, 'thousands' => false, 'currency' => false
        ];

        // Built-in format codes take precedence over the ones declared in the workbook.
        if (isset(Format::$buildInFormats[$index])) {
            $format['code'] = Format::$buildInFormats[$index];
        } elseif (isset($this->formats[$index])) {
            $format['code'] = str_replace('"', '', $this->formats[$index]);
        }

        // Format code found, now parsing the format
        if ($format['code']) {
            // Sections are separated by ";" (positive;negative;zero;text)
            $sections = explode(';', $format['code']);
            $format['code'] = $sections[0];

            switch (count($sections)) {
                case 2:
                    if ($value < 0) {
                        $format['code'] = $sections[1];
                    }

                    $value = abs($value);
                    break;

                case 3:
                case 4:
                    if ($value < 0) {
                        $format['code'] = $sections[1];
                    } elseif ($value == 0) {
                        $format['code'] = $sections[2];
                    }

                    $value = abs($value);
                    break;
            }
        }

        // Stripping colors
        $format['code'] = trim(preg_replace('/^\\[[a-zA-Z]+\\]/', '', $format['code']));

        // Percentages
        if (substr($format['code'], -1) == '%') {
            $format['type'] = 'Percentage';
        } elseif (preg_match('/(\[\$[A-Z]*-[0-9A-F]*\])*[hmsdy]/i', $format['code'])) {
            // Any of h, m, s, d, y in the code marks a date/time format; the code is then
            // translated into a date() style format string.
            $format['type'] = 'DateTime';
            $format['code'] = trim(preg_replace('/^(\[\$[A-Z]*-[0-9A-F]*\])/i', '', $format['code']));
            $format['code'] = strtolower($format['code']);
            $format['code'] = strtr($format['code'], Format::$dateFormatReplacements);

            // After lower-casing, an upper-case "A" can only come from the replacements above.
            if (strpos($format['code'], 'A') === false) {
                $format['code'] = strtr($format['code'], Format::$dateFormatReplacements24);
            } else {
                $format['code'] = strtr($format['code'], Format::$dateFormatReplacements12);
            }
        } elseif ($format['code'] == '[$EUR ]#,##0.00_-') {
            $format['type'] = 'Euro';
        } else {
            // Removing skipped characters
            $format['code'] = preg_replace('/_./', '', $format['code']);

            // Removing unnecessary escaping
            $format['code'] = preg_replace("/\\\\/", '', $format['code']);

            // Removing string quotes
            $format['code'] = str_replace(['"', '*'], '', $format['code']);

            // Removing thousands separator
            if (strpos($format['code'], '0,0') !== false || strpos($format['code'], '#,#') !== false) {
                $format['thousands'] = true;
            }

            $format['code'] = str_replace(['0,0', '#,#'], ['00', '##'], $format['code']);

            // Scaling (Commas indicate the power)
            $scale = 1;
            $matches = [];

            if (preg_match('/(0|#)(,+)/', $format['code'], $matches)) {
                $scale = pow(1000, strlen($matches[2]));

                // Removing the commas
                $format['code'] = preg_replace(['/0,+/', '/#,+/'], ['0', '#'], $format['code']);
            }

            $format['scale'] = $scale;
            if (preg_match('/#?.*\?\/\?/', $format['code'])) {
                $format['type'] = 'Fraction';
            } else {
                $format['code'] = str_replace('#', '', $format['code']);
                $matches = [];

                // Derive an sprintf() pattern from the run of zeros (ignoring [..] sections).
                if (preg_match('/(0+)(\.?)(0*)/', preg_replace('/\[[^\]]+\]/', '', $format['code']), $matches)) {
                    list(, $integer, $decimalPoint, $decimal) = $matches;

                    $format['minWidth'] = strlen($integer) + strlen($decimalPoint) + strlen($decimal);
                    $format['decimals'] = $decimal;
                    $format['precision'] = strlen($format['decimals']);
                    $format['pattern'] = '%0' . $format['minWidth'] . '.' . $format['precision'] . 'f';
                }
            }

            // A "[$<currency>-<locale>]" token carries the currency symbol/code.
            $matches = [];
            if (preg_match('/\[\$(.*)\]/u', $format['code'], $matches)) {
                $currencyCode = explode('-', $matches[1]);
                if ($currencyCode) {
                    $currencyCode = $currencyCode[0];
                }

                if (!$currencyCode) {
                    $currencyCode = self::$currencyCode;
                }

                $format['currency'] = $currencyCode;
            }

            $format['code'] = trim($format['code']);
        }

        $this->parsedFormats[$index] = $format;
    }

    // Applying format to value
    if ($format) {
        if ($format['code'] == '@') {
            return (string)$value;
        } elseif ($format['type'] == 'Percentage') { // Percentages
            if ($format['code'] === '0%') {
                $value = round(100*$value, 0) . '%';
            } else {
                $value = sprintf('%.2f%%', round(100*$value, 2));
            }
        } elseif ($format['type'] == 'DateTime') { // Dates and times
            $days = (int)$value;

            // Correcting for Feb 29, 1900
            if ($days > 60) {
                $days--;
            }

            // At this point time is a fraction of a day
            $time = ($value - (int)$value);

            // Here time is converted to seconds
            // Some loss of precision will occur
            $seconds = $time ? (int)($time*86400) : 0;

            $value = clone self::$baseDate;
            $value->add(new \DateInterval('P' . $days . 'D' . ($seconds ? 'T' . $seconds . 'S' : '')));

            $value = $value->format($format['code']);
        } elseif ($format['type'] == 'Euro') {
            $value = 'EUR ' . sprintf('%1.2f', $value);
        } else {
            // Fractional numbers
            if ($format['type'] == 'Fraction' && ($value != (int)$value)) {
                $integer = floor(abs($value));
                $decimal = fmod(abs($value), 1);

                // Removing the integer part and decimal point
                $decimal *= pow(10, strlen($decimal) - 2);
                $decimalDivisor = pow(10, strlen($decimal));

                if (self::$runtimeInfo['GMPSupported']) {
                    $GCD = gmp_strval(gmp_gcd($decimal, $decimalDivisor));
                } else {
                    $GCD = self::GCD($decimal, $decimalDivisor);
                }

                $adjDecimal = $decimal/$GCD;
                $adjDecimalDivisor = $decimalDivisor/$GCD;

                if (strpos($format['code'], '0') !== false || strpos($format['code'], '#') !== false
                    || substr($format['code'], 0, 3) == '? ?') {
                    // The integer part is shown separately apart from the fraction.
                    // The inner ternary needs its own parentheses: "." binds tighter than
                    // "?:", so without them the whole concatenation became the condition and
                    // the result was either just the integer part or just the fraction.
                    $value = ($value < 0 ? '-' : '') . ($integer ? $integer . ' ' : '')
                        . $adjDecimal . '/' . $adjDecimalDivisor;
                } else {
                    // The fraction includes the integer part
                    $adjDecimal += $integer * $adjDecimalDivisor;
                    $value = ($value < 0 ? '-' : '') . $adjDecimal . '/' . $adjDecimalDivisor;
                }
            } else {
                // Scaling
                $value = $value/$format['scale'];
                if (!empty($format['minWidth']) && $format['decimals']) {
                    if ($format['thousands']) {
                        $value = number_format(
                            $value, $format['precision'], self::$decimalSeparator, self::$thousandSeparator
                        );

                        // Put the formatted number where the zero placeholders are in the code
                        $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
                    } else {
                        if (preg_match('/[0#]E[+-]0/i', $format['code'])) {
                            // Scientific format
                            $value = sprintf('%5.2E', $value);
                        } else {
                            $value = sprintf($format['pattern'], $value);
                            $value = preg_replace('/(0+)(\.?)(0*)/', $value, $format['code']);
                        }
                    }
                }
            }

            // currency/Accounting: swap the "[$...]" token left over from the format code for
            // the currency parsed above. (An empty regex pattern, as used here before, makes
            // preg_replace() fail and return null, wiping out the value.)
            if ($format['currency']) {
                $token = [];
                if (preg_match('/\[\$[^\]]*\]/u', $value, $token)) {
                    $value = str_replace($token[0], $format['currency'], $value);
                }
            }
        }
    }

    return $value;
}
/**
 * Greatest common divisor calculation in case GMP extension is not enabled.
 *
 * Iterative Euclidean algorithm applied to the absolute values of both arguments.
 *
 * @param int $number1
 * @param int $number2
 *
 * @return int 0 when both arguments are 0, otherwise their greatest common divisor
 */
private static function GCD($number1, $number2) {
    $number1 = abs($number1);
    $number2 = abs($number2);
    if ($number1 + $number2 == 0) {
        return 0;
    }
    // Each pass replaces the pair (a, b) with (b mod a, a); the GCD is preserved.
    while ($number1 > 0) {
        $remainder = $number2 % $number1;
        $number2 = $number1;
        $number1 = $remainder;
    }
    // $number1 is now 0, so $number2 holds the divisor. Returning it, rather than a
    // placeholder initialised to 1, also yields the correct result for GCD(0, n).
    return $number2;
}
/**
 * Open the given file as a zip archive and check that it is an OOXML workbook.
 *
 * The archive is accepted only when its `_rels/.rels` part contains an
 * officeDocument relationship whose target is `xl/workbook.xml`.
 *
 * @param string $file Path of the file to open
 *
 * @throws ReaderException When the file does not exist or is not readable
 * @throws ParserException When the archive cannot be opened or is not recognised as a workbook
 */
public function openFile($file) {
    $readable = file_exists($file) && is_readable($file);
    if (!$readable) {
        throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
    }

    $officeDocumentType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
    $this->zip = new \ZipArchive();
    $isWorkbook = false;

    if ($this->zip->open($file) === true) {
        $this->tmpDir = sys_get_temp_dir() . '/' . uniqid();

        // The package relationships part identifies the kind of document stored in the archive
        $relationships = simplexml_load_string(
            $this->securityScan($this->zip->getFromName('_rels/.rels')),
            'SimpleXMLElement', self::getLibXmlLoaderOptions()
        );

        if ($relationships !== false) {
            foreach ($relationships->Relationship as $relationship) {
                if ($relationship["Type"] == $officeDocumentType
                    && $relationship["Target"] == 'xl/workbook.xml') {
                    $isWorkbook = true;
                }
            }
        }
    }

    if ($isWorkbook === false) {
        throw new ParserException("The file [$file] is not recognised as a zip archive");
    }
}
/**
 * Reject XML that contains a document type declaration, to prevent XXE/XEE attacks.
 *
 * The check looks for the literal `<!DOCTYPE`, allowing an optional NUL byte before,
 * after and between its characters (presumably to catch multi-byte encodings such as
 * UTF-16 - confirm).
 *
 * @param string $xml XML source to inspect
 *
 * @throws ReaderException When the marker is found
 * @return string The unmodified input when no marker is found
 */
protected function securityScan($xml) {
    $marker = implode('\\0?', str_split('<!DOCTYPE'));
    $regex = '/\\0?' . $marker . '\\0?/';

    if (!preg_match($regex, $xml)) {
        return $xml;
    }

    throw new ReaderException(
        'Detected use of ENTITY in XML, spreadsheet file load() aborted to prevent XXE/XEE attacks'
    );
}
/**
 * Set default options for libxml loader.
 *
 * @param int|null $options Options for the libxml loader. When null, and the libxml
 *                          constants are available, LIBXML_DTDLOAD | LIBXML_DTDATTR is used.
 */
public static function setLibXmlLoaderOptions($options = null) {
    // defined() expects the constant NAME as a string. Passing the constant itself (as this
    // code used to) looks up a constant named after its integer value and is always false,
    // so the documented default was never applied.
    if (is_null($options) && defined('LIBXML_DTDLOAD')) {
        $options = LIBXML_DTDLOAD | LIBXML_DTDATTR;
    }
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0; only call it on older runtimes.
    if (PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
        @libxml_disable_entity_loader($options == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
    }
    self::$libXmlLoaderOptions = $options;
}
/**
 * Get default options for libxml loader.
 * Defaults to LIBXML_DTDLOAD | LIBXML_DTDATTR when not set explicitly.
 *
 * @return int|null Default options for libxml loader (null only when nothing was set
 *                  and the libxml constants are unavailable)
 */
public static function getLibXmlLoaderOptions() {
    // defined() expects the constant NAME as a string; the previous defined(LIBXML_DTDLOAD)
    // was always false, so the default described above was never applied.
    if (is_null(self::$libXmlLoaderOptions) && defined('LIBXML_DTDLOAD')) {
        self::$libXmlLoaderOptions = LIBXML_DTDLOAD | LIBXML_DTDATTR;
    }
    // libxml_disable_entity_loader() is deprecated as of PHP 8.0; only call it on older runtimes.
    if (PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
        @libxml_disable_entity_loader(self::$libXmlLoaderOptions == (LIBXML_DTDLOAD | LIBXML_DTDATTR));
    }
    return self::$libXmlLoaderOptions;
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,276 @@
<?php
/**
* OLE File Read
*
* @author Janson
* @create 2017-11-27
*/
namespace Asan\PHPExcel\Parser\Excel5;
use Asan\PHPExcel\Exception\ParserException;
use Asan\PHPExcel\Exception\ReaderException;
use Asan\PHPExcel\Parser\Format;
defined('IDENTIFIER_OLE') ||
define('IDENTIFIER_OLE', pack('CCCCCCCC', 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1));
/**
 * Reader for OLE compound files.
 *
 * Loads the whole file into memory, rebuilds the big-block and small-block allocation
 * chains, reads the directory stream and records the indexes of the workbook and
 * summary-information entries.
 *
 * NOTE(review): sector chains are followed until the end marker -2 without any cycle or
 * range check, so a corrupt file can presumably make these loops run for a very long
 * time - consider validating sector ids.
 */
class OLERead {
    // OLE identifier
    const IDENTIFIER_OLE = IDENTIFIER_OLE;

    // Size of a sector = 512 bytes
    const BIG_BLOCK_SIZE = 0x200;

    // Size of a short sector = 64 bytes
    const SMALL_BLOCK_SIZE = 0x40;

    // Size of a directory entry always = 128 bytes
    const PROPERTY_STORAGE_BLOCK_SIZE = 0x80;

    // Minimum size of a standard stream = 4096 bytes, streams smaller than this are stored as short streams
    const SMALL_BLOCK_THRESHOLD = 0x1000;

    // header offsets
    const NUM_BIG_BLOCK_DEPOT_BLOCKS_POS = 0x2c;
    const ROOT_START_BLOCK_POS = 0x30;
    const SMALL_BLOCK_DEPOT_BLOCK_POS = 0x3c;
    const EXTENSION_BLOCK_POS = 0x44;
    const NUM_EXTENSION_BLOCK_POS = 0x48;
    const BIG_BLOCK_DEPOT_BLOCKS_POS = 0x4c;

    // property storage offsets (directory offsets)
    const SIZE_OF_NAME_POS = 0x40;
    const TYPE_POS = 0x42;
    const START_BLOCK_POS = 0x74;
    const SIZE_POS = 0x78;

    // Indexes into $props of the well-known directory entries (null when absent)
    public $workbook = null;
    public $summaryInformation = null;
    public $documentSummaryInformation = null;

    // Raw file contents
    protected $data = '';
    // Concatenated allocation-table sectors for standard and short streams
    protected $bigBlockChain = '';
    protected $smallBlockChain = '';
    // Raw directory stream
    protected $entry = '';
    // Parsed directory entries: name, type, startBlock, size
    protected $props = [];
    // Index into $props of the root entry
    protected $rootEntry = 0;
    // Header values read in read()
    protected $sbdStartBlock = 0;
    protected $extensionBlock = 0;
    protected $rootStartBlock = 0;
    protected $numExtensionBlocks = 0;
    protected $numBigBlockDepotBlocks = 0;

    /**
     * Read the file
     *
     * @throws ParserException|ReaderException
     * @param string $file
     */
    public function read($file) {
        $this->openFile($file);

        // Total number of sectors used for the SAT
        $this->numBigBlockDepotBlocks = Format::getInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);

        // SecID of the first sector of the directory stream
        $this->rootStartBlock = Format::getInt4d($this->data, self::ROOT_START_BLOCK_POS);

        // SecID of the first sector of the SSAT (or -2 if not extant)
        $this->sbdStartBlock = Format::getInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);

        // SecID of the first sector of the MSAT (or -2 if no additional sectors are used)
        $this->extensionBlock = Format::getInt4d($this->data, self::EXTENSION_BLOCK_POS);

        // Total number of sectors used by MSAT
        $this->numExtensionBlocks = Format::getInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);

        $bigBlockDepotBlocks = [];
        $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;
        $bbdBlocks = $this->numBigBlockDepotBlocks;

        // When extension sectors exist, the header holds as many ids as fit after the offset
        if ($this->numExtensionBlocks != 0) {
            $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
        }

        for ($i = 0; $i < $bbdBlocks; ++$i) {
            $bigBlockDepotBlocks[$i] = Format::getInt4d($this->data, $pos);
            $pos += 4;
        }

        // Remaining ids live in the extension sectors; the last int of each sector links to the next
        for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
            $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
            $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);

            for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
                $bigBlockDepotBlocks[$i] = Format::getInt4d($this->data, $pos);
                $pos += 4;
            }

            $bbdBlocks += $blocksToRead;
            if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
                $this->extensionBlock = Format::getInt4d($this->data, $pos);
            }
        }

        // Concatenate the allocation-table sectors into one lookup string
        $this->bigBlockChain = '';
        $bbs = self::BIG_BLOCK_SIZE / 4;
        for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
            $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;
            $this->bigBlockChain .= substr($this->data, $pos, 4 * $bbs);
        }

        // Same for the short-sector allocation table, following its chain until -2
        $sbdBlock = $this->sbdStartBlock;
        $this->smallBlockChain = '';
        while ($sbdBlock != -2) {
            $pos = ($sbdBlock + 1) * self::BIG_BLOCK_SIZE;
            $this->smallBlockChain .= substr($this->data, $pos, 4 * $bbs);
            $sbdBlock = Format::getInt4d($this->bigBlockChain, $sbdBlock * 4);
        }

        // read the directory stream
        $block = $this->rootStartBlock;
        $this->entry = $this->readData($block);
        $this->readPropertySets();
    }

    /**
     * Open file for reading
     *
     * Loads the complete file into memory and checks the 8-byte OLE signature.
     *
     * @param string $file
     *
     * @throws ReaderException When the file does not exist or is not readable
     * @throws ParserException When the file is empty or does not start with the OLE identifier
     */
    public function openFile($file) {
        // Check if file exists
        if (!file_exists($file) || !is_readable($file)) {
            throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
        }

        // Get the file data
        $this->data = file_get_contents($file);

        // Check OLE identifier
        if (empty($this->data) || substr($this->data, 0, 8) != self::IDENTIFIER_OLE) {
            throw new ParserException("The file [$file] is not recognised as an OLE file");
        }
    }

    /**
     * Extract binary stream data.
     *
     * @param int|null $stream Index of the directory entry in $props
     *
     * @return string|null Stream contents, or null when $stream is null
     */
    public function getStream($stream) {
        if ($stream === null) {
            return null;
        }

        // Streams below the threshold are stored as short sectors inside the root entry's stream
        if ($this->props[$stream]['size'] < self::SMALL_BLOCK_THRESHOLD) {
            $streamData = '';
            $rootData = $this->readData($this->props[$this->rootEntry]['startBlock']);
            $block = $this->props[$stream]['startBlock'];

            while ($block != -2) {
                $pos = $block * self::SMALL_BLOCK_SIZE;
                $streamData .= substr($rootData, $pos, self::SMALL_BLOCK_SIZE);
                $block = Format::getInt4d($this->smallBlockChain, $block * 4);
            }

            return $streamData;
        }

        // Standard stream. The former block-count computation was removed: the size is at
        // least SMALL_BLOCK_THRESHOLD here, so its "zero blocks" early return could never trigger.
        return $this->readData($this->props[$stream]['startBlock']);
    }

    /**
     * Read a standard stream (by joining sectors using information from SAT).
     *
     * @param int $bl Sector ID where the stream starts
     *
     * @return string
     */
    protected function readData($bl) {
        $block = $bl;
        $data = '';

        while ($block != -2) {
            // Sector 0 starts right after the 512-byte header, hence the +1
            $pos = ($block + 1) * self::BIG_BLOCK_SIZE;
            $data .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);
            $block = Format::getInt4d($this->bigBlockChain, $block * 4);
        }

        return $data;
    }

    /**
     * Read entries in the directory stream.
     *
     * Appends one record per 128-byte entry to $props and remembers the indexes of the
     * workbook, root, summary and document-summary entries.
     */
    protected function readPropertySets() {
        $offset = 0;

        // loop through entries, each entry is 128 bytes
        $entryLen = strlen($this->entry);
        while ($offset < $entryLen) {
            // entry data (128 bytes)
            $d = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE);

            // A truncated trailing entry cannot be parsed; indexing into it would read
            // uninitialised string offsets, so stop instead.
            if (strlen($d) < self::PROPERTY_STORAGE_BLOCK_SIZE) {
                break;
            }

            // size in bytes of name
            $nameSize = ord($d[self::SIZE_OF_NAME_POS]) | (ord($d[self::SIZE_OF_NAME_POS + 1]) << 8);

            // type of entry
            $type = ord($d[self::TYPE_POS]);

            // sectorID of first sector or short sector, if this entry refers to a stream (the case with workbook)
            // sectorID of first sector of the short-stream container stream, if this entry is root entry
            $startBlock = Format::getInt4d($d, self::START_BLOCK_POS);
            $size = Format::getInt4d($d, self::SIZE_POS);

            // Strip the NUL bytes from the stored name
            $name = str_replace("\x00", '', substr($d, 0, $nameSize));

            $this->props[] = [
                'name' => $name,
                'type' => $type,
                'startBlock' => $startBlock,
                'size' => $size,
            ];

            // tmp helper to simplify checks
            $upName = strtoupper($name);

            // Workbook directory entry (BIFF5 uses Book, BIFF8 uses Workbook)
            if (($upName === 'WORKBOOK') || ($upName === 'BOOK')) {
                $this->workbook = count($this->props) - 1;
            } elseif ($upName === 'ROOT ENTRY' || $upName === 'R') {
                // Root entry
                $this->rootEntry = count($this->props) - 1;
            }

            // Summary information
            if ($name == chr(5) . 'SummaryInformation') {
                $this->summaryInformation = count($this->props) - 1;
            }

            // Additional Document Summary information
            if ($name == chr(5) . 'DocumentSummaryInformation') {
                $this->documentSummaryInformation = count($this->props) - 1;
            }

            $offset += self::PROPERTY_STORAGE_BLOCK_SIZE;
        }
    }
}

View File

@@ -0,0 +1,63 @@
<?php
/**
* Excel5 RC4
*
* @author Janson
* @create 2017-11-29
*/
namespace Asan\PHPExcel\Parser\Excel5;
/**
 * RC4 stream cipher that keeps its state between calls.
 */
class RC4 {
    // Cipher context: permutation table and the two running indexes
    private $s = [];
    private $i = 0;
    private $j = 0;

    /**
     * Build the cipher state from the key (key-scheduling step).
     *
     * @param string $key Encryption key/passphrase
     */
    public function __construct($key) {
        $keyLength = strlen($key);
        $table = range(0, 255);
        $mix = 0;

        for ($index = 0; $index < 256; $index++) {
            $mix = ($mix + $table[$index] + ord($key[$index % $keyLength])) % 256;

            $swap = $table[$index];
            $table[$index] = $table[$mix];
            $table[$mix] = $swap;
        }

        $this->s = $table;
        $this->i = 0;
        $this->j = 0;
    }

    /**
     * Encrypt or decrypt data (the operation is symmetric).
     *
     * The internal state advances with every byte, so consecutive calls continue
     * the same key stream.
     *
     * @param string $data Data to encrypt/decrypt
     *
     * @return string
     */
    public function RC4($data) {
        $table = $this->s;
        $x = $this->i;
        $y = $this->j;

        $length = strlen($data);
        for ($offset = 0; $offset < $length; $offset++) {
            $x = ($x + 1) % 256;
            $y = ($y + $table[$x]) % 256;

            $swap = $table[$x];
            $table[$x] = $table[$y];
            $table[$y] = $swap;

            $keyByte = $table[($table[$x] + $table[$y]) % 256];
            $data[$offset] = chr(ord($data[$offset]) ^ $keyByte);
        }

        $this->s = $table;
        $this->i = $x;
        $this->j = $y;

        return $data;
    }
}

View File

@@ -0,0 +1,278 @@
<?php
/**
* Format helper
*
* @author Janson
* @create 2017-11-27
*/
namespace Asan\PHPExcel\Parser;
use Asan\PHPExcel\Exception\ParserException;
/**
 * Format helper: built-in number format table, date-mask conversion tables,
 * column label/index conversion and little-endian integer readers.
 */
class Format {
    //Base date of 1st Jan 1900 = 1.0
    const CALENDAR_WINDOWS_1900 = 1900;

    //Base date of 2nd Jan 1904 = 1.0
    const CALENDAR_MAC_1904 = 1904;

    // Pre-defined formats
    const FORMAT_GENERAL = 'General';
    const FORMAT_TEXT = '@';
    const FORMAT_PERCENTAGE = '0%';
    const FORMAT_PERCENTAGE_00 = '0.00%';
    const FORMAT_CURRENCY_EUR_SIMPLE = '[$EUR ]#,##0.00_-';

    /**
     * Built-in number formats keyed by format id
     *
     * @var array
     */
    public static $buildInFormats = [
        0 => self::FORMAT_GENERAL,
        1 => '0',
        2 => '0.00',
        3 => '#,##0',
        4 => '#,##0.00',
        5 => '"$"#,##0_),("$"#,##0)',
        6 => '"$"#,##0_),[Red]("$"#,##0)',
        7 => '"$"#,##0.00_),("$"#,##0.00)',
        8 => '"$"#,##0.00_),[Red]("$"#,##0.00)',
        9 => '0%',
        10 => '0.00%',
        //11 => '0.00E+00',
        12 => '# ?/?',
        13 => '# ??/??',
        14 => 'yyyy/m/d',
        15 => 'd-mmm-yy',
        16 => 'd-mmm',
        17 => 'mmm-yy',
        18 => 'h:mm AM/PM',
        19 => 'h:mm:ss AM/PM',
        20 => 'h:mm',
        21 => 'h:mm:ss',
        22 => 'yyyy/m/d h:mm',

        // Supplementary formats
        28 => 'm月d日',
        31 => 'yyyy年m月d日',
        32 => 'h时i分',
        33 => 'h时i分ss秒',
        34 => 'AM/PM h时i分',
        35 => 'AM/PM h时i分ss秒',
        55 => 'AM/PM h时i分',
        56 => 'AM/PM h时i分ss秒',
        58 => 'm月d日',

        37 => '#,##0_),(#,##0)',
        38 => '#,##0_),[Red](#,##0)',
        39 => '#,##0.00_),(#,##0.00)',
        40 => '#,##0.00_),[Red](#,##0.00)',
        41 => '_("$"* #,##0_),_("$"* (#,##0),_("$"* "-"_),_(@_)',
        42 => '_(* #,##0_),_(* (#,##0),_(* "-"_),_(@_)',
        43 => '_(* #,##0.00_),_(* (#,##0.00),_(* "-"??_),_(@_)',
        44 => '_("$"* #,##0.00_),_("$"* \(#,##0.00\),_("$"* "-"??_),_(@_)',
        45 => 'mm:ss',
        46 => '[h]:mm:ss',
        47 => 'mm:ss.0',
        48 => '##0.0E+0',
        49 => '@',

        // CHT
        27 => 'yyyy年m月',
        30 => 'm/d/yy',
        36 => '[$-404]e/m/d',
        50 => '[$-404]e/m/d',
        57 => 'yyyy年m月',

        // THA
        59 => 't0',
        60 => 't0.00',
        61 => 't#,##0',
        62 => 't#,##0.00',
        67 => 't0%',
        68 => 't0.00%',
        69 => 't# ?/?',
        70 => 't# ??/??'
    ];

    /**
     * Search/replace values to convert Excel date/time format masks to PHP format masks
     *
     * @var array
     */
    public static $dateFormatReplacements = [
        // first remove escapes related to non-format characters
        '\\' => '',

        // 12-hour suffix
        'am/pm' => 'A',

        // 4-digit year
        'e' => 'Y',
        'yyyy' => 'Y',

        // 2-digit year
        'yy' => 'y',

        // first letter of month - no php equivalent
        'mmmmm' => 'M',

        // full month name
        'mmmm' => 'F',

        // short month name
        'mmm' => 'M',

        // mm is minutes if time, but can also be month w/leading zero
        // so we try to identify times be the inclusion of a : separator in the mask
        // It isn't perfect, but the best way I know how
        ':mm' => ':i',
        'mm:' => 'i:',

        // month leading zero
        'mm' => 'm',
        'm' => 'n',

        // full day of week name
        'dddd' => 'l',

        // short day of week name
        'ddd' => 'D',

        // days leading zero
        'dd' => 'd',
        'd' => 'j',

        // seconds
        'ss' => 's',

        // fractional seconds - no php equivalent
        '.s' => ''
    ];

    /**
     * Search/replace values to convert Excel date/time format masks hours to PHP format masks (24 hr clock)
     *
     * @var array
     */
    public static $dateFormatReplacements24 = [
        'hh' => 'H',
        'h' => 'G'
    ];

    /**
     * Search/replace values to convert Excel date/time format masks hours to PHP format masks (12 hr clock)
     *
     * @var array
     */
    public static $dateFormatReplacements12 = [
        'hh' => 'h',
        'h' => 'g'
    ];

    /**
     * Column index from string
     *
     * Converts a column label of one to three letters (case-insensitive) to its
     * 1-based index: 'A' => 1, 'Z' => 26, 'AA' => 27.
     *
     * @param string $label
     *
     * @throws \Exception When the label is empty or longer than 3 characters
     * @return int
     */
    public static function columnIndexFromString($label = 'A') {
        // Using a lookup cache adds a slight memory overhead, but boosts speed
        // caching using a static within the method is faster than a class static,
        // though it's additional memory overhead
        static $indexCache = [];

        if (isset($indexCache[$label])) {
            return $indexCache[$label];
        }

        // It's surprising how costly the strtoupper() and ord() calls actually are, so we use a lookup array rather
        // than use ord() and make it case insensitive to get rid of the strtoupper() as well. Because it's a static,
        // there's no significant memory overhead either
        static $columnLookup = [
            'A' => 1, 'B' => 2, 'C' => 3, 'D' => 4, 'E' => 5, 'F' => 6, 'G' => 7, 'H' => 8, 'I' => 9, 'J' => 10,
            'K' => 11, 'L' => 12, 'M' => 13, 'N' => 14, 'O' => 15, 'P' => 16, 'Q' => 17, 'R' => 18, 'S' => 19,
            'T' => 20, 'U' => 21, 'V' => 22, 'W' => 23, 'X' => 24, 'Y' => 25, 'Z' => 26, 'a' => 1, 'b' => 2, 'c' => 3,
            'd' => 4, 'e' => 5, 'f' => 6, 'g' => 7, 'h' => 8, 'i' => 9, 'j' => 10, 'k' => 11, 'l' => 12, 'm' => 13,
            'n' => 14, 'o' => 15, 'p' => 16, 'q' => 17, 'r' => 18, 's' => 19, 't' => 20, 'u' => 21, 'v' => 22,
            'w' => 23, 'x' => 24, 'y' => 25, 'z' => 26
        ];

        // We also use the language construct isset() rather than the more costly strlen() function to match the length
        // of $pString for improved performance
        if (!isset($indexCache[$label])) {
            if (!isset($label[0]) || isset($label[3])) {
                throw new ParserException('Column string can not be empty or longer than 3 characters');
            }

            if (!isset($label[1])) {
                $indexCache[$label] = $columnLookup[$label];
            } elseif (!isset($label[2])) {
                $indexCache[$label] = $columnLookup[$label[0]] * 26 + $columnLookup[$label[1]];
            } else {
                $indexCache[$label] = $columnLookup[$label[0]] * 676 + $columnLookup[$label[1]] * 26
                    + $columnLookup[$label[2]];
            }
        }

        return $indexCache[$label];
    }

    /**
     * String from columnindex
     *
     * Converts a 0-based column index to its label: 0 => 'A', 25 => 'Z', 26 => 'AA'.
     *
     * @param int $column
     * @return string
     */
    public static function stringFromColumnIndex($column = 0) {
        // Using a lookup cache adds a slight memory overhead, but boosts speed
        // caching using a static within the method is faster than a class static,
        // though it's additional memory overhead
        static $stringCache = [];

        if (!isset($stringCache[$column])) {
            // Determine column string.
            // Quotients are truncated with an explicit (int) cast: handing a fractional float
            // to chr() relies on implicit float-to-int conversion, which is deprecated in PHP 8.1.
            if ($column < 26) {
                $stringCache[$column] = chr(65 + $column);
            } elseif ($column < 702) {
                $stringCache[$column] = chr(64 + (int)($column / 26)) . chr(65 + $column % 26);
            } else {
                $stringCache[$column] = chr(64 + (int)(($column - 26) / 676))
                    . chr(65 + (int)((($column - 26) % 676) / 26))
                    . chr(65 + $column % 26);
            }
        }

        return $stringCache[$column];
    }

    /**
     * Read 16-bit unsigned integer
     *
     * Bytes are read little-endian starting at $pos.
     *
     * @param string $data
     * @param int $pos
     * @return int
     */
    public static function getUInt2d($data, $pos) {
        return ord($data[$pos]) | (ord($data[$pos + 1]) << 8);
    }

    /**
     * Read 32-bit signed integer
     *
     * Bytes are read little-endian starting at $pos.
     *
     * @param string $data
     * @param int $pos
     * @return int
     */
    public static function getInt4d($data, $pos) {
        // FIX: represent numbers correctly on 64-bit system
        // http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
        // Hacked by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
        $ord24 = ord($data[$pos + 3]);

        if ($ord24 >= 128) {
            // negative number
            $ord24 = -abs((256 - $ord24) << 24);
        } else {
            $ord24 = ($ord24 & 127) << 24;
        }

        return ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16) | $ord24;
    }
}

View File

@@ -0,0 +1,162 @@
<?php
/**
* Reader Abstract
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel\Reader;
use Asan\PHPExcel\Contract\ReaderInterface;
/**
 * Base class for the file readers: wraps a generator of rows behind iterator-style
 * methods and stores the optional row/column limits.
 *
 * NOTE(review): the #[\ReturnTypeWillChange] attributes assume ReaderInterface extends
 * the SPL iterator interfaces (not visible from this file - confirm). On PHP 8.1+ they
 * suppress the "return type should be compatible" deprecation; on PHP 7 the line is
 * parsed as a comment.
 */
abstract class BaseReader implements ReaderInterface {
    /**
     * Generator
     *
     * @var \Generator
     */
    protected $generator;

    /**
     * File row count
     *
     * @var int
     */
    protected $count;

    /**
     * Max row number
     *
     * @var int
     */
    protected $rowLimit;

    /**
     * Max column number
     *
     * @var int
     */
    protected $columnLimit;

    /**
     * Return the current element
     *
     * @return array
     */
    #[\ReturnTypeWillChange]
    public function current() {
        return $this->generator->current();
    }

    /**
     * Move forward to next element
     */
    #[\ReturnTypeWillChange]
    public function next() {
        $this->generator->next();
    }

    /**
     * Return the key of the current element
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key() {
        return $this->generator->key();
    }

    /**
     * Checks if current position is valid
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid() {
        return $this->generator->valid();
    }

    /**
     * Rewind the Iterator to the first element
     *
     * Generators cannot be rewound once advanced, so a fresh one is created instead.
     */
    #[\ReturnTypeWillChange]
    public function rewind() {
        $this->generator = $this->makeGenerator();
    }

    /**
     * Make the generator
     *
     * Empty in the base class; subclasses override it to yield rows.
     */
    protected function makeGenerator() {
    }

    /**
     * Ignore empty row
     *
     * No-op in the base class; subclasses override it.
     *
     * @param bool $ignoreEmpty
     */
    public function ignoreEmptyRow($ignoreEmpty = false) {
    }

    /**
     * Set row limit
     *
     * @param int $limit
     * @return $this
     */
    public function setRowLimit($limit = null) {
        $this->rowLimit = $limit;

        return $this;
    }

    /**
     * Get row limit
     *
     * @return int
     */
    public function getRowLimit() {
        return $this->rowLimit;
    }

    /**
     * Set column limit
     *
     * @param int $limit
     * @return $this
     */
    public function setColumnLimit($limit = null) {
        $this->columnLimit = $limit;

        return $this;
    }

    /**
     * Takes a row and traverses the file to that row
     *
     * @param int $row 1-based row number
     *
     * @throws \InvalidArgumentException When $row is not positive
     */
    #[\ReturnTypeWillChange]
    public function seek($row) {
        if ($row <= 0) {
            throw new \InvalidArgumentException("Row $row is invalid");
        }

        $key = $this->key();

        // From here on $row is compared against the generator's 0-based keys
        if ($key !== --$row) {
            // Generators only move forward: restart when the target lies behind the
            // current position (or no position is known yet)
            if ($row < $key || is_null($key) || $row == 0) {
                $this->rewind();
            }

            while ($this->valid() && $row > $this->key()) {
                $this->next();
            }
        }
    }

    /**
     * Get column limit
     *
     * @return int
     */
    public function getColumnLimit() {
        return $this->columnLimit;
    }
}

View File

@@ -0,0 +1,319 @@
<?php
/**
* Csv Reader
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel\Reader;
use Asan\PHPExcel\Exception\ReaderException;
/**
 * CSV reader: streams rows from a file handle with fgetcsv(), with optional
 * auto-detection of the delimiter ("sep=" line) and of the input encoding.
 */
class Csv extends BaseReader {
    /**
     * File handle
     *
     * @var resource
     */
    protected $fileHandle;

    /**
     * File read start
     *
     * Byte offset of the first data byte (after a detected BOM).
     *
     * @var int
     */
    protected $start = 0;

    /**
     * Input encoding
     *
     * @var string
     */
    protected $inputEncoding;

    /**
     * Delimiter
     *
     * @var string
     */
    protected $delimiter;

    /**
     * Enclosure
     *
     * @var string
     */
    protected $enclosure = '"';

    /**
     * Ignore empty row
     *
     * @var bool
     */
    protected $ignoreEmpty = false;

    /**
     * Loads Excel from file
     *
     * @param string $file
     *
     * @throws ReaderException
     * @return $this
     */
    public function load($file) {
        // Temporarily enable line-ending detection; the previous value is restored below
        $lineEnding = ini_get('auto_detect_line_endings');
        ini_set('auto_detect_line_endings', true);

        // Open file
        $this->openFile($file);
        $this->autoDetection();
        $this->generator = $this->makeGenerator();

        ini_set('auto_detect_line_endings', $lineEnding);

        return $this;
    }

    /**
     * Count elements of the selected sheet
     *
     * The result is computed once by iterating the whole file, then cached.
     * The file position is saved and restored around the scan.
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function count() {
        if ($this->count === null) {
            $position = ftell($this->fileHandle);
            $this->count = iterator_count($this->makeGenerator(true));
            fseek($this->fileHandle, $position);
        }

        return $this->count;
    }

    /**
     * Make the generator
     *
     * @param bool $calculate When true, yield an empty value per row (used for counting only)
     * @return \Generator
     */
    protected function makeGenerator($calculate = false) {
        fseek($this->fileHandle, $this->start);

        $finish = 0;
        while (($row = fgetcsv($this->fileHandle, 0, $this->delimiter, $this->enclosure)) !== false) {
            if ($this->ignoreEmpty && (empty($row) || trim(implode('', $row)) === '')) {
                continue;
            }

            // Counting mode skips the limits and the value conversion
            if ($calculate) {
                yield;
                continue;
            }

            if ($this->rowLimit > 0 && ++$finish > $this->rowLimit) {
                break;
            }

            if ($this->columnLimit > 0) {
                $row = array_slice($row, 0, $this->columnLimit);
            }

            foreach ($row as &$value) {
                if ($value != '') {
                    if (is_numeric($value)) {
                        $value = (float)$value;
                    }

                    // Convert encoding if necessary
                    if ($this->inputEncoding !== 'UTF-8') {
                        $value = mb_convert_encoding($value, 'UTF-8', $this->inputEncoding);
                    }
                }
            }

            // Drop the reference so later writes to $value cannot touch the row
            unset($value);

            yield $row;
        }
    }

    /**
     * Detect the file delimiter and encoding
     *
     * Only values that were not set explicitly are detected, using the first line of the file.
     */
    protected function autoDetection() {
        if (($this->delimiter !== null && $this->inputEncoding !== null)
            || ($line = fgets($this->fileHandle)) === false) {
            return;
        }

        if ($this->delimiter === null) {
            $this->delimiter = ',';

            // A first line of the form "sep=X" names the delimiter
            if ((strlen(trim($line, "\r\n")) == 5) && (stripos($line, 'sep=') === 0)) {
                $this->delimiter = substr($line, 4, 1);
            }
        }

        if ($this->inputEncoding === null) {
            $this->inputEncoding = 'UTF-8';

            // Byte order marks: UTF-32 (4 bytes), UTF-16 (2 bytes), UTF-8 (3 bytes)
            if (($bom = substr($line, 0, 4)) == "\xFF\xFE\x00\x00" || $bom == "\x00\x00\xFE\xFF") {
                $this->start = 4;
                $this->inputEncoding = 'UTF-32';
            } elseif (($bom = substr($line, 0, 2)) == "\xFF\xFE" || $bom == "\xFE\xFF") {
                $this->start = 2;
                $this->inputEncoding = 'UTF-16';
            } elseif (($bom = substr($line, 0, 3)) == "\xEF\xBB\xBF") {
                $this->start = 3;
            }

            // No BOM found: guess from the content of the first line
            if (!$this->start) {
                $encoding = mb_detect_encoding($line, 'ASCII, UTF-8, GB2312, GBK');

                if ($encoding) {
                    if ($encoding == 'EUC-CN') {
                        $encoding = 'GB2312';
                    } elseif ($encoding == 'CP936') {
                        $encoding = 'GBK';
                    }

                    $this->inputEncoding = $encoding;
                }
            }
        }

        fseek($this->fileHandle, $this->start);
    }

    /**
     * Ignore empty row
     *
     * @param bool $ignoreEmpty
     *
     * @return $this
     */
    public function ignoreEmptyRow($ignoreEmpty = false) {
        $this->ignoreEmpty = $ignoreEmpty;

        return $this;
    }

    /**
     * Set input encoding
     *
     * @param string $encoding
     * @return $this
     */
    public function setInputEncoding($encoding = 'UTF-8') {
        $this->inputEncoding = $encoding;

        return $this;
    }

    /**
     * Get input encoding
     *
     * @return string
     */
    public function getInputEncoding() {
        return $this->inputEncoding;
    }

    /**
     * Set delimiter
     *
     * @param string $delimiter Delimiter, defaults to ,
     * @return $this
     */
    public function setDelimiter($delimiter = ',') {
        $this->delimiter = $delimiter;

        return $this;
    }

    /**
     * Get delimiter
     *
     * @return string
     */
    public function getDelimiter() {
        return $this->delimiter;
    }

    /**
     * Set enclosure
     *
     * An empty value falls back to the double quote.
     *
     * @param string $enclosure Enclosure, defaults to "
     * @return $this
     */
    public function setEnclosure($enclosure = '"') {
        if ($enclosure == '') {
            $enclosure = '"';
        }

        $this->enclosure = $enclosure;

        return $this;
    }

    /**
     * Get enclosure
     *
     * @return string
     */
    public function getEnclosure() {
        return $this->enclosure;
    }

    /**
     * Can the current Reader read the file?
     *
     * The probe handle is closed again and the reader's own handle is left untouched.
     * Previously the probe overwrote $this->fileHandle and left a closed resource behind,
     * which broke an already loaded reader and made __destruct() close the handle twice.
     *
     * @param string $file
     *
     * @return bool
     */
    public function canRead($file) {
        $previousHandle = $this->fileHandle;

        try {
            $this->openFile($file);
        } catch (\Exception $e) {
            $this->fileHandle = $previousHandle;

            return false;
        }

        fclose($this->fileHandle);
        $this->fileHandle = $previousHandle;

        return true;
    }

    /**
     * Open file for reading
     *
     * @param string $file
     *
     * @throws ReaderException When the file is missing, unreadable or cannot be opened
     */
    protected function openFile($file) {
        // Check if file exists
        if (!file_exists($file) || !is_readable($file)) {
            throw new ReaderException("Could not open file [$file] for reading! File does not exist.");
        }

        // Open file
        $this->fileHandle = fopen($file, 'r');
        if ($this->fileHandle === false) {
            throw new ReaderException("Could not open file [$file] for reading.");
        }
    }

    /**
     * Close file and release generator
     */
    public function __destruct() {
        // is_resource() is false for an already closed handle, so fclose() is never called twice
        if (is_resource($this->fileHandle)) {
            fclose($this->fileHandle);
        }

        $this->generator = null;
    }
}

View File

@@ -0,0 +1,163 @@
<?php
/**
* Xls Reader
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel\Reader;
use Asan\PHPExcel\Parser\Excel5;
use Asan\PHPExcel\Parser\Excel5\OLERead;
/**
 * Xls reader: iterates the rows of one worksheet through the Excel5 parser.
 */
class Xls extends BaseReader {
    /**
     * Xls parser
     *
     * @var Excel5
     */
    protected $parser;

    /**
     * File row and column count
     *
     * Null until count() is first called, then [rows, columns].
     *
     * @var array|int
     */
    protected $count;

    public function __construct() {
        $this->parser = new Excel5();
    }

    /**
     * Loads Excel from file
     *
     * @param string $file
     *
     * @return $this
     */
    public function load($file) {
        $this->parser->loadOLE($file);
        $this->generator = $this->makeGenerator();

        return $this;
    }

    /**
     * Count elements of the selected sheet
     *
     * Both values are capped by the row/column limits when those are positive.
     * NOTE(review): the attribute assumes ReaderInterface extends \Countable - confirm.
     *
     * @param bool $all When true return [rows, columns], otherwise only the row count
     * @return int|array
     */
    #[\ReturnTypeWillChange]
    public function count($all = false) {
        if ($this->count === null) {
            $row = $column = 0;
            if ($sheet = $this->sheets($this->parser->getSheetIndex())) {
                $row = $sheet['totalRows'] ?? 0;
                $column = $sheet['totalColumns'] ?? 0;
            }

            $this->count = [
                $this->rowLimit > 0 ? min($row, $this->rowLimit) : $row,
                $this->columnLimit > 0 ? min($column, $this->columnLimit) : $column
            ];
        }

        return $all ? $this->count : $this->count[0];
    }

    /**
     * Get the work sheets info
     *
     * @param int $index Sheet index, or null for all sheets
     * @return array Info of the requested sheet (empty array when unknown) or of all sheets
     */
    public function sheets($index = null) {
        $sheets = $this->parser->parseWorksheetInfo();

        if ($index !== null) {
            return $sheets[$index] ?? [];
        }

        return $sheets;
    }

    /**
     * Make the generator
     *
     * Yields rows until the row count is reached or the parser returns false.
     * Skipped empty rows do not count towards the limit.
     *
     * @return \Generator
     */
    protected function makeGenerator() {
        list($rowLimit, $columnLimit) = $this->count(true);

        $line = $finish = 0;
        while ($finish < $rowLimit && ($row = $this->parser->getRow($line++, $columnLimit)) !== false) {
            if ($this->parser->isIgnoreEmptyRow() && trim(implode('', $row)) === '') {
                continue;
            }

            $finish++;
            yield $row;
        }
    }

    /**
     * Ignore empty row
     *
     * @param bool $ignoreEmpty
     *
     * @return $this
     */
    public function ignoreEmptyRow($ignoreEmpty = false) {
        $this->parser->ignoreEmptyRow($ignoreEmpty);

        return $this;
    }

    /**
     * Set sheet index
     *
     * Switching sheets clears the cached count and restarts the iteration.
     *
     * @param int $index
     * @return $this
     */
    public function setSheetIndex($index) {
        if ($index != $this->parser->getSheetIndex()) {
            $this->parser->setSheetIndex($index);

            $this->count = null;
            $this->rewind();
        }

        return $this;
    }

    /**
     * Can the current Reader read the file?
     *
     * @param string $file
     *
     * @return bool
     */
    public function canRead($file) {
        try {
            // Use ParseXL for the hard work.
            $ole = new OLERead();

            // open file
            $ole->openFile($file);
        } catch (\Exception $e) {
            return false;
        }

        return true;
    }

    /**
     * Release parser and generator
     */
    public function __destruct() {
        $this->parser = null;
        $this->generator = null;
    }
}

View File

@@ -0,0 +1,161 @@
<?php
/**
* Xlsx Reader
*
* @author Janson
* @create 2017-11-23
*/
namespace Asan\PHPExcel\Reader;
use Asan\PHPExcel\Parser\Excel2007;
/**
 * Xlsx reader: iterates the rows of one worksheet through the Excel2007 parser.
 */
class Xlsx extends BaseReader {
    /**
     * Xlsx parser
     *
     * @var Excel2007
     */
    protected $parser;

    /**
     * File row and column count
     *
     * Null until count() is first called, then [rows, columns].
     *
     * @var array|int
     */
    protected $count;

    public function __construct() {
        $this->parser = new Excel2007();
    }

    /**
     * Loads Excel from file
     *
     * @param string $file
     *
     * @return $this
     */
    public function load($file) {
        $this->parser->loadZip($file);
        $this->generator = $this->makeGenerator();

        return $this;
    }

    /**
     * Count elements of the selected sheet
     *
     * Both values are capped by the row/column limits when those are positive.
     * NOTE(review): the attribute assumes ReaderInterface extends \Countable - confirm.
     *
     * @param bool $all When true return [rows, columns], otherwise only the row count
     * @return int|array
     */
    #[\ReturnTypeWillChange]
    public function count($all = false) {
        if ($this->count === null) {
            $row = $column = 0;
            if ($sheet = $this->sheets($this->parser->getSheetIndex())) {
                $row = $sheet['totalRows'] ?? 0;
                $column = $sheet['totalColumns'] ?? 0;
            }

            $this->count = [
                $this->rowLimit > 0 ? min($row, $this->rowLimit) : $row,
                $this->columnLimit > 0 ? min($column, $this->columnLimit) : $column
            ];
        }

        return $all ? $this->count : $this->count[0];
    }

    /**
     * Get the work sheets info
     *
     * @param int $index Sheet index, or null for all sheets
     * @return array Info of the requested sheet (empty array when unknown) or of all sheets
     */
    public function sheets($index = null) {
        $sheets = $this->parser->parseWorksheetInfo();

        if ($index !== null) {
            return $sheets[$index] ?? [];
        }

        return $sheets;
    }

    /**
     * Make the generator
     *
     * Yields rows until the row count is reached or the parser returns false.
     * Skipped empty rows do not count towards the limit.
     *
     * @return \Generator
     */
    protected function makeGenerator() {
        list($rowLimit, $columnLimit) = $this->count(true);

        $line = $finish = 0;
        while ($finish < $rowLimit && ($row = $this->parser->getRow($line++, $columnLimit)) !== false) {
            if ($this->parser->isIgnoreEmptyRow() && trim(implode('', $row)) === '') {
                continue;
            }

            $finish++;
            yield $row;
        }
    }

    /**
     * Ignore empty row
     *
     * @param bool $ignoreEmpty
     *
     * @return $this
     */
    public function ignoreEmptyRow($ignoreEmpty = false) {
        $this->parser->ignoreEmptyRow($ignoreEmpty);

        return $this;
    }

    /**
     * Set sheet index
     *
     * Switching sheets clears the cached count and restarts the iteration.
     *
     * @param int $index
     * @return $this
     */
    public function setSheetIndex($index = 0) {
        if ($index != $this->parser->getSheetIndex()) {
            $this->parser->setSheetIndex($index);

            $this->count = null;
            $this->rewind();
        }

        return $this;
    }

    /**
     * Can the current Reader read the file?
     *
     * Uses a throw-away parser so the reader's own parser state is not touched.
     *
     * @param string $file
     *
     * @return bool
     */
    public function canRead($file) {
        try {
            $parser = new Excel2007();

            // open file
            $parser->openFile($file);
        } catch (\Exception $e) {
            return false;
        }

        return true;
    }

    /**
     * Release parser and generator
     */
    public function __destruct() {
        $this->parser = null;
        $this->generator = null;
    }
}

View File

@@ -0,0 +1,7 @@
<?php
// autoload.php @generated by Composer
// Generated file: manual changes are presumably overwritten when Composer regenerates it.
// Loads the generated autoloader initialiser and returns the loader it provides.
require_once __DIR__ . '/composer/autoload_real.php';
return ComposerAutoloaderInit08d888cc0ebe5ec4e5236ffc732c0960::getLoader();

View File

@@ -0,0 +1,481 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer\Autoload;
/**
 * Class loader supporting PSR-0, PSR-4 and explicit class maps.
 *
 *     $loader = new \Composer\Autoload\ClassLoader();
 *
 *     // register classes with namespaces
 *     $loader->add('Symfony\Component', __DIR__.'/component');
 *     $loader->add('Symfony',           __DIR__.'/framework');
 *
 *     // activate the autoloader
 *     $loader->register();
 *
 *     // to enable searching the include path (eg. for PEAR packages)
 *     $loader->setUseIncludePath(true);
 *
 * With the setup above, a class in the Symfony\Component namespace (or any
 * child such as Symfony\Component\Console) is looked up under component/
 * first and under framework/ second before the loader gives up.
 *
 * Loosely based on the Symfony UniversalClassLoader.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 * @author Jordi Boggiano <j.boggiano@seld.be>
 * @see    https://www.php-fig.org/psr/psr-0/
 * @see    https://www.php-fig.org/psr/psr-4/
 */
class ClassLoader
{
    private $vendorDir;

    // PSR-4
    private $prefixLengthsPsr4 = array();
    private $prefixDirsPsr4 = array();
    private $fallbackDirsPsr4 = array();

    // PSR-0
    private $prefixesPsr0 = array();
    private $fallbackDirsPsr0 = array();

    private $useIncludePath = false;
    private $classMap = array();
    private $classMapAuthoritative = false;
    private $missingClasses = array();
    private $apcuPrefix;

    private static $registeredLoaders = array();

    /**
     * @param string|null $vendorDir Vendor directory this loader belongs to; used as the
     *                               key under which register() records the loader
     */
    public function __construct($vendorDir = null)
    {
        $this->vendorDir = $vendorDir;
    }

    /**
     * @return array PSR-0 prefixes mapped to their directories, flattened across first letters
     */
    public function getPrefixes()
    {
        if (empty($this->prefixesPsr0)) {
            return array();
        }

        return call_user_func_array('array_merge', array_values($this->prefixesPsr0));
    }

    /**
     * @return array PSR-4 prefixes mapped to their base directories
     */
    public function getPrefixesPsr4()
    {
        return $this->prefixDirsPsr4;
    }

    /**
     * @return array PSR-0 fallback directories
     */
    public function getFallbackDirs()
    {
        return $this->fallbackDirsPsr0;
    }

    /**
     * @return array PSR-4 fallback directories
     */
    public function getFallbackDirsPsr4()
    {
        return $this->fallbackDirsPsr4;
    }

    /**
     * @return array Class name to file name map
     */
    public function getClassMap()
    {
        return $this->classMap;
    }

    /**
     * Adds entries to the class map; on key clashes the new entries win.
     *
     * @param array $classMap Class to filename map
     */
    public function addClassMap(array $classMap)
    {
        $this->classMap = $this->classMap
            ? array_merge($this->classMap, $classMap)
            : $classMap;
    }

    /**
     * Adds PSR-0 directories for a prefix, before or after the ones already
     * known for it. An empty prefix targets the PSR-0 fallback directories.
     *
     * @param string       $prefix  The prefix
     * @param array|string $paths   The PSR-0 root directories
     * @param bool         $prepend Whether to prepend the directories
     */
    public function add($prefix, $paths, $prepend = false)
    {
        $dirs = (array) $paths;

        if (!$prefix) {
            $this->fallbackDirsPsr0 = self::mergeDirs($this->fallbackDirsPsr0, $dirs, $prepend);

            return;
        }

        $initial = $prefix[0];
        if (!isset($this->prefixesPsr0[$initial][$prefix])) {
            $this->prefixesPsr0[$initial][$prefix] = $dirs;

            return;
        }

        $this->prefixesPsr0[$initial][$prefix] = self::mergeDirs(
            $this->prefixesPsr0[$initial][$prefix],
            $dirs,
            $prepend
        );
    }

    /**
     * Adds PSR-4 directories for a namespace, before or after the ones already
     * known for it. An empty prefix targets the PSR-4 fallback directories.
     *
     * @param string       $prefix  The prefix/namespace, with trailing '\\'
     * @param array|string $paths   The PSR-4 base directories
     * @param bool         $prepend Whether to prepend the directories
     *
     * @throws \InvalidArgumentException When a new non-empty prefix lacks the trailing separator
     */
    public function addPsr4($prefix, $paths, $prepend = false)
    {
        $dirs = (array) $paths;

        // Root namespace: extend the fallback directories.
        if (!$prefix) {
            $this->fallbackDirsPsr4 = self::mergeDirs($this->fallbackDirsPsr4, $dirs, $prepend);

            return;
        }

        // Namespace already registered: extend its directory list.
        if (isset($this->prefixDirsPsr4[$prefix])) {
            $this->prefixDirsPsr4[$prefix] = self::mergeDirs($this->prefixDirsPsr4[$prefix], $dirs, $prepend);

            return;
        }

        // First registration of this namespace.
        $length = $this->validatedPsr4PrefixLength($prefix);
        $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
        $this->prefixDirsPsr4[$prefix] = $dirs;
    }

    /**
     * Replaces the PSR-0 directories of a prefix (or the PSR-0 fallback
     * directories when the prefix is empty).
     *
     * @param string       $prefix The prefix
     * @param array|string $paths  The PSR-0 base directories
     */
    public function set($prefix, $paths)
    {
        if ($prefix) {
            $this->prefixesPsr0[$prefix[0]][$prefix] = (array) $paths;

            return;
        }

        $this->fallbackDirsPsr0 = (array) $paths;
    }

    /**
     * Replaces the PSR-4 directories of a namespace (or the PSR-4 fallback
     * directories when the prefix is empty).
     *
     * @param string       $prefix The prefix/namespace, with trailing '\\'
     * @param array|string $paths  The PSR-4 base directories
     *
     * @throws \InvalidArgumentException When a non-empty prefix lacks the trailing separator
     */
    public function setPsr4($prefix, $paths)
    {
        if (!$prefix) {
            $this->fallbackDirsPsr4 = (array) $paths;

            return;
        }

        $length = $this->validatedPsr4PrefixLength($prefix);
        $this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
        $this->prefixDirsPsr4[$prefix] = (array) $paths;
    }

    /**
     * Enables or disables the include-path lookup for class files.
     *
     * @param bool $useIncludePath
     */
    public function setUseIncludePath($useIncludePath)
    {
        $this->useIncludePath = $useIncludePath;
    }

    /**
     * Tells whether the include path is searched for class files.
     *
     * @return bool
     */
    public function getUseIncludePath()
    {
        return $this->useIncludePath;
    }

    /**
     * When enabled, classes missing from the class map are reported as not
     * found without searching prefix or fallback directories.
     *
     * @param bool $classMapAuthoritative
     */
    public function setClassMapAuthoritative($classMapAuthoritative)
    {
        $this->classMapAuthoritative = $classMapAuthoritative;
    }

    /**
     * Should class lookup fail if the class is not in the current class map?
     *
     * @return bool
     */
    public function isClassMapAuthoritative()
    {
        return $this->classMapAuthoritative;
    }

    /**
     * Sets the APCu key prefix used to cache lookup results. The prefix is
     * only kept when apcu_fetch() exists and the apc.enabled ini flag is on;
     * otherwise it is reset to null.
     *
     * @param string|null $apcuPrefix
     */
    public function setApcuPrefix($apcuPrefix)
    {
        $apcuUsable = function_exists('apcu_fetch') && filter_var(ini_get('apc.enabled'), FILTER_VALIDATE_BOOLEAN);

        $this->apcuPrefix = $apcuUsable ? $apcuPrefix : null;
    }

    /**
     * The APCu prefix in use, or null if APCu caching is not enabled.
     *
     * @return string|null
     */
    public function getApcuPrefix()
    {
        return $this->apcuPrefix;
    }

    /**
     * Registers this instance on the SPL autoload stack and, when a vendor
     * directory is known, in the static registry of loaders.
     *
     * @param bool $prepend Whether to prepend the autoloader or not
     */
    public function register($prepend = false)
    {
        spl_autoload_register(array($this, 'loadClass'), true, $prepend);

        if (null === $this->vendorDir) {
            return;
        }

        if (!$prepend) {
            // Re-insert so this loader ends up last in the registry.
            unset(self::$registeredLoaders[$this->vendorDir]);
            self::$registeredLoaders[$this->vendorDir] = $this;

            return;
        }

        self::$registeredLoaders = array($this->vendorDir => $this) + self::$registeredLoaders;
    }

    /**
     * Removes this instance from the SPL autoload stack and the loader registry.
     */
    public function unregister()
    {
        spl_autoload_unregister(array($this, 'loadClass'));

        if (null === $this->vendorDir) {
            return;
        }

        unset(self::$registeredLoaders[$this->vendorDir]);
    }

    /**
     * Loads the given class or interface.
     *
     * @param  string    $class The name of the class
     * @return true|null True if loaded, null otherwise
     */
    public function loadClass($class)
    {
        $file = $this->findFile($class);
        if (!$file) {
            return null;
        }

        includeFile($file);

        return true;
    }

    /**
     * Finds the path to the file where the class is defined.
     *
     * Lookup order: class map, remembered misses, APCu cache (when a prefix
     * is set), then the PSR-4/PSR-0 search for ".php" (and ".hh" on HHVM).
     *
     * @param string $class The name of the class
     *
     * @return string|false The path if found, false otherwise
     */
    public function findFile($class)
    {
        if (isset($this->classMap[$class])) {
            return $this->classMap[$class];
        }

        if ($this->classMapAuthoritative || isset($this->missingClasses[$class])) {
            return false;
        }

        $cacheEnabled = null !== $this->apcuPrefix;
        if ($cacheEnabled) {
            $cached = apcu_fetch($this->apcuPrefix.$class, $hit);
            if ($hit) {
                return $cached;
            }
        }

        $file = $this->findFileWithExtension($class, '.php');

        // Hack files are only considered when running on HHVM.
        if (false === $file && defined('HHVM_VERSION')) {
            $file = $this->findFileWithExtension($class, '.hh');
        }

        if ($cacheEnabled) {
            apcu_add($this->apcuPrefix.$class, $file);
        }

        if (false === $file) {
            // Remember the miss so the directories are not searched again.
            $this->missingClasses[$class] = true;
        }

        return $file;
    }

    /**
     * Returns the currently registered loaders indexed by their corresponding vendor directories.
     *
     * @return self[]
     */
    public static function getRegisteredLoaders()
    {
        return self::$registeredLoaders;
    }

    /**
     * Searches PSR-4 prefixes, PSR-4 fallbacks, PSR-0 prefixes, PSR-0
     * fallbacks and finally the include path (if enabled) for the class file.
     *
     * @param string $class The name of the class
     * @param string $ext   File extension including the dot
     *
     * @return string|false The first existing candidate, false when none exists
     */
    private function findFileWithExtension($class, $ext)
    {
        $psr4Path = strtr($class, '\\', DIRECTORY_SEPARATOR) . $ext;
        $initial = $class[0];

        // PSR-4 prefixes, from the longest matching namespace to the shortest.
        if (isset($this->prefixLengthsPsr4[$initial])) {
            $namespace = $class;
            while (false !== ($cut = strrpos($namespace, '\\'))) {
                $namespace = substr($namespace, 0, $cut);
                $candidatePrefix = $namespace . '\\';
                if (!isset($this->prefixDirsPsr4[$candidatePrefix])) {
                    continue;
                }

                $found = self::firstExistingFile(
                    $this->prefixDirsPsr4[$candidatePrefix],
                    DIRECTORY_SEPARATOR . substr($psr4Path, $cut + 1)
                );
                if (false !== $found) {
                    return $found;
                }
            }
        }

        // PSR-4 fallback dirs
        $found = self::firstExistingFile($this->fallbackDirsPsr4, DIRECTORY_SEPARATOR . $psr4Path);
        if (false !== $found) {
            return $found;
        }

        // PSR-0 logical path: underscores in the class-name part become directory separators.
        $namespaceEnd = strrpos($class, '\\');
        if (false === $namespaceEnd) {
            // PEAR-like class name
            $psr0Path = strtr($class, '_', DIRECTORY_SEPARATOR) . $ext;
        } else {
            // namespaced class name
            $psr0Path = substr($psr4Path, 0, $namespaceEnd + 1)
                . strtr(substr($psr4Path, $namespaceEnd + 1), '_', DIRECTORY_SEPARATOR);
        }
        $psr0Suffix = DIRECTORY_SEPARATOR . $psr0Path;

        // PSR-0 prefixes
        if (isset($this->prefixesPsr0[$initial])) {
            foreach ($this->prefixesPsr0[$initial] as $prefix => $dirs) {
                if (0 !== strpos($class, $prefix)) {
                    continue;
                }

                $found = self::firstExistingFile($dirs, $psr0Suffix);
                if (false !== $found) {
                    return $found;
                }
            }
        }

        // PSR-0 fallback dirs
        $found = self::firstExistingFile($this->fallbackDirsPsr0, $psr0Suffix);
        if (false !== $found) {
            return $found;
        }

        // PSR-0 include paths.
        if ($this->useIncludePath) {
            $resolved = stream_resolve_include_path($psr0Path);
            if ($resolved) {
                return $resolved;
            }
        }

        return false;
    }

    /**
     * Combines two directory lists, putting the new ones first when prepending.
     */
    private static function mergeDirs($current, $extra, $prepend)
    {
        return $prepend ? array_merge($extra, $current) : array_merge($current, $extra);
    }

    /**
     * Returns the length of a non-empty PSR-4 prefix after checking that it
     * ends with a namespace separator.
     *
     * @throws \InvalidArgumentException
     */
    private function validatedPsr4PrefixLength($prefix)
    {
        $length = strlen($prefix);
        if ('\\' !== $prefix[$length - 1]) {
            throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
        }

        return $length;
    }

    /**
     * Returns the first "$dir$suffix" that exists on disk, or false when none does.
     */
    private static function firstExistingFile($dirs, $suffix)
    {
        foreach ($dirs as $dir) {
            $candidate = $dir . $suffix;
            if (file_exists($candidate)) {
                return $candidate;
            }
        }

        return false;
    }
}
/**
 * Scope isolated include.
 *
 * Prevents access to $this/self from included files: because this is a plain
 * function rather than a ClassLoader method, the included file only sees the
 * $file variable. Do not add local variables here, they would become visible
 * to every included file.
 *
 * @param string $file Path of the file to include
 */
function includeFile($file)
{
include $file;
}

View File

@@ -0,0 +1,337 @@
<?php
/*
* This file is part of Composer.
*
* (c) Nils Adermann <naderman@naderman.de>
* Jordi Boggiano <j.boggiano@seld.be>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
namespace Composer;
use Composer\Autoload\ClassLoader;
use Composer\Semver\VersionParser;
/**
 * This class is copied in every Composer installed project and available to all
 *
 * See also https://getcomposer.org/doc/07-runtime.md#installed-versions
 *
 * To require its presence, you can require `composer-runtime-api ^2.0`
 */
class InstalledVersions
{
    /** @var array|null Dataset of this vendor dir's installed.php; null until loaded, array() when there is none */
    private static $installed;

    /** @var bool|null Whether ClassLoader::getRegisteredLoaders() exists; null until first checked */
    private static $canGetVendors;

    /** @var array[] installed.php datasets already loaded, keyed by vendor directory */
    private static $installedByVendor = array();

    /**
     * Returns a list of all package names which are present, either by being installed, replaced or provided
     *
     * @return string[]
     * @psalm-return list<string>
     */
    public static function getInstalledPackages()
    {
        $packages = array();
        foreach (self::getInstalled() as $installed) {
            $packages[] = array_keys($installed['versions']);
        }

        // No dataset at all: avoid calling array_merge() without arguments.
        if (0 === \count($packages)) {
            return array();
        }

        if (1 === \count($packages)) {
            return $packages[0];
        }

        // Several datasets: merge them and drop duplicate names.
        return array_keys(array_flip(\call_user_func_array('array_merge', $packages)));
    }

    /**
     * Returns a list of all package names with a specific type e.g. 'library'
     *
     * @param  string   $type
     * @return string[]
     * @psalm-return list<string>
     */
    public static function getInstalledPackagesByType($type)
    {
        $packagesByType = array();

        foreach (self::getInstalled() as $installed) {
            foreach ($installed['versions'] as $name => $package) {
                if (isset($package['type']) && $package['type'] === $type) {
                    $packagesByType[] = $name;
                }
            }
        }

        return $packagesByType;
    }

    /**
     * Checks whether the given package is installed
     *
     * This also returns true if the package name is provided or replaced by another package
     *
     * @param  string $packageName
     * @param  bool   $includeDevRequirements When false, packages flagged as dev requirements count as not installed
     * @return bool
     */
    public static function isInstalled($packageName, $includeDevRequirements = true)
    {
        foreach (self::getInstalled() as $installed) {
            if (isset($installed['versions'][$packageName])) {
                return $includeDevRequirements || empty($installed['versions'][$packageName]['dev_requirement']);
            }
        }

        return false;
    }

    /**
     * Checks whether the given package satisfies a version constraint
     *
     * e.g. If you want to know whether version 2.3+ of package foo/bar is installed, you would call:
     *
     *   Composer\InstalledVersions::satisfies(new VersionParser, 'foo/bar', '^2.3')
     *
     * @param  VersionParser $parser      Install composer/semver to have access to this class and functionality
     * @param  string        $packageName
     * @param  string|null   $constraint  A version constraint to check for, if you pass one you have to make sure composer/semver is required by your package
     * @return bool
     *
     * @throws \OutOfBoundsException When the package is not installed (raised by getVersionRanges())
     */
    public static function satisfies(VersionParser $parser, $packageName, $constraint)
    {
        // The constraint is documented as nullable; hand the parser a string in every case.
        $constraint = $parser->parseConstraints((string) $constraint);
        $provided = $parser->parseConstraints(self::getVersionRanges($packageName));

        return $provided->matches($constraint);
    }

    /**
     * Returns a version constraint representing all the range(s) which are installed for a given package
     *
     * It is easier to use this via satisfies() if you need to check whether a given version
     * of a package is installed, and not just whether it exists
     *
     * @param  string $packageName
     * @return string Version constraint usable with composer/semver
     *
     * @throws \OutOfBoundsException When the package is not installed
     */
    public static function getVersionRanges($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            $ranges = array();
            if (isset($installed['versions'][$packageName]['pretty_version'])) {
                $ranges[] = $installed['versions'][$packageName]['pretty_version'];
            }
            if (array_key_exists('aliases', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['aliases']);
            }
            if (array_key_exists('replaced', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['replaced']);
            }
            if (array_key_exists('provided', $installed['versions'][$packageName])) {
                $ranges = array_merge($ranges, $installed['versions'][$packageName]['provided']);
            }

            return implode(' || ', $ranges);
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as version, use satisfies or getVersionRanges if you need to know if a given version is present
     *
     * @throws \OutOfBoundsException When the package is not installed
     */
    public static function getVersion($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['version'])) {
                return null;
            }

            return $installed['versions'][$packageName]['version'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as version, use satisfies or getVersionRanges if you need to know if a given version is present
     *
     * @throws \OutOfBoundsException When the package is not installed
     */
    public static function getPrettyVersion($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['pretty_version'])) {
                return null;
            }

            return $installed['versions'][$packageName]['pretty_version'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as reference
     *
     * @throws \OutOfBoundsException When the package is not installed
     */
    public static function getReference($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            if (!isset($installed['versions'][$packageName]['reference'])) {
                return null;
            }

            return $installed['versions'][$packageName]['reference'];
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * @param  string      $packageName
     * @return string|null If the package is being replaced or provided but is not really installed, null will be returned as install path. Packages of type metapackages also have a null install path.
     *
     * @throws \OutOfBoundsException When the package is not installed
     */
    public static function getInstallPath($packageName)
    {
        foreach (self::getInstalled() as $installed) {
            if (!isset($installed['versions'][$packageName])) {
                continue;
            }

            return isset($installed['versions'][$packageName]['install_path']) ? $installed['versions'][$packageName]['install_path'] : null;
        }

        throw new \OutOfBoundsException('Package "' . $packageName . '" is not installed');
    }

    /**
     * Returns the root package entry of the first loaded dataset.
     *
     * @return array
     * @psalm-return array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}
     */
    public static function getRootPackage()
    {
        $installed = self::getInstalled();

        return $installed[0]['root'];
    }

    /**
     * Returns the raw installed.php data for custom implementations
     *
     * @deprecated Use getAllRawData() instead which returns all datasets for all autoloaders present in the process. getRawData only returns the first dataset loaded, which may not be what you expect.
     * @return array[]
     * @psalm-return array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>}
     */
    public static function getRawData()
    {
        @trigger_error('getRawData only returns the first dataset loaded, which may not be what you expect. Use getAllRawData() instead which returns all datasets for all autoloaders present in the process.', E_USER_DEPRECATED);

        if (null === self::$installed) {
            // only require the installed.php file if this file is loaded from its dumped location,
            // and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
            if (substr(__DIR__, -8, 1) !== 'C') {
                self::$installed = include __DIR__ . '/installed.php';
            } else {
                self::$installed = array();
            }
        }

        return self::$installed;
    }

    /**
     * Returns the raw data of all installed.php which are currently loaded for custom implementations
     *
     * @return array[]
     * @psalm-return list<array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>}>
     */
    public static function getAllRawData()
    {
        return self::getInstalled();
    }

    /**
     * Lets you reload the static array from another file
     *
     * This is only useful for complex integrations in which a project needs to use
     * this class but then also needs to execute another project's autoloader in process,
     * and wants to ensure both projects have access to their version of installed.php.
     *
     * A typical case would be PHPUnit, where it would need to make sure it reads all
     * the data it needs from this class, then call reload() with
     * `require $CWD/vendor/composer/installed.php` (or similar) as input to make sure
     * the project in which it runs can then also use this class safely, without
     * interference between PHPUnit's dependencies and the project's dependencies.
     *
     * @param  array[] $data A vendor/composer/installed.php data set
     * @return void
     *
     * @psalm-param array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>} $data
     */
    public static function reload($data)
    {
        self::$installed = $data;
        self::$installedByVendor = array();
    }

    /**
     * Collects the installed.php datasets of every registered class loader,
     * followed by this directory's own dataset.
     *
     * An empty own dataset (the placeholder used when this file is not at its
     * dumped location) is not appended, so callers never receive an entry
     * without the 'root' and 'versions' keys.
     *
     * @return array[]
     * @psalm-return list<array{root: array{name: string, version: string, reference: string, pretty_version: string, aliases: string[], dev: bool, install_path: string}, versions: array<string, array{dev_requirement: bool, pretty_version?: string, version?: string, aliases?: string[], reference?: string, replaced?: string[], provided?: string[], install_path?: string}>}>
     */
    private static function getInstalled()
    {
        if (null === self::$canGetVendors) {
            self::$canGetVendors = method_exists('Composer\Autoload\ClassLoader', 'getRegisteredLoaders');
        }

        $installed = array();

        if (self::$canGetVendors) {
            foreach (ClassLoader::getRegisteredLoaders() as $vendorDir => $loader) {
                if (isset(self::$installedByVendor[$vendorDir])) {
                    $installed[] = self::$installedByVendor[$vendorDir];
                } elseif (is_file($vendorDir.'/composer/installed.php')) {
                    $installed[] = self::$installedByVendor[$vendorDir] = require $vendorDir.'/composer/installed.php';
                    // Reuse the dataset as our own when it comes from this very directory.
                    if (null === self::$installed && strtr($vendorDir.'/composer', '\\', '/') === strtr(__DIR__, '\\', '/')) {
                        self::$installed = $installed[count($installed) - 1];
                    }
                }
            }
        }

        if (null === self::$installed) {
            // only require the installed.php file if this file is loaded from its dumped location,
            // and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
            if (substr(__DIR__, -8, 1) !== 'C') {
                self::$installed = require __DIR__ . '/installed.php';
            } else {
                self::$installed = array();
            }
        }

        // Skip the empty placeholder: it has no 'versions'/'root' keys for callers to read.
        if (self::$installed !== array()) {
            $installed[] = self::$installed;
        }

        return $installed;
    }
}

View File

@@ -0,0 +1,21 @@
Copyright (c) Nils Adermann, Jordi Boggiano
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -0,0 +1,10 @@
<?php
// autoload_classmap.php @generated by Composer
// Returns the class name => file path map; paths are anchored on the vendor directory.
$vendorDir = dirname(__DIR__);
$baseDir = dirname($vendorDir);

return [
    'Composer\\InstalledVersions' => $vendorDir . '/composer/InstalledVersions.php',
];
);

View File

@@ -0,0 +1,9 @@
<?php
// autoload_namespaces.php @generated by Composer
// Returns the PSR-0 prefix => directories map; it is empty for this install.
$vendorDir = dirname(__DIR__);
$baseDir = dirname($vendorDir);

return [];
);

View File

@@ -0,0 +1,10 @@
<?php
// autoload_psr4.php @generated by Composer
// Returns the PSR-4 namespace prefix => base directories map.
$vendorDir = dirname(__DIR__);
$baseDir = dirname($vendorDir);

return [
    'Asan\\PHPExcel\\' => [$vendorDir . '/asan/phpexcel/src'],
];
);

View File

@@ -0,0 +1,57 @@
<?php
// autoload_real.php @generated by Composer
/**
 * Generated bootstrap that builds, configures and registers the class loader.
 *
 * Kept free of newer PHP syntax so the file still parses on old runtimes and
 * platform_check.php gets the chance to report the version problem.
 */
class ComposerAutoloaderInit08d888cc0ebe5ec4e5236ffc732c0960
{
    private static $loader;

    /**
     * Temporary autoload callback that only knows how to load the ClassLoader class itself.
     *
     * @param string $class Fully qualified name of the class being autoloaded
     */
    public static function loadClassLoader($class)
    {
        if ('Composer\Autoload\ClassLoader' !== $class) {
            return;
        }

        require __DIR__ . '/ClassLoader.php';
    }

    /**
     * Returns the shared loader, creating and registering it on the first call.
     *
     * @return \Composer\Autoload\ClassLoader
     */
    public static function getLoader()
    {
        if (null !== self::$loader) {
            return self::$loader;
        }

        require __DIR__ . '/platform_check.php';

        // Register a one-shot autoloader just long enough to instantiate ClassLoader.
        $bootstrapCallback = array(__CLASS__, 'loadClassLoader');
        spl_autoload_register($bootstrapCallback, true, true);
        self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(__DIR__));
        spl_autoload_unregister($bootstrapCallback);

        // The static initializer needs PHP >= 5.6, no HHVM and a file that is not Zend-encoded.
        $useStaticLoader = PHP_VERSION_ID >= 50600
            && !defined('HHVM_VERSION')
            && !(function_exists('zend_loader_file_encoded') && zend_loader_file_encoded());

        if (!$useStaticLoader) {
            $psr0Map = require __DIR__ . '/autoload_namespaces.php';
            foreach ($psr0Map as $namespace => $path) {
                $loader->set($namespace, $path);
            }

            $psr4Map = require __DIR__ . '/autoload_psr4.php';
            foreach ($psr4Map as $namespace => $path) {
                $loader->setPsr4($namespace, $path);
            }

            $classMap = require __DIR__ . '/autoload_classmap.php';
            if ($classMap) {
                $loader->addClassMap($classMap);
            }
        } else {
            require __DIR__ . '/autoload_static.php';
            call_user_func(\Composer\Autoload\ComposerStaticInit08d888cc0ebe5ec4e5236ffc732c0960::getInitializer($loader));
        }

        $loader->register(true);

        return $loader;
    }
}

View File

@@ -0,0 +1,36 @@
<?php
// autoload_static.php @generated by Composer
namespace Composer\Autoload;
/**
 * Generated static autoload data plus the initializer that copies it into a ClassLoader.
 */
class ComposerStaticInit08d888cc0ebe5ec4e5236ffc732c0960
{
// PSR-4 prefixes grouped by their first character; each value is the prefix's string length.
public static $prefixLengthsPsr4 = array (
'A' =>
array (
'Asan\\PHPExcel\\' => 14,
),
);
// PSR-4 prefix => list of base directories, relative to this file's directory.
public static $prefixDirsPsr4 = array (
'Asan\\PHPExcel\\' =>
array (
0 => __DIR__ . '/..' . '/asan/phpexcel/src',
),
);
// Class name => file path.
public static $classMap = array (
'Composer\\InstalledVersions' => __DIR__ . '/..' . '/composer/InstalledVersions.php',
);
/**
 * Builds a closure that assigns the static data above to the given loader.
 *
 * The closure is bound to the ClassLoader class scope so it may write the
 * loader's non-public properties. Because of that rebinding, `self`/`static`
 * inside the closure would refer to ClassLoader, which is why this class is
 * addressed by its full name there.
 *
 * @param ClassLoader $loader Loader to populate when the closure is invoked
 *
 * @return \Closure
 */
public static function getInitializer(ClassLoader $loader)
{
return \Closure::bind(function () use ($loader) {
$loader->prefixLengthsPsr4 = ComposerStaticInit08d888cc0ebe5ec4e5236ffc732c0960::$prefixLengthsPsr4;
$loader->prefixDirsPsr4 = ComposerStaticInit08d888cc0ebe5ec4e5236ffc732c0960::$prefixDirsPsr4;
$loader->classMap = ComposerStaticInit08d888cc0ebe5ec4e5236ffc732c0960::$classMap;
}, null, ClassLoader::class);
}
}

View File

@@ -0,0 +1,50 @@
{
"packages": [
{
"name": "asan/phpexcel",
"version": "v2.0.1",
"version_normalized": "2.0.1.0",
"source": {
"type": "git",
"url": "https://github.com/Janson-Leung/PHPExcel.git",
"reference": "07ddc15b44c1f3ee967ded35cffeab5fec49a215"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Janson-Leung/PHPExcel/zipball/07ddc15b44c1f3ee967ded35cffeab5fec49a215",
"reference": "07ddc15b44c1f3ee967ded35cffeab5fec49a215",
"shasum": ""
},
"require": {
"php": ">=7.0"
},
"time": "2018-07-23T01:42:26+00:00",
"type": "library",
"installation-source": "dist",
"autoload": {
"psr-4": {
"Asan\\PHPExcel\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Janson Leung",
"homepage": "https://github.com/Janson-Leung"
}
],
"description": "A lightweight PHP library for reading spreadsheet files",
"homepage": "https://github.com/Janson-Leung/PHPExcel",
"support": {
"issues": "https://github.com/Janson-Leung/PHPExcel/issues",
"source": "https://github.com/Janson-Leung/PHPExcel/tree/master"
},
"install-path": "../asan/phpexcel"
}
],
"dev": true,
"dev-package-names": []
}

View File

@@ -0,0 +1,32 @@
<?php return array(
'root' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
'reference' => '6582fe4c96d02e645921e4fb4300f84719b6b503',
'name' => '__root__',
'dev' => true,
),
'versions' => array(
'__root__' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
'reference' => '6582fe4c96d02e645921e4fb4300f84719b6b503',
'dev_requirement' => false,
),
'asan/phpexcel' => array(
'pretty_version' => 'v2.0.1',
'version' => '2.0.1.0',
'type' => 'library',
'install_path' => __DIR__ . '/../asan/phpexcel',
'aliases' => array(),
'reference' => '07ddc15b44c1f3ee967ded35cffeab5fec49a215',
'dev_requirement' => false,
),
),
);

View File

@@ -0,0 +1,26 @@
<?php
// platform_check.php @generated by Composer
// Verifies the running PHP version and aborts with an error when it is too old.
// Written without newer PHP syntax so that it still parses on the runtimes it rejects.
$issues = array();

if (PHP_VERSION_ID < 70000) {
    $issues[] = 'Your Composer dependencies require a PHP version ">= 7.0.0". You are running ' . PHP_VERSION . '.';
}

if (count($issues) > 0) {
    if (false === headers_sent()) {
        header('HTTP/1.1 500 Internal Server Error');
    }

    // With display_errors off, print the problem here: to STDERR on the CLI,
    // or to the response body (without the running-version sentence) on the web.
    if (!ini_get('display_errors')) {
        if (in_array(PHP_SAPI, array('cli', 'phpdbg'), true)) {
            fwrite(STDERR, 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . implode(PHP_EOL, $issues) . PHP_EOL.PHP_EOL);
        } elseif (false === headers_sent()) {
            echo 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . str_replace('You are running '.PHP_VERSION.'.', '', implode(PHP_EOL, $issues)) . PHP_EOL.PHP_EOL;
        }
    }

    trigger_error(
        'Composer detected issues in your platform: ' . implode(' ', $issues),
        E_USER_ERROR
    );
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,42 @@
<?php
use WPDRMS\ASP\Utils\Plugin;
/* Prevent direct access */
defined('ABSPATH') or die("You can't access this file directly.");
/*
 * Renders every taxonomy filter: a header partial, one body partial chosen by
 * the filter's display mode, then a footer partial.
 *
 * The partials are pulled in with `include`, so they execute in this loop's
 * scope and can see $k, $filter, $taxonomy and $ch_class. Keep those names stable.
 */
// $tax_term_filters = asp_parse_tax_term_filters($style);
foreach ( wd_asp()->front_filters->get('position', 'taxonomy') as $k => $filter ) {
// $filter variable is an instance of aspTaxFilter object
// $filter->get() will return the array of filter objects (of stdClass)
// Some local variables for ease of use within the theme
$taxonomy = $filter->data['taxonomy'];
// 'terms' for mixed filters, otherwise the taxonomy name reduced to letters and digits.
$ch_class = $filter->isMixed() ? 'terms' : preg_replace('/[^a-zA-Z0-9]+/', '', $taxonomy);
include Plugin::templateFilePath('filters/taxonomy/asp-tax-header.php');
// Pick the body partial; unknown display modes fall back to checkboxes.
switch ( $filter->display_mode ) {
case 'checkbox':
case 'checkboxes':
include Plugin::templateFilePath('filters/taxonomy/asp-tax-checkboxes.php');
break;
case 'dropdown':
include Plugin::templateFilePath('filters/taxonomy/asp-tax-dropdown.php');
break;
case 'dropdownsearch':
include Plugin::templateFilePath('filters/taxonomy/asp-tax-dropdownsearch.php');
break;
case 'multisearch':
include Plugin::templateFilePath('filters/taxonomy/asp-tax-multisearch.php');
break;
case 'radio':
include Plugin::templateFilePath('filters/taxonomy/asp-tax-radio.php');
break;
default:
include Plugin::templateFilePath('filters/taxonomy/asp-tax-checkboxes.php');
break;
}
include Plugin::templateFilePath('filters/taxonomy/asp-tax-footer.php');
}

View File

@@ -0,0 +1,24 @@
<?php
use WPDRMS\ASP\Utils\Plugin;
/* Prevent direct access */
defined('ABSPATH') or die("You can't access this file directly.");
/*
 * Renders every content type filter: a header partial, one body partial chosen
 * by the filter's display mode, then a footer partial.
 *
 * The partials are pulled in with `include`, so they execute in this loop's
 * scope and can see $filter.
 */
foreach ( wd_asp()->front_filters->get('position', 'content_type') as $filter ) {
include( Plugin::templateFilePath('filters/content_type/asp-content_type-header.php') );
// Pick the body partial; any mode other than checkboxes/radio is rendered as a dropdown.
switch ($filter->display_mode) {
case 'checkboxes':
include(Plugin::templateFilePath('filters/content_type/asp-content_type-checkboxes.php'));
break;
case 'radio':
include(Plugin::templateFilePath('filters/content_type/asp-content_type-radio.php'));
break;
default:
include(Plugin::templateFilePath('filters/content_type/asp-content_type-dropdown.php'));
break;
}
include(Plugin::templateFilePath('filters/content_type/asp-content_type-footer.php'));
}

Some files were not shown because too many files have changed in this diff Show More