HTML_ToPDF.php

Go to the documentation of this file.
00001 <?php
00003 // {{{ license
00004 
00005 // +----------------------------------------------------------------------+
00006 // | This source file is subject to version 3.0 of the PHP license,       |
00007 // | that is bundled with this package in the file LICENSE, and is        |
00008 // | available through the world-wide-web at                              |
00009 // | http://www.php.net/license/3_0.txt                                   |
00010 // | If you did not receive a copy of the PHP license and are unable to   |
00011 // | obtain it through the world-wide-web, please send a note to          |
00012 // | license@php.net so we can mail you a copy immediately.               |
00013 // +----------------------------------------------------------------------+
00014 // | Authors: Jason Rust <jrust@rustyparts.com>                           |
00015 // +----------------------------------------------------------------------+
00016 
00017 // }}}
00018 // {{{ includes
00019 
00020 require_once 'PEAR.php';
00021 
00022 // }}}
00023 // {{{ HTML_ToPDF class
00024 
00039 // }}}
/**
 * Converts an HTML document to PDF by running it through the external
 * html2ps and ps2pdf programs.
 *
 * Typical use: construct with the HTML source and the domain used to make
 * relative URLs absolute, adjust options through the setters, then call
 * convert(). convert() rewrites $html2psrc and $additionalCSS in place, so an
 * instance should be used for a single conversion.
 *
 * Errors are reported by RETURNING an HTML_ToPDFException (a PEAR_Error);
 * nothing is thrown.
 */
class HTML_ToPDF
{
    // {{{ properties

    /**
     * Path or http(s)/ftp(s) URL of the HTML document to convert.
     * @var string
     */
    public $htmlFile = '';

    /**
     * Path of the PDF to write. When null, convert() creates a temporary
     * file inside $tmpDir.
     * @var string|null
     */
    public $pdfFile = '';

    /**
     * Directory in which temporary config/html/ps (and pdf) files are created.
     * @var string
     */
    public $tmpDir = '/tmp';

    /**
     * When true, debug information is echoed and temporary files are kept.
     * @var bool
     */
    public $debug = false;

    /**
     * Whether debug output is wrapped in HTML markup (set by the constructor
     * from the current SAPI).
     * @var bool
     */
    public $htmlErrors = false;

    /**
     * Domain prepended to image and stylesheet URLs that are not absolute.
     * @var string
     */
    public $defaultDomain = '';

    /**
     * Path (with trailing slash) prepended, after the domain, to relative
     * image and stylesheet URLs.
     * @var string
     */
    public $defaultPath = '/';

    /**
     * Location of the html2ps program.
     * @var string
     */
    public $html2psPath = '/usr/bin/html2ps';

    /**
     * Location of the ps2pdf program.
     * @var string
     */
    public $ps2pdfPath = '/usr/bin/ps2pdf';

    /**
     * Command substituted for the "geturl" entry of the html2ps config.
     * @var string
     */
    public $getUrlPath = '/usr/bin/curl -i';

    /**
     * Whether CSS found in the HTML document (inline <style> blocks or a
     * linked stylesheet) is extracted and handed to html2ps.
     * @var bool
     */
    public $useCSS = true;

    /**
     * Extra CSS appended to the html2ps configuration.
     * @var string
     */
    public $additionalCSS = '';

    /**
     * Value for the html2ps "colour" option.
     * @var bool
     */
    public $pageInColor = true;

    /**
     * Value for the html2ps "grayscale" option.
     * @var bool
     */
    public $grayScale = false;

    /**
     * Value for the html2ps "scaledoc" option.
     * @var int|float
     */
    public $scaleFactor = 1;

    /**
     * Value for the html2ps "underline" option. When null it is derived from
     * the a:link text-decoration rule found in the CSS.
     * @var bool|null
     */
    public $underlineLinks = null;

    /**
     * Entries written to the html2ps "header" block, keyed by position.
     * @var array
     */
    public $headers = array('left' => '$T', 'right' => '$[author]');

    /**
     * Entries written to the html2ps "footer" block, keyed by position.
     * @var array
     */
    public $footers = array('center' => '- $N -');

    /**
     * Template of the html2ps configuration. The %name% placeholders are
     * filled in by _modifyConfFile().
     * @var string
     */
    public $html2psrc = '
        option {
          titlepage: 0;         /* do not generate a title page */
          toc: 0;               /* no table of contents */
          colour: %pageInColor%; /* create the page in color */
          underline: %underlineLinks%;         /* underline links */
          grayscale: %grayScale%; /* Make images grayscale? */
          scaledoc: %scaleFactor%; /* Scale the document */
        }
        package {
          geturl: %getUrlPath%; /* path to the geturl */
        }
        showurl: 0;             /* do not show the url next to links */';

    /**
     * Whether <img> src attributes are rewritten to absolute http:// URLs
     * using $defaultDomain and $defaultPath.
     * @var bool
     */
    public $makeAbsoluteImageUrls = true;

    /**
     * Value passed to ps2pdf through its -I option (omitted when empty).
     * @var string
     */
    public $ps2pdfIncludePath = '';

    /**
     * Working copy of the HTML document, modified during convert().
     * @var string
     */
    public $_htmlString = '';

    // }}}
    // {{{ constructor

    /**
     * Stores the conversion source/target and decides how debug output is
     * formatted.
     *
     * @param string      $in_htmlFile Path or URL of the HTML document.
     * @param string      $in_domain   Domain used to make relative URLs absolute.
     * @param string|null $in_pdfFile  Output path; null lets convert() create
     *                                 a temporary file.
     */
    public function __construct($in_htmlFile, $in_domain, $in_pdfFile = null)
    {
        $this->htmlFile = $in_htmlFile;
        $this->defaultDomain = $in_domain;
        // We'll set it to a temporary name later, if needed, so that tmpDir can be set.
        $this->pdfFile = $in_pdfFile;
        $sapi = php_sapi_name();
        // HTML-formatted debug output unless running on the command line
        // (cli, or cgi without a gateway interface).
        $this->htmlErrors = ($sapi != 'cli' && !(substr($sapi, 0, 3) == 'cgi' &&
                    !isset($_SERVER['GATEWAY_INTERFACE'])));
    }

    /**
     * Legacy PHP 4-style constructor name, kept so existing explicit calls
     * keep working. PHP 8 no longer treats it as a constructor, hence the
     * real work lives in __construct().
     *
     * @param string      $in_htmlFile
     * @param string      $in_domain
     * @param string|null $in_pdfFile
     */
    public function HTML_ToPdf($in_htmlFile, $in_domain, $in_pdfFile = null)
    {
        $this->__construct($in_htmlFile, $in_domain, $in_pdfFile);
    }

    // }}}
    // {{{ addHtml2PsSettings()

    /**
     * Appends raw settings to the html2ps configuration template.
     *
     * @param string $in_settings Text appended on a new line.
     */
    public function addHtml2PsSettings($in_settings)
    {
        $this->html2psrc .= "\n" . $in_settings;
    }

    // }}}
    // {{{ setDefaultPath()

    /**
     * Sets the path prepended to relative URLs, adding a trailing slash when
     * it is missing.
     *
     * @param string $in_path
     */
    public function setDefaultPath($in_path)
    {
        // Default paths should always have a trailing slash...
        // substr() is used because the $str{n} offset syntax no longer parses on PHP 8.
        if (substr($in_path, -1) != '/') {
            $in_path .= '/';
        }

        $this->defaultPath = $in_path;
    }

    // }}}
    // {{{ setDebug()

    /**
     * Turns debug output (and retention of temporary files) on or off.
     *
     * @param bool $in_debug
     */
    public function setDebug($in_debug)
    {
        $this->debug = $in_debug;
    }

    // }}}
    // {{{ setHeader()

    /**
     * Sets one entry of the html2ps header block.
     *
     * @param string $in_attribute Header key, e.g. "left", "odd-left".
     * @param string $in_value     Value written for that key.
     */
    public function setHeader($in_attribute, $in_value)
    {
        $this->headers[$in_attribute] = $in_value;
    }

    // }}}
    // {{{ setFooter()

    /**
     * Sets one entry of the html2ps footer block.
     *
     * @param string $in_attribute Footer key, e.g. "center", "even-right".
     * @param string $in_value     Value written for that key.
     */
    public function setFooter($in_attribute, $in_value)
    {
        $this->footers[$in_attribute] = $in_value;
    }

    // }}}
    // {{{ setTmpDir()

    /**
     * Sets the directory used for temporary files.
     *
     * @param string $in_path
     */
    public function setTmpDir($in_path)
    {
        $this->tmpDir = $in_path;
    }

    // }}}
    // {{{ setUseColor()

    /**
     * Sets the html2ps "colour" option.
     *
     * @param bool $in_useColor
     */
    public function setUseColor($in_useColor)
    {
        $this->pageInColor = $in_useColor;
    }

    // }}}
    // {{{ setUseCSS()

    /**
     * Sets whether CSS is extracted from the HTML document.
     *
     * @param bool $in_useCSS
     */
    public function setUseCSS($in_useCSS)
    {
        $this->useCSS = $in_useCSS;
    }

    // }}}
    // {{{ setAdditionalCSS()

    /**
     * Replaces the extra CSS appended to the html2ps configuration.
     *
     * @param string $in_css
     */
    public function setAdditionalCSS($in_css)
    {
        $this->additionalCSS = $in_css;
    }

    // }}}
    // {{{ setGetUrlPath()

    /**
     * Sets the command written to the html2ps "geturl" entry.
     *
     * @param string $in_getUrl
     */
    public function setGetUrl($in_getUrl)
    {
        $this->getUrlPath = $in_getUrl;
    }

    // }}}
    // {{{ setGrayScale()

    /**
     * Sets the html2ps "grayscale" option.
     *
     * @param bool $in_grayScale
     */
    public function setGrayScale($in_grayScale)
    {
        $this->grayScale = $in_grayScale;
    }

    // }}}
    // {{{ setUnderlineLinks()

    /**
     * Sets the html2ps "underline" option; null re-enables auto-detection
     * from the CSS.
     *
     * @param bool|null $in_underline
     */
    public function setUnderlineLinks($in_underline)
    {
        $this->underlineLinks = $in_underline;
    }

    // }}}
    // {{{ setScaleFactor()

    /**
     * Sets the html2ps "scaledoc" option.
     *
     * @param int|float $in_scale
     */
    public function setScaleFactor($in_scale)
    {
        $this->scaleFactor = $in_scale;
    }

    // }}}
    // {{{ setHtml2Ps()

    /**
     * Sets the location of the html2ps program.
     *
     * @param string $in_html2ps
     */
    public function setHtml2Ps($in_html2ps)
    {
        $this->html2psPath = $in_html2ps;
    }

    // }}}
    // {{{ setPs2Pdf()

    /**
     * Sets the location of the ps2pdf program.
     *
     * @param string $in_ps2pdf
     */
    public function setPs2Pdf($in_ps2pdf)
    {
        $this->ps2pdfPath = $in_ps2pdf;
    }

    // }}}
    // {{{ setMakeAbsoluteImageUrls()

    /**
     * Sets whether image URLs are rewritten to absolute URLs.
     *
     * @param bool $in_makeAbsoluteImageUrls
     */
    public function setMakeAbsoluteImageUrls($in_makeAbsoluteImageUrls)
    {
        $this->makeAbsoluteImageUrls = $in_makeAbsoluteImageUrls;
    }

    // }}}
    // {{{ setPs2pdfIncludePath()

    /**
     * Sets the value passed to ps2pdf through -I.
     *
     * @param string $in_ps2pdfIncludePath
     */
    public function setPs2pdfIncludePath($in_ps2pdfIncludePath)
    {
        $this->ps2pdfIncludePath = $in_ps2pdfIncludePath;
    }

    // }}}
    // {{{ convert()

    /**
     * Runs the conversion: HTML -> (html2ps) -> PostScript -> (ps2pdf) -> PDF.
     *
     * Relies on the OS_WINDOWS constant and the HTML_ToPDFException class
     * being defined by the including file.
     *
     * @return string|HTML_ToPDFException Path of the generated PDF, or an
     *         error object when a precondition fails or a command exits with
     *         a non-zero status (exit codes are ignored on Windows).
     */
    public function convert()
    {
        // make sure html file exists (local file or remote URL)
        if (!file_exists($this->htmlFile) && !preg_match(':^(f|ht)tps?\://:i', $this->htmlFile)) {
            return new HTML_ToPDFException("Error: The HTML file does not exist: $this->htmlFile");
        }

        // first make sure we can execute the programs
        // html2ps is just a perl script on windows though
        if (!OS_WINDOWS && !@is_executable($this->html2psPath)) {
            return new HTML_ToPDFException("Error: html2ps [$this->html2psPath] not executable");
        }

        if (!@is_executable($this->ps2pdfPath)) {
            return new HTML_ToPDFException("Error: ps2pdf [$this->ps2pdfPath] not executable");
        }

        // this can take a while with large files
        set_time_limit(160);

        // read the html file in so we can modify it; a failed read is reported
        // instead of being passed on (implode(false) is a TypeError on PHP 8)
        $html = @file_get_contents($this->htmlFile);
        if ($html === false) {
            return new HTML_ToPDFException("Error: The HTML file could not be read: $this->htmlFile");
        }
        $this->_htmlString = $html;

        // grab extra CSS
        $this->additionalCSS .= $this->_getCSSFromFile();
        // modify the conf file
        $this->_modifyConfFile();
        $paperSize = $this->_getPaperSize();
        $orientation = $this->_getOrientation();

        if ($this->makeAbsoluteImageUrls) {
            // prepend relative image paths with the default domain and path
            // (http:// and https:// sources are already absolute and left alone)
            $this->_htmlString = preg_replace(':<img (.*?)src=["\']((?!/)(?!https?\://).*?)["\']:i', '<img \\1 src="http://'.$this->defaultDomain.$this->defaultPath.'\\2"', $this->_htmlString);
            // prepend absolute image paths with the default domain
            $this->_htmlString = preg_replace(':<img (.*?)src=["\'](/.*?)["\']:i', '<img \\1 src="http://'.$this->defaultDomain.'\\2"', $this->_htmlString);
        }

        // html2ps messes up on several form elements
        $this->_htmlString = preg_replace(':<input (.*?)type=["\']?(hidden|submit|button|image|reset|file)["\']?.*?>:i', '<input />', $this->_htmlString);

        $a_tmpFiles = array();
        // the conf file has to be an actual file
        $a_tmpFiles['config'] = tempnam($this->tmpDir, 'CONF-');

        if ($a_tmpFiles['config'] === false || !@is_writable($a_tmpFiles['config'])) {
            return new HTML_ToPDFException("Error: the tmp directory is not writable.");
        }

        if (file_put_contents($a_tmpFiles['config'], $this->html2psrc) === false) {
            $this->_cleanup($a_tmpFiles);
            return new HTML_ToPDFException("Error: could not write the html2ps config file.");
        }
        $this->_dumpDebugInfo("html2ps config: $this->html2psrc");

        // make the temporary html file.  We need an html extension for
        // at least one version of html2ps
        $htmlBase = tempnam($this->tmpDir, 'HTML-');
        while (is_file($htmlBase . '.html')) {
            unlink($htmlBase);
            $htmlBase = tempnam($this->tmpDir, 'HTML-');
        }

        // tempnam() created the extension-less file; only the .html one is wanted
        @unlink($htmlBase);
        $a_tmpFiles['html'] = $htmlBase . '.html';
        if (file_put_contents($a_tmpFiles['html'], $this->_htmlString) === false) {
            $this->_cleanup($a_tmpFiles);
            return new HTML_ToPDFException("Error: could not write the temporary html file.");
        }

        // need a temporary postscript file as well
        $a_tmpFiles['ps'] = tempnam($this->tmpDir, 'PS-');

        $tmp_result = array();
        $retCode = 0;
        $cmd = $this->_quoteArg($this->html2psPath) . ' ' . $orientation .
                ' -f ' . $this->_quoteArg($a_tmpFiles['config']) .
                ' -o ' . $this->_quoteArg($a_tmpFiles['ps']) .
                ' ' . $this->_quoteArg($a_tmpFiles['html']) . ' 2>&1';
        exec($cmd, $tmp_result, $retCode);
        $this->_dumpDebugInfo("html2ps command run: $cmd");
        $this->_dumpDebugInfo("html2ps output: " . implode("\n", $tmp_result));

        // Windows exec returns no error codes
        if ($retCode != 0 && !OS_WINDOWS) {
            $this->_cleanup($a_tmpFiles);
            return new HTML_ToPDFException("Error: there was a problem running the html2ps command.  Error code returned: $retCode.  setDebug() for more information.");
        }

        $tmp_result = array();
        $this->pdfFile = is_null($this->pdfFile) ? tempnam($this->tmpDir, 'PDF-') : $this->pdfFile;
        // In case the windows path has spaces in it. A local copy is quoted so
        // that the configured path itself is never altered.
        $ps2pdf = OS_WINDOWS ? '"' . $this->ps2pdfPath . '"' : escapeshellarg($this->ps2pdfPath);
        $cmd = $ps2pdf . ' -sPAPERSIZE=' . $paperSize;
        if ($this->ps2pdfIncludePath != '') {
            $cmd .= ' -I' . $this->_quoteArg($this->ps2pdfIncludePath);
        }
        $cmd .= ' -dAutoFilterColorImages=false -dColorImageFilter=/FlateEncode ';
        if (OS_WINDOWS) {
            // NOTE(review): the Windows command line is intentionally left as it
            // was; cmd.exe quoting could not be verified here.
            // Because \ gets eaten by escapeshellcmd()
            $this->pdfFile = str_replace(DIRECTORY_SEPARATOR, '/', $this->pdfFile);
            $cmd .= '-dCompatibilityLevel=1.2 -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=' .
                    escapeshellcmd($this->pdfFile) . ' -c .setpdfwrite -f ' . $a_tmpFiles['ps'];
        }
        else {
            // escapeshellarg() quotes each path as exactly one shell word
            $cmd .= escapeshellarg($a_tmpFiles['ps']) . ' ' . escapeshellarg($this->pdfFile) . ' 2>&1';
        }

        exec($cmd, $tmp_result, $retCode);

        $this->_dumpDebugInfo("ps2pdf command run: $cmd");
        $this->_dumpDebugInfo("ps2pdf output: " . implode("\n", $tmp_result));
        if ($retCode != 0 && !OS_WINDOWS) {
            $this->_cleanup($a_tmpFiles);
            return new HTML_ToPDFException("Error: there was a problem running the ps2pdf command.  Error code returned: $retCode.  setDebug() for more information.");
        }

        $this->_cleanup($a_tmpFiles);
        return $this->pdfFile;
    }

    // }}}
    // {{{ _quoteArg()

    /**
     * Quotes a value for use as a single shell argument. On Windows the value
     * is returned untouched (see the note in convert()).
     *
     * @param string $in_arg
     * @return string
     */
    public function _quoteArg($in_arg)
    {
        return OS_WINDOWS ? $in_arg : escapeshellarg($in_arg);
    }

    // }}}
    // {{{ _modifyConfFile()

    /**
     * Fills the placeholders of the html2ps config template, appends the
     * header/footer blocks, guarantees an @page size rule in the CSS and wraps
     * everything in the @html2ps container. Modifies $html2psrc,
     * $additionalCSS and (when null) $underlineLinks in place.
     */
    public function _modifyConfFile()
    {
        // first determine if we should try and figure out underline link option, based on css
        if (is_null($this->underlineLinks)) {
            $this->underlineLinks = !(
                preg_match(':a\:link {.*?text-decoration\: (.*?);.*?}:is', $this->additionalCSS, $matches) &&
                strpos($matches[1], 'none') !== false
            );
        }

        $this->html2psrc = str_replace('%scaleFactor%', (string) $this->scaleFactor, $this->html2psrc);
        $this->html2psrc = str_replace('%getUrlPath%', (string) $this->getUrlPath, $this->html2psrc);
        // we convert booleans into numbers
        $this->html2psrc = str_replace('%pageInColor%', (string) (int) $this->pageInColor, $this->html2psrc);
        $this->html2psrc = str_replace('%grayScale%', (string) (int) $this->grayScale, $this->html2psrc);
        $this->html2psrc = str_replace('%underlineLinks%', (string) (int) $this->underlineLinks, $this->html2psrc);

        // Add header and footer information
        $this->html2psrc .= "\nheader {\n" . $this->_processHeaderFooter($this->headers);
        $this->html2psrc .= "}\nfooter {\n" . $this->_processHeaderFooter($this->footers);
        $this->html2psrc .= '}';

        // Add in paper size if not present to ensure that headers/footer will always show
        if ($this->_getPageProperty($this->additionalCSS, 'size') === null) {
            $this->additionalCSS .= "\n@page {\n";
            $this->additionalCSS .= "  size: 8.5in 11in;\n";
            $this->additionalCSS .= "}\n";
        }

        // add the global container
        $this->html2psrc = '
        @html2ps {
          ' . $this->html2psrc . '
        }
        ' . $this->additionalCSS;
    }

    // }}}
    // {{{ _getCSSFromFile()

    /**
     * Extracts CSS from the working HTML: all inline <style> blocks (which are
     * removed from the HTML), or otherwise the first linked text/css
     * stylesheet, fetched through an absolute URL. The CSS is then adjusted
     * for html2ps: bare "a" selectors become "a:link" and keyword font sizes
     * become point sizes.
     *
     * @return string The adjusted CSS, or '' when $useCSS is off or nothing
     *                was found/readable.
     */
    public function _getCSSFromFile()
    {
        if (!$this->useCSS) {
            return '';
        }

        $cssFound = '';
        // first try to find inline styles; every block is collected because
        // every block is removed from the html below
        if (preg_match_all(':<style.*?>(.*?)</style>:is', $this->_htmlString, $matches)) {
            $cssFound = implode("\n", $matches[1]);
            // replace it with nothing in the html since it messes up html2ps
            $this->_htmlString = preg_replace(':<style.*?>.*?</style>:is', '', $this->_htmlString);
        }
        elseif (preg_match(':<link .*? href=["\'](.*?)["\'].*?text/css.*?>:i', $this->_htmlString, $matches)) {
            $cssUrl = $matches[1];
            // prepend defaultDomain - additionaly defaultPath, if relative
            // path for css is given
            if ($cssUrl != '' && !preg_match(':^(f|ht)tps?\://:i', $cssUrl)) {
                if (substr($cssUrl, 0, 1) == '/') {
                    $cssUrl = 'http://' . $this->defaultDomain . $cssUrl;
                }
                else {
                    $cssUrl = 'http://' . $this->defaultDomain . $this->defaultPath . $cssUrl;
                }
            }

            $contents = ($cssUrl != '') ? file_get_contents($cssUrl) : false;
            if ($contents === false) {
                $this->_dumpDebugInfo("could not read stylesheet: $cssUrl");
            }
            else {
                $cssFound = $contents;
            }
        }

        // only takes a:link attribute; the look-behind keeps selectors that
        // merely end in "a" (textarea, .data, #a ...) untouched
        $cssFound = preg_replace('/(?<![\w.#-])a +{/i', 'a:link {', $cssFound);

        // font-size: word causes a crash
        $cssFound = preg_replace_callback(
            '/font-size: *([[:alpha:]-]*);/i',
            array($this, '_convertFontSizeMatch'),
            $cssFound
        );

        return $cssFound;
    }

    // }}}
    // {{{ _convertFontSizeMatch()

    /**
     * preg_replace_callback() adapter for _convertFontSize().
     *
     * @param array $in_matches Match array; index 1 holds the size keyword.
     * @return string
     */
    public function _convertFontSizeMatch($in_matches)
    {
        return $this->_convertFontSize($in_matches[1]);
    }

    // }}}
    // {{{ _convertFontSize()

    /**
     * Converts a CSS font-size keyword into a "font-size: Npt;" declaration.
     * Unknown keywords map to 12pt.
     *
     * @param string $in_fontString Keyword such as "small" or "x-large"
     *                              (case-insensitive).
     * @return string
     */
    public function _convertFontSize($in_fontString)
    {
        $sizes = array(
            'xx-small' => 6,
            'x-small'  => 8,
            'small'    => 10,
            'medium'   => 12,
            'large'    => 14,
            'x-large'  => 16,
            'xx-large' => 18,
        );
        $keyword = strtolower($in_fontString);
        $size = isset($sizes[$keyword]) ? $sizes[$keyword] : 12;

        return 'font-size: ' . $size . 'pt;';
    }

    // }}}
    // {{{ _getPageProperty()

    /**
     * Returns the value of a declaration inside the first @page block that
     * contains it. The search never leaves the block's braces, so e.g.
     * "font-size:" in a later rule is not mistaken for "size:".
     *
     * @param string $in_css      CSS text to search.
     * @param string $in_property Property name, e.g. "size".
     * @return string|null Trimmed value, or null when no @page block declares
     *                     the property.
     */
    public function _getPageProperty($in_css, $in_property)
    {
        $pattern = '/@page[^{]*\{[^}]*?(?<![\w-])' . preg_quote($in_property, '/') . '\s*:\s*([^;}]*)/i';
        if (preg_match($pattern, $in_css, $matches)) {
            return trim($matches[1]);
        }

        return null;
    }

    // }}}
    // {{{ _getPaperSize()

    /**
     * Maps the @page size found in the html2ps config to a ps2pdf paper name.
     *
     * @return string One of "legal", "11x17", "ledger", "a4" or "letter"
     *                (the default for missing or unknown sizes).
     */
    public function _getPaperSize()
    {
        // :NOTE: We don't support the html2ps paper block since the @page block
        // is the new correct way to do it.
        $declared = $this->_getPageProperty($this->html2psrc, 'size');
        if ($declared === null) {
            $declared = '8.5in 11in';
        }

        // Take out any whitespace and ignore case
        $declared = strtolower(preg_replace('/\s+/', '', $declared));
        switch ($declared) {
            case '8.5in14in':
                return 'legal';
            case '11in17in':
                return '11x17';
            case '17in11in':
                return 'ledger';
            case 'a4':
                return 'a4';
            case '8.5in11in':
            default:
                return 'letter';
        }
    }

    // }}}
    // {{{ _getOrientation()

    /**
     * Derives the html2ps orientation switch from the @page block of the
     * html2ps config.
     *
     * @return string "--landscape" when the orientation is landscape,
     *                otherwise ''.
     */
    public function _getOrientation()
    {
        $declared = $this->_getPageProperty($this->html2psrc, 'orientation');
        if ($declared === null) {
            $declared = 'portrait';
        }

        return (strtolower($declared) == 'landscape') ? '--landscape' : '';
    }

    // }}}
    // {{{ _processHeaderFooter()

    /**
     * Renders header or footer entries as html2ps config lines.
     *
     * For each of left/right/center that is set, the odd-/even- variant is
     * filled in with the same value unless it was set explicitly.
     *
     * @param array $in_data Position => text.
     * @return string One '  key: "value"' line per entry.
     */
    public function _processHeaderFooter($in_data)
    {
        // If not using odd/even attributes then override them with the main left/right/center keys
        // to ensure that the desired headers/footers get in
        foreach (array('left', 'right', 'center') as $s_key) {
            if (!isset($in_data[$s_key])) {
                continue;
            }
            foreach (array("odd-$s_key", "even-$s_key") as $s_variant) {
                if (!isset($in_data[$s_variant])) {
                    $in_data[$s_variant] = $in_data[$s_key];
                }
            }
        }

        $s_data = '';
        foreach ($in_data as $s_key => $s_val) {
            $s_data .= "  $s_key: \"$s_val\"\n";
        }

        return $s_data;
    }

    // }}}
    // {{{ _cleanup()

    /**
     * Removes the given temporary files, or only lists them when debugging.
     *
     * @param array $in_files Label => file path.
     */
    public function _cleanup($in_files)
    {
        foreach ($in_files as $key => $file) {
            if ($this->debug) {
                $this->_dumpDebugInfo("$key file: $file (not removed)");
            }
            elseif (is_string($file) && is_file($file)) {
                // a file that was never created must not raise a warning
                unlink($file);
            }
        }
    }

    // }}}
    // {{{ _dumpDebugInfo()

    /**
     * Echoes a debug message when debugging is enabled, as HTML or plain text
     * depending on $htmlErrors.
     *
     * @param string $in_info
     */
    public function _dumpDebugInfo($in_info)
    {
        if (!$this->debug) {
            return;
        }

        if ($this->htmlErrors) {
            // the message may contain markup or command output; show it literally
            echo "<pre><span style=\"color: red;\">DEBUG</span>: " . htmlspecialchars($in_info) . "</pre>";
        }
        else {
            echo "DEBUG: $in_info\n";
        }
    }

    // }}}
}
00911 
00912 // {{{ HTML_ToPDFException
00913 
/**
 * Error object returned by HTML_ToPDF::convert() when the conversion cannot
 * be carried out. It is a PEAR_Error, so callers detect it with
 * PEAR::isError() rather than try/catch.
 */
class HTML_ToPDFException extends PEAR_Error
{
    public $classname             = 'HTML_ToPDF';
    public $error_message_prepend = 'Error: ';

    /**
     * Initialises the underlying PEAR_Error with the given message.
     *
     * @param string $message Description of the failure.
     */
    public function __construct($message)
    {
        // Use PEAR_Error::__construct() when the installed PEAR provides it;
        // otherwise fall back to the PHP 4-style PEAR_Error() initialiser.
        if (method_exists('PEAR_Error', '__construct')) {
            parent::__construct($message);
        } else {
            $this->PEAR_Error($message);
        }
    }

    /**
     * Legacy PHP 4-style constructor name, kept for code that calls it
     * explicitly. PHP 8 no longer treats it as a constructor.
     *
     * @param string $message Description of the failure.
     */
    public function HTML_ToPDFException($message)
    {
        $this->__construct($message);
    }
}
00923 
00924 // }}}
00925 // {{{ is_executable()
00926 
if (function_exists('is_executable') === false) {
    /**
     * Stand-in for is_executable() on PHP builds that do not provide it:
     * a file counts as executable as soon as it exists.
     *
     * @param string $in_filename Path to check.
     * @return bool Result of file_exists() for that path.
     */
    function is_executable($in_filename)
    {
        $exists = file_exists($in_filename);

        return $exists;
    }
}
00942 
00943 // }}}

Generated on Sat Feb 11 08:43:10 2006 for HTML_ToPDF by  doxygen 1.4.3-20050530