Author: heiko.braun(a)jboss.com
Date: 2007-07-04 15:14:29 -0400 (Wed, 04 Jul 2007)
New Revision: 3798
Added:
projects/wiki/extensions/SpecialPdfPrint.php
projects/wiki/extensions/wiki2xml/
projects/wiki/extensions/wiki2xml/CREDITS
projects/wiki/extensions/wiki2xml/README
projects/wiki/extensions/wiki2xml/TODO
projects/wiki/extensions/wiki2xml/browse_texts.php
projects/wiki/extensions/wiki2xml/content_provider.php
projects/wiki/extensions/wiki2xml/default.php
projects/wiki/extensions/wiki2xml/extension.php
projects/wiki/extensions/wiki2xml/filter_named_entities.php
projects/wiki/extensions/wiki2xml/gfdl.xml
projects/wiki/extensions/wiki2xml/global_functions.php
projects/wiki/extensions/wiki2xml/mediawiki_converter.php
projects/wiki/extensions/wiki2xml/sample_local.php
projects/wiki/extensions/wiki2xml/template.odt
projects/wiki/extensions/wiki2xml/test.xml
projects/wiki/extensions/wiki2xml/w2x.php
projects/wiki/extensions/wiki2xml/wiki2xml.php
projects/wiki/extensions/wiki2xml/xhtml.xslt
projects/wiki/extensions/wiki2xml/xml2docbook_xml.php
projects/wiki/extensions/wiki2xml/xml2odt.php
projects/wiki/extensions/wiki2xml/xml2tree.php
projects/wiki/extensions/wiki2xml/xml2txt.php
projects/wiki/extensions/wiki2xml/xml2xhtml.php
projects/wiki/extensions/wiki2xml/xmldump2files.php
Modified:
projects/wiki/LocalSettings.php
projects/wiki/skins/Devmo.php
projects/wiki/skins/devmo/css/wikiprint.css
Log:
Add pdf conversion, wiki2xml extensions
Modified: projects/wiki/LocalSettings.php
===================================================================
--- projects/wiki/LocalSettings.php 2007-07-04 11:33:22 UTC (rev 3797)
+++ projects/wiki/LocalSettings.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -147,4 +147,10 @@
$recaptcha_public_key = '6LeaFQAAAAAAANdJOabOzJZc2XIPMX9GaTUeu6zu';
$recaptcha_private_key = '6LeaFQAAAAAAADf35ShIh9BuBytm8o_Vs58VaDQr';
+# PDF export
+#require_once("extensions/SpecialPdfPrint.php");
+
+# WIKI2XML
+require_once ("extensions/wiki2xml/extension.php");
+
?>
Added: projects/wiki/extensions/SpecialPdfPrint.php
===================================================================
--- projects/wiki/extensions/SpecialPdfPrint.php (rev 0)
+++ projects/wiki/extensions/SpecialPdfPrint.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,124 @@
+<?php
+
+// http://www.mediawiki.org/wiki/Extension:Pdf_Export
+
+if (!defined('MEDIAWIKI')) die();
+require_once ("$IP/includes/SpecialPage.php");
+
+$wgExtensionFunctions[] = 'wfSpecialPdf';
+$wgExtensionCredits['specialpage'][] = array(
+ 'name' => 'Pdf',
+ 'author' =>' Thomas Hempel',
+ 'description' => 'prints a page as pdf',
+ 'url' => 'http://www.mediawiki.org/wiki/Extension:Pdf_Export'
+);
+
+$wgHooks['SkinTemplateBuildNavUrlsNav_urlsAfterPermalink'][] =
'wfSpecialPdfNav';
+
+# The MonoBook hook doesn't work, so the link was added straight to the Devmo.php page -> see 'Toolbox' there
+#$wgHooks['MonoBookTemplateToolboxEnd'][] = 'wfSpecialPdfToolbox';
+
+/**
+ * Extension setup callback (registered through $wgExtensionFunctions).
+ *
+ * Adds the interface messages used by the extension and registers the
+ * Special:PdfPrint page, which renders a wiki page to HTML, writes it to a
+ * temporary file and streams the PDF produced by the external "htmldoc"
+ * program to the client.
+ */
+function wfSpecialPdf() {
+    global $wgMessageCache;
+
+    $wgMessageCache->addMessages(
+        array(
+            'pdfprint' => 'PdfPrint' ,
+            'pdf_print_link' => 'Print as PDF'));
+
+    class SpecialPdf extends SpecialPage {
+        var $title;
+        var $article;
+        var $html;
+        var $parserOptions;
+        var $bhtml;
+
+        function SpecialPdf() {
+            SpecialPage::SpecialPage( 'PdfPrint' );
+        }
+
+        /**
+         * Render the requested page as PDF and send it as a download.
+         *
+         * @param string|null $par Sub-page part of the special page URL; when
+         *                         absent the "page" request parameter is used.
+         */
+        function execute( $par ) {
+            global $wgRequest;
+            global $wgOut;
+            global $wgUser;
+            global $wgParser;
+            global $wgScriptPath;
+            global $wgServer;
+
+            $page = isset( $par ) ? $par : $wgRequest->getText( 'page' );
+            $title = Title::newFromText( $page );
+            if ( !$title ) {
+                # Empty or invalid page name: report it as a normal wiki page
+                # instead of failing on "new Article( null )".
+                $this->setHeaders();
+                $wgOut->addHTML( 'No valid page title was given.' );
+                return;
+            }
+            $article = new Article ($title);
+            $wgOut->setPrintable();
+            $wgOut->disable();
+            $parserOptions = ParserOptions::newFromUser( $wgUser );
+            $parserOptions->setEditSection( false );
+            $parserOptions->setTidy(true);
+            $wgParser->mShowToc = false;
+            $parserOutput = $wgParser->parse( $article->preSaveTransform( $article->getContent() ) ."\n\n",
+                $title, $parserOptions );
+
+            # htmldoc is run with --charset iso-8859-1, hence the utf8_decode().
+            $bhtml = $parserOutput->getText();
+            $bhtml = utf8_decode($bhtml);
+
+            # Make site-relative URLs absolute so htmldoc can fetch images etc.
+            $bhtml = str_replace ($wgScriptPath, $wgServer . $wgScriptPath, $bhtml);
+            $bhtml = str_replace ('/w/',$wgServer . '/w/', $bhtml);
+
+            $html = "<html><head><title>" . utf8_decode($page) . "</title></head>";
+            # TODO: somehow we need to tweak the styles here
+            # NOTE(review): the stylesheet URLs are hard-coded to a localhost
+            # installation under /mediawiki -- confirm this matches the server.
+            $html .= "<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"http://localhost/mediawiki/skins/devmo/css/base.css\">";
+            $html .= "<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"http://localhost/mediawiki/skins/devmo/css/wiki.css\">";
+            $html .= "<link rel=\"stylesheet\" type=\"text/css\" media=\"all\" href=\"http://localhost/mediawiki/skins/devmo/css/wikiprint.css\">";
+            $html .= "<body>" . $bhtml . "</body></html>";
+
+            // write the HTML to a temporary file with a unique name
+            $mytemp = "/tmp/f" .time(). "-" .rand() . ".html";
+            $article_f = fopen($mytemp,'w');
+            if ( $article_f === false ) {
+                # Output is already disabled, so answer with a plain error.
+                header( 'HTTP/1.0 500 Internal Server Error' );
+                echo 'Could not create a temporary file for the PDF export.';
+                return;
+            }
+            fwrite($article_f, $html);
+            fclose($article_f);
+            putenv("HTMLDOC_NOCGI=1");
+
+            # Write the content type to the client...
+            # Quotes, backslashes and line breaks would corrupt the quoted
+            # filename parameter, so they are replaced first.
+            $filename = str_replace( array( '"', '\\', "\r", "\n" ), '_', $page );
+            header("Content-Type: application/pdf");
+            header(sprintf('Content-Disposition: attachment; filename="%s.pdf"', $filename));
+            flush();
+
+            # if the page is on a HTTPS server and contains images that are on the HTTPS server AND also reachable with HTTP
+            # uncomment the next line
+
+            #system("perl -pi -e 's/img src=\"https:\/\//img src=\"http:\/\//g' '$mytemp'");
+
+            # Run HTMLDOC to provide the PDF file to the user...
+            passthru("htmldoc -t pdf14 --charset iso-8859-1 --color --quiet --jpeg --webpage " . escapeshellarg( $mytemp ));
+
+            # passthru() returns only after htmldoc has finished, so the
+            # temporary file can be removed here instead of piling up in /tmp.
+            @unlink ($mytemp);
+        }
+    }
+    SpecialPage::addPage (new SpecialPdf());
+}
+
+/**
+ * SkinTemplateBuildNavUrlsNav_urlsAfterPermalink hook handler: adds a
+ * "pdfprint" entry to the navigation URLs that points at Special:PdfPrint
+ * for the page currently shown by the skin.
+ *
+ * @return bool Always true.
+ */
+function wfSpecialPdfNav( &$skintemplate, &$nav_urls, &$oldid, &$revid ) {
+    $query = "page=" . wfUrlencode( "{$skintemplate->thispage}" );
+    $nav_urls['pdfprint'] = array(
+        'text' => wfMsg( 'pdf_print_link' ),
+        'href' => $skintemplate->makeSpecialUrl( 'PdfPrint', $query ),
+    );
+
+    return true;
+}
+
+/**
+ * Toolbox hook handler: prints the "Print as PDF" list item.
+ *
+ * Nothing is printed when the template has no "pdfprint" navigation entry.
+ * With an empty href only the label is printed; otherwise the label is
+ * wrapped in a link to the HTML-escaped href.
+ *
+ * @return bool Always true.
+ */
+function wfSpecialPdfToolbox( &$monobook ) {
+    if ( !isset( $monobook->data['nav_urls']['pdfprint'] ) ) {
+        return true;
+    }
+
+    if ( $monobook->data['nav_urls']['pdfprint']['href'] == '' ) {
+        echo '<li id="t-ispdf">';
+        echo $monobook->msg( 'pdf_print_link' );
+        echo '</li>';
+    } else {
+        echo '<li id="t-pdf">';
+        echo '<a href="';
+        echo htmlspecialchars( $monobook->data['nav_urls']['pdfprint']['href'] );
+        echo '">';
+        echo $monobook->msg( 'pdf_print_link' );
+        echo '</a>';
+        echo '</li>';
+    }
+    return true;
+}
+?>
Added: projects/wiki/extensions/wiki2xml/CREDITS
===================================================================
--- projects/wiki/extensions/wiki2xml/CREDITS (rev 0)
+++ projects/wiki/extensions/wiki2xml/CREDITS 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,8 @@
+wiki2xml is (c) by Magnus Manske 2005-2006 and released under the GPL.
+
+The following people (in alphabetic order) contributed to this project:
+
+Magnus Manske <magnus.manske(a)web.de> Everything Tels didn't do ;-)
+
+Tels <nospam-abuse(a)bloodgate.com> Linux fixes, OpenOffice output
+ README and doc
Added: projects/wiki/extensions/wiki2xml/README
===================================================================
--- projects/wiki/extensions/wiki2xml/README (rev 0)
+++ projects/wiki/extensions/wiki2xml/README 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,138 @@
+=pod
+
+=head1 INTRODUCTION
+
+You can read this document better with:
+
+ perldoc README
+
+Otherwise, please just ignore the funny characters.
+
+=head1 NAME
+
+WIKI2XML - Wikitext to XML converter
+
+=head1 INSTALLATION
+
+=head2 Download/Checkout
+
+Check out the current release from SVN:
+
+ svn co
http://svn.wikimedia.org/svnroot/mediawiki/trunk/wiki2xml
+
+=head2 Extension or cgi-bin?
+
+There are two ways to install wiki2xml:
+
+ As extension: Special::Wiki2XML
+ As cgi-bin:
http://example.com/wiki/wiki2xml/w2x
+
+The former should be preferred.
+
+=head2 As Extension
+
+To enable wiki2xml as extension, put all files in the C<< php >> directory
into a
+C<< wiki2xml >> subdirectory of your MediaWiki extensions directory.
+
+ htdocs
+ \- wiki
+ \- extensions
+ \- wiki2xml <-- create this directory
+ \- w2x.php <-- copy files here
+ \- wiki2xml.php etc.
+
+Then add
+
+ require_once ("extensions/wiki2xml/extension.php");
+
+to your C<< LocalSettings.php >>. The extension can then be accessed as
+C<< [[Special:Wiki2XML]] >>.
+
+=head2 cgi-bin - Copy files
+
+The alternative method is to install wiki2xml as cgi-bin script:
+
+Copy the subdirectory C<< ./php/ >> to your server's C<< wiki/
>>
+directory as a subdirectory named C<< w2x >>:
+
+ htdocs
+ \- wiki
+ \- w2x <-- here
+ \- w2x.php
+ \- wiki2xml.php etc
+
+Access it as C<<
http://example.com/wiki/w2x/w2x.php >>.
+
+=head2 Configuration
+
+The configuration is stored in C<< default.php >> and C<< local.php
>>.
+
+There is a C<< sample_local.php >> file, copy it to C<< local.php
>>
+and then edit it to match your configuration.
+
+On a Unix/Linux server the following can be used as a starting point:
+
+ # Directory for temporary files:
+ $xmlg["temp_dir"] = "/tmp";
+
+ # Path to the zip/unzip programs; can be omitted if in default
+ # executable path:
+ #$xmlg["zip_odt_path"] = "";
+
+ # Command to zip directory $1 to file $2:
+ $xmlg["zip_odt"] = 'zip -r9 $1 $2';
+
+ # Command to unzip file $1 to directory $2:
+ $xmlg["unzip_odt"] = 'unzip -x $1 -d $2';
+
+=head1 USAGE
+
+Open the correct URL (depending on install type, see above) in your browser.
+
+Wiki2xml should present you with a form with a textarea and several buttons.
+
+=head2 Using URL parameters
+
+Parameters:
+
+ doit=1
+ text=lines_of_text_or_titles
+ whatsthis=wikitext/articlelist
+ site=en.wikipedia.org/w
+ output_format=xml/text/xhtml/docbook_xml/odt_xml/odt
+
+Optional:
+
+ use_templates=all/none/these/notthese
+ templates=lines_of_templates
+ document_title=
+ add_gfdl=1
+ keep_categories=1
+ keep_interlanguage=1
+
+
+
+=head1 TROUBLESHOOTING
+
+If you get errors like the following:
+
+ Warning: fopen(/tmp/ODD6Rq1qt-DIR/content.xml): failed to open stream:
+ No such file or directory in /.../wiki/wiki2xml/w2x.php on line 112
+
+then make sure that the tmp directory you selected is really writable by
+your webserver.
+
+=head2 OpenOffice Output
+
+For OpenOffice output, the converter will extract a file called C<< template.odt
>>
+into the temp directory. It will then replace/modify the files in it, zip it
+up again and then offer the browser the resulting file as a download.
+
+=head1 AUTHOR
+
+Copyright 2005-2006 by Magnus Manske <magnus.manske(a)web.de>
+
+Released under the GPL.
+
+=cut
+
Added: projects/wiki/extensions/wiki2xml/TODO
===================================================================
--- projects/wiki/extensions/wiki2xml/TODO (rev 0)
+++ projects/wiki/extensions/wiki2xml/TODO 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,7 @@
+* Articles with '_' result in 'Main_Page' instead of 'Main Page'
as headline
+* Sublists, and numbered lists are wrong
+* the ODT output could use a more strict separation between template and
+ generated XML
+* support for <div style="..."> where "..." is text-align,
font-size,
+ border, etc
+
\ No newline at end of file
Added: projects/wiki/extensions/wiki2xml/browse_texts.php
===================================================================
--- projects/wiki/extensions/wiki2xml/browse_texts.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/browse_texts.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,67 @@
+<?php
+
+require_once ( "default.php" ) ;
+require_once ( "global_functions.php" ) ;
+require_once ( "filter_named_entities.php" ) ;
+require_once ( "content_provider.php" ) ;
+require_once ( "wiki2xml.php" ) ;
+require_once ( "xml2xhtml.php" ) ;
+require_once ( "mediawiki_converter.php" ) ;
+
+# FUNCTIONS
+
+/**
+ * Fetch a request parameter from $_REQUEST.
+ *
+ * @param string $key     Parameter name.
+ * @param mixed  $default Value returned when the parameter is not set.
+ * @return mixed The request value, or $default.
+ */
+function get_param ( $key , $default = "" ) {
+    return isset ( $_REQUEST[$key] ) ? $_REQUEST[$key] : $default ;
+}
+
+# MAIN
+
+# Script body: converts one page from the text-file store and prints it as
+# XML, plain text (wrapped in HTML line breaks) or XHTML, depending on the
+# "format" request parameter (default: xhtml).
+
+@set_time_limit ( 0 ) ; # No time limit
+
+# Replaces whatever $xmlg the included default.php set up.
+$xmlg = array (
+    'site_base_url' => "SBU" ,
+    'resolvetemplates' => true ,
+    'templates' => array () ,
+    'namespace_template' => 'Vorlage' ,
+) ;
+
+$content_provider = new ContentProviderTextFile ;
+$converter = new MediaWikiConverter ;
+
+# NOTE(review): $_REQUEST values are already URL-decoded by PHP, so this
+# urldecode() decodes a second time -- confirm whether callers rely on that.
+$title = urldecode ( get_param ( 'title' , urlencode ( 'Main Page' ) ) ) ;
+$xmlg['page_title'] = $title ;
+
+$format = strtolower ( get_param ( 'format' , 'xhtml' ) ) ;
+# $base_text_dir is not defined in this file; presumably it comes from the
+# included configuration -- TODO confirm.
+$content_provider->basedir = $base_text_dir ;
+
+$text = $content_provider->get_wiki_text ( $title ) ;
+$xml = $converter->article2xml ( $title , $text , $xmlg ) ;
+
+if ( $format =="xml" ) {
+    # XML
+    header('Content-type: text/xml; charset=utf-8');
+    print "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
+    print $xml ;
+} else if ( $format == "text" ) {
+    # Plain text
+    $xmlg['plaintext_markup'] = true ;
+    $xmlg['plaintext_prelink'] = true ;
+    $out = $converter->articles2text ( $xml , $xmlg ) ;
+    $out = str_replace ( "\n" , "<br/>" , $out ) ;
+    header('Content-type: text/html; charset=utf-8');
+    print $out ;
+} else {
+    # XHTML
+    # Always served as text/html (skipping the "strict" part ;-) ). The
+    # former check of the Accept header chose between two identical headers
+    # and raised an undefined-index notice when the client sent no Accept
+    # header, so it has been dropped.
+    header("Content-type: text/html; charset=utf-8");
+    print $converter->articles2xhtml ( $xml , $xmlg ) ;
+}
+
+?>
Added: projects/wiki/extensions/wiki2xml/content_provider.php
===================================================================
--- projects/wiki/extensions/wiki2xml/content_provider.php (rev
0)
+++ projects/wiki/extensions/wiki2xml/content_provider.php 2007-07-04 19:14:29 UTC (rev
3798)
@@ -0,0 +1,353 @@
+<?php
+
+# Abstract base class
+class ContentProvider {
+ var $load_time = 0 ; # Time to load text and templates, to judge actual parsing speed
+ var $article_list = array () ;
+ var $authors = array () ;
+ var $block_file_download = false ;
+
+ function get_wiki_text ( $title , $do_cache = false ) { return "" ; } # dummy
+ function get_template_text ( $title ) { return "" ; } # dummy
+
+ function add_article ( $title ) {
+ $this->article_list[] = urlencode ( trim ( $title ) ) ;
+ }
+
+ function is_an_article ( $title ) {
+ $title = urlencode ( trim ( $title ) ) ;
+ return in_array ( $title , $this->article_list ) ;
+ }
+
+ /**
+  * Map the namespace prefix of a link target ("Prefix:Rest") to a number.
+  *
+  * The prefix is compared case-insensitively. Returned values:
+  *   0  - no prefix, empty prefix, or unrecognised prefix
+  *   6  - images ("Image" / "Bild")
+  *   11 - templates ("Template" / "Vorlage")
+  *   -8 - category link ("Category" / "Kategorie")
+  *   -9 - interlanguage link (prefix shorter than 4 characters, or "simple")
+  *
+  * XXX TODO: why are some negative?
+  */
+ function get_namespace_id ( $text ) {
+     $parts = explode ( ":" , strtoupper ( $text ) , 2 ) ;
+     if ( count ( $parts ) != 2 ) return 0 ;
+
+     $prefix = trim ( $parts[0] ) ;
+     if ( $prefix == "" ) return 0 ;
+
+     # Hackish, for category link
+     if ( $prefix == "CATEGORY" || $prefix == "KATEGORIE" ) return -8 ;
+
+     # Hackish, for interlanguage link
+     if ( strlen ( $prefix ) < 4 || $prefix == "SIMPLE" ) return -9 ;
+
+     # Horrible manual hack, for now
+     if ( $prefix == "TEMPLATE" || $prefix == "VORLAGE" ) return 11 ;
+     if ( $prefix == "IMAGE" || $prefix == "BILD" ) return 6 ;
+
+     return 0 ;
+ }
+
+ /**
+  * Copy an image into the directory named by $xmlg['image_destination'],
+  * unless the target file already exists or downloads are blocked.
+  *
+  * @param string $name Image name.
+  * @param string $url  Source URL; when empty it is looked up with
+  *                     get_image_url().
+  * @return string The URL-encoded file name used inside the destination
+  *                directory (returned whether or not a copy took place).
+  */
+ function copyimagefromwiki ( $name , $url = "" ) {
+     global $xmlg ;
+     $dir = $xmlg['image_destination'] ;
+     $fname = urlencode ( $name ) ;
+     $target = $dir . "/" . $fname ;
+     if ( !file_exists ( $target ) && !$this->block_file_download ) {
+         # Resolve the URL only when a copy is really needed. The original
+         # passed the bare constant "name" here instead of $name.
+         if ( $url == "" )
+             $url = $this->get_image_url ( $name ) ;
+         # Best effort: failures to create the directory or to fetch the
+         # image are deliberately ignored.
+         @mkdir ( $dir ) ;
+         @copy ( $url , $target ) ;
+     }
+     return $fname ;
+ }
+
+ function myurlencode ( $t ) {
+ $t = str_replace ( " " , "_" , $t ) ;
+ $t = urlencode ( $t ) ;
+ return $t ;
+ }
+
+
+ function get_image_url ( $name ) {
+ global $xmlg ;
+ $site = $xmlg['site_base_url'] ;
+ $parts = explode ( ".wikipedia.org/" , $site ) ;
+ $parts2 = explode ( ".wikibooks.org/" , $site ) ;
+
+ $image = utf8_encode ( $name ) ;
+ $image2 = ucfirst ( str_replace ( " " , "_" , $name ) ) ;
+ $m = md5( $image2 ) ;
+ $m1 = substr ( $m , 0 , 1 ) ;
+ $m2 = substr ( $m , 0 , 2 ) ;
+ $i = "{$m1}/{$m2}/" . $this->myurlencode ( ucfirst ( $name ) ) ;
+
+
+ if ( count ($parts ) > 1 ) {
+ $lang = array_shift ( $parts ) ;
+ $url = "http://upload.wikimedia.org/wikipedia/{$lang}/{$i}" ;
+ $url2 = "http://upload.wikimedia.org/wikipedia/commons/{$i}" ;
+ $h = @fopen ( $url , "r" ) ;
+ if ( $h === false ) $url = $url2 ;
+ else fclose ( $h ) ;
+ } else if ( count ($parts2 ) > 1 ) {
+ $lang = array_shift ( $parts2 ) ;
+ $url = "http://upload.wikimedia.org/wikibooks/{$lang}/{$i}" ;
+ $url2 = "http://upload.wikimedia.org/wikipedia/commons/{$i}" ;
+ $h = @fopen ( $url , "r" ) ;
+ if ( $h === false ) $url = $url2 ;
+ else fclose ( $h ) ;
+ } else {
+ $url = "http://{$site}/images/{$i}" ;
+ }
+# print "<a href='{$url}'>{$url}</a><br/>" ;
+ return $url ;
+ }
+
+ function do_show_images () {
+ return true ;
+ }
+
+}
+
+
+# Access through HTTP protocol
+class ContentProviderHTTP extends ContentProvider {
+ var $article_cache = array () ;
+ var $first_title = "" ;
+ var $load_error ;
+
+ function between_tag ( $tag , &$text ) {
+ $a = explode ( "<{$tag}" , $text , 2 ) ;
+ if ( count ( $a ) == 1 ) return "" ;
+ $a = explode ( ">" , " " . array_pop ( $a ) , 2 ) ;
+ if ( count ( $a ) == 1 ) return "" ;
+ $a = explode ( "</{$tag}>" , array_pop ( $a ) , 2 ) ;
+ if ( count ( $a ) == 1 ) return "" ;
+ return array_shift ( $a ) ;
+ }
+
+ function do_get_contents ( $title ) {
+ global $xmlg ;
+ $use_se = false ;
+ if ( isset ( $xmlg["use_special_export"] ) &&
$xmlg["use_special_export"] == 1 ) $use_se = true ;
+
+ if ( $use_se ) {
+ $url = "http://" . $xmlg["site_base_url"] .
"/index.php?listauthors=1&title=Special:Export/" . urlencode ( $title ) ;
+ } else {
+ if ( $xmlg["use_toolserver_url"] ) {
+# $url = "http://" . $xmlg["site_base_url"] .
"/index.php?action=raw&title=" . urlencode ( $title ) ;
+ $u = urlencode ( $title ) ;
+ $site = array_shift ( explode ( "/" , $xmlg["site_base_url"] ) )
;
+ $url =
"http://tools.wikimedia.de/~daniel/WikiSense/WikiProxy.php?wiki={$site}&title={$u}&rev=0&go=Fetch"
;
+ } else {
+ $url = "http://" . $xmlg["site_base_url"] .
"/index.php?action=raw&title=" . urlencode ( $title ) ;
+ }
+ }
+ $s = @file_get_contents ( $url ) ;
+
+ if ( $use_se ) {
+ $text = html_entity_decode ( $this->between_tag ( "text" , $s ) ) ;
+ $this->authors = array () ;
+ $authors = $this->between_tag ( "contributors" , $s ) ;
+ $authors = explode ( "</contributor><contributor>" , $authors )
;
+ foreach ( $authors AS $author ) {
+ $id = $this->between_tag ( "id" , $author ) ;
+ if ( $id == '0' || $id == '' ) continue ; # Skipping IPs and
(possibly) broken entries
+ $name = $this->between_tag ( "username" , $author ) ;
+ $this->authors[] = $name ;
+ }
+ $s = $text ;
+ }
+ return $s ;
+ }
+
+ /**
+  * Return the wiki text of a page, following one level of #REDIRECT.
+  *
+  * @param string $title    Page title; surrounding whitespace is ignored.
+  * @param bool   $do_cache When true the result is stored in article_cache
+  *                         under the requested title.
+  * @return string Page text; "" for an empty title or when the fetched
+  *                content starts with an HTML doctype (wrong title).
+  */
+ function get_wiki_text ( $title , $do_cache = false ) {
+     # Reset the declared property; the original assigned an unused local.
+     $this->load_error = false ;
+     $title = trim ( $title ) ;
+     if ( $title == "" ) return "" ; # Just in case...
+     if ( isset ( $this->article_cache[$title] ) ) # Already in the cache
+         return $this->article_cache[$title] ;
+
+     if ( $this->first_title == "" ) $this->first_title = $title ;
+
+     # Retrieve it, keeping track of the time spent loading
+     $t1 = microtime_float() ;
+     $s = $this->do_get_contents ( $title ) ;
+     if ( strtoupper ( substr ( $s , 0 , 9 ) ) == "#REDIRECT" ) {
+         # The redirect target is the text between the first "[[" and "]]"
+         $t2 = explode ( "[[" , $s , 2 ) ;
+         $t2 = array_pop ( $t2 ) ;
+         $t2 = explode ( "]]" , $t2 , 2 ) ;
+         $t2 = array_shift ( $t2 ) ;
+         $s = $this->do_get_contents ( $t2 ) ;
+     }
+     $this->load_time += microtime_float() - $t1 ;
+
+     # Catching wrong title error: an HTML page came back instead of wiki text
+     $comp = '<!DOCTYPE html PUBLIC "-//W3C//DTD' ;
+     if ( substr ( $s , 0 , strlen ( $comp ) ) == $comp ) $s = "" ;
+
+     if ( $do_cache ) $this->article_cache[$title] = $s ;
+     return $s ;
+ }
+
+ /**
+  * Build the server-relative index.php URL of a page.
+  *
+  * @param string $title Page title (URL-encoded into the query string).
+  * @return string "/<part of site_base_url after the first slash>/index.php?title=..."
+  */
+ function get_local_url ( $title ) {
+     # array_pop() takes its argument by reference, so the explode() result
+     # has to live in a variable first.
+     $parts = explode ( "/" , $this->get_var ( 'site_base_url' ) , 2 ) ;
+     return "/" . array_pop ( $parts ) . "/index.php?title=" . urlencode ( $title ) ;
+ }
+
+ /**
+  * Return "http://" followed by the part of site_base_url before the
+  * first slash.
+  */
+ function get_server_url () {
+     # array_shift() takes its argument by reference, so the explode()
+     # result has to live in a variable first.
+     $parts = explode ( "/" , $this->get_var ( 'site_base_url' ) , 2 ) ;
+     return "http://" . array_shift ( $parts ) ;
+ }
+
+ function get_full_url ( $title ) {
+ return $this->get_server_url () . $this->get_local_url ( $title ) ;
+ }
+
+ function get_namespace_template () {
+ return $this->get_var ( 'namespace_template' ) ;
+ }
+
+ function get_var ( $var ) {
+ global $xmlg ;
+ if ( !isset ( $xmlg[$var] ) ) return false ;
+ return $xmlg[$var] ;
+ }
+
+ function get_template_text ( $title ) {
+ # Check for fix variables
+ if ( $title == "PAGENAME" ) return $this->first_title ;
+ if ( $title == "PAGENAMEE" ) return urlencode ( $this->first_title ) ;
+ if ( $title == "SERVER" ) return $this->get_server_url () ;
+ if ( $title == "CURRENTDAYNAME" ) return date ( "l" ) ;
+ if ( strtolower ( substr ( $title , 0 , 9 ) ) == "localurl:" )
+ return $this->get_local_url ( substr ( $title , 9 ) ) ;
+
+ $title = trim ( $title ) ;
+ if ( count ( explode ( ":" , $title , 2 ) ) == 1 ) # Does the template title
contain a ":"?
+ $title = $this->get_namespace_template() . ":" . $title ;
+ else if ( substr ( $title , 0 , 1 ) == ":" ) # Main namespace
+ $title = substr ( $title , 1 ) ;
+ return $this->get_wiki_text ( $title , true ) ; # Cache template texts
+ }
+
+ function get_internal_link ( $target , $text ) {
+ return $text ; # Dummy
+ }
+}
+
+
+
+
+# Access through text file structure
+class ContentProviderTextFile extends ContentProviderHTTP {
+ var $file_ending = ".txt" ;
+
+ function do_get_contents ( $title ) {
+ return $this->get_page_text ( $title ) ;
+ }
+
+ /**
+ Called from outside
+ Could probably have remained unchanged from the HTTP class, but this is shorter, and caching is irrelevant for text files (disk cache)
+ */
+ function get_wiki_text ( $title , $do_cache = false ) {
+ $title = trim ( $title ) ;
+ if ( $title == "" ) return "" ; # Just in case...
+ if ( $this->first_title == "" ) {
+ $this->first_title = $title ;
+ }
+ $text = $this->get_page_text ( $title ) ;
+ return $text ;
+ }
+
+ function get_file_location ( $ns , $title ) {
+ return get_file_location_global ( $this->basedir , $ns , $title , false ) ;
+ }
+
+ /**
+  * Read the text of a page from its file, following a #REDIRECT once.
+  *
+  * @param string $page           Page title.
+  * @param bool   $allow_redirect When false a redirect page is returned as
+  *                               is (used for the recursive call, so only
+  *                               one level of redirection is followed).
+  * @return string Trimmed file content, or "" when the file does not exist.
+  */
+ function get_page_text ( $page , $allow_redirect = true ) {
+     $location = $this->get_file_location ( 0 , $page ) ;
+     $filename = $location->fullname . $this->file_ending ;
+     if ( !file_exists ( $filename ) ) return "" ;
+     $text = trim ( file_get_contents ( $filename ) ) ;
+
+     # REDIRECT?
+     if ( $allow_redirect && strtoupper ( substr ( $text , 0 , 9 ) ) == "#REDIRECT" ) {
+         # The target is taken from the rest of the first line. The
+         # explode() result is kept in a variable instead of being passed
+         # straight to array_shift(), which expects a reference.
+         $lines = explode ( "\n" , substr ( $text , 9 ) , 2 ) ;
+         $target = str_replace ( "[[" , "" , $lines[0] ) ;
+         $target = str_replace ( "]]" , "" , $target ) ;
+         $target = ucfirst ( trim ( $target ) ) ;
+         $text = $this->get_page_text ( $target , false ) ;
+     }
+     return $text ;
+ }
+
+ function get_internal_link ( $target , $text ) {
+ $file = $this->get_file_location ( 0 , $target ) ;
+ if ( !file_exists ( $file->fullname.$this->file_ending ) ) return $text ;
+ else return "<a href='browse_texts.php?title=" . urlencode ( $target )
. "'>{$text}</a>" ;
+ }
+
+ function do_show_images () {
+ return false ;
+ }
+
+}
+
+# Access through MySQL interface
+# (Used via the extension via Special::wiki2XML)
+class ContentProviderMySQL extends ContentProviderHTTP {
+
+ function do_get_contents ( $title ) {
+ return $this->get_page_text ( $title ) ;
+ }
+
+ /**
+ Called from outside
+ */
+ function get_wiki_text ( $title , $do_cache = false ) {
+ $title = trim ( $title ) ;
+ if ( $title == "" ) return "" ; # Just in case...
+ if ( $this->first_title == "" ) {
+ $this->first_title = $title ;
+ }
+ $text = $this->get_page_text ( $title ) ;
+ return $text ;
+ }
+
+ function get_file_location ( $ns , $title ) {
+ return get_file_location_global ( $this->basedir , $ns , $title , false ) ;
+ }
+
+ /**
+  * Load the text of a page through MediaWiki's Article class, following a
+  * #REDIRECT once.
+  *
+  * @param string $page           Page title.
+  * @param bool   $allow_redirect When false a redirect page is returned as
+  *                               is (used for the recursive call, so only
+  *                               one level of redirection is followed).
+  * @return string Page text, or "" when the title is invalid or the
+  *                article does not exist.
+  */
+ function get_page_text ( $page , $allow_redirect = true ) {
+     $title = Title::newFromText ( $page ) ;
+
+     # Guard against a title that could not be created before handing it
+     # to Article.
+     if ( !$title ) {
+         return "";
+     }
+     $article = new Article ( $title ) ;
+
+     # article does not exist?
+     if (!$article->exists()) {
+         return "";
+     }
+     $text = $article->getContent () ;
+
+     # REDIRECT?
+     if ( $allow_redirect && strtoupper ( substr ( $text , 0 , 9 ) ) == "#REDIRECT" ) {
+         # The target is taken from the rest of the first line. The
+         # explode() result is kept in a variable instead of being passed
+         # straight to array_shift(), which expects a reference.
+         $lines = explode ( "\n" , substr ( $text , 9 ) , 2 ) ;
+         $target = str_replace ( "[[" , "" , $lines[0] ) ;
+         $target = str_replace ( "]]" , "" , $target ) ;
+         $target = ucfirst ( trim ( $target ) ) ;
+         $text = $this->get_page_text ( $target , false ) ;
+     }
+     return $text ;
+ }
+
+ function get_internal_link ( $target , $text ) {
+ $file = $this->get_file_location ( 0 , $target ) ;
+ if ( !file_exists ( $file->fullname.$this->file_ending ) ) return $text ;
+ else return "<a href='browse_texts.php?title=" . urlencode ( $target )
. "'>{$text}</a>" ;
+ }
+
+ function do_show_images () {
+ return false ;
+ }
+
+}
+
+?>
Added: projects/wiki/extensions/wiki2xml/default.php
===================================================================
--- projects/wiki/extensions/wiki2xml/default.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/default.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,22 @@
+<?php
+
+$xmlg["namespace_template"] = "Template" ;
+$xmlg["site_base_url"] = "en.wikipedia.org/w" ;
+$xmlg["book_title"] = "No title" ;
+$xmlg['sourcedir'] = "." ;
+$xmlg["temp_dir"] = "/tmp" ;
+$xmlg['is_windows'] = false ;
+$xmlg['allow_get'] = false ;
+$xmlg["use_toolserver_url"] = false ;
+$xmlg["odt_footnote"] = 'footnote' ;
+$xmlg["allow_xml_temp_files"] = true ;
+$xmlg["use_xml_temp_files"] = false ;
+$xmlg["xhtml_source"] = false ;
+$xmlg['xhtml_justify'] = false ;
+$xmlg['xhtml_logical_markup'] = false ;
+$xmlg['text_hide_images'] = false ;
+$xmlg['text_hide_tables'] = false ;
+
+@include ( "local.php" ) ;
+
+?>
Added: projects/wiki/extensions/wiki2xml/extension.php
===================================================================
--- projects/wiki/extensions/wiki2xml/extension.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/extension.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,77 @@
+<?php
+/*
+To enable this extension, put all files in this directory into a "wiki2xml"
+subdirectory of your MediaWiki extensions directory.
+Also, add
+ require_once ( "extensions/wiki2xml/extension.php" ) ;
+to your LocalSettings.php
+The extension will then be accessed as [[Special:Wiki2XML]].
+*/
+
+if( !defined( 'MEDIAWIKI' ) ) die();
+
+# Integrating into the MediaWiki environment
+
+$wgExtensionCredits['Wiki2XML'][] = array(
+ 'name' => 'Wiki2XML',
+ 'description' => 'An extension to convert wiki markup into
XML.',
+ 'author' => 'Magnus Manske'
+);
+
+$wgExtensionFunctions[] = 'wfWiki2XMLExtension';
+
+# for Special::Version:
+$wgExtensionCredits['parserhook'][] = array(
+ 'name' => 'wiki2xml extension',
+ 'author' => 'Magnus Manske et al.',
+ 'url' => 'http://en.wikipedia.org/wiki/User:Magnus_Manske',
+ 'version' => 'v0.02',
+);
+
+
+#_____________________________________________________________________________
+
+/**
+ * The special page
+ */
+function wfWiki2XMLExtension() { # Checked for HTML and MySQL insertion attacks
+ global $IP, $wgMessageCache;
+# wfTasksAddCache();
+
+ // FIXME : i18n
+ $wgMessageCache->addMessage( 'wiki2xml', 'Wiki2XML' );
+
+ require_once $IP.'/includes/SpecialPage.php';
+
+ class SpecialWiki2XML extends SpecialPage {
+
+ /**
+ * Constructor
+ */
+ function SpecialWiki2XML() { # Checked for HTML and MySQL insertion attacks
+ SpecialPage::SpecialPage( 'Wiki2XML' );
+ $this->includable( true );
+ }
+
+ /**
+ * Special page main function
+ */
+ function execute( $par = null ) { # Checked for HTML and MySQL insertion attacks
+ global $wgOut, $wgRequest, $wgUser, $wgTitle, $IP;
+ $fname = 'Special::Tasks:execute';
+ global $xmlg , $html_named_entities_mapping_mine, $content_provider;
+ include_once ( "default.php" ) ;
+ $xmlg['sourcedir'] = $IP.'/extensions/wiki2xml' ;
+ include_once ( "w2x.php" ) ;
+
+ $this->setHeaders();
+ $wgOut->addHtml( $out );
+ }
+
+ } # end of class
+
+ SpecialPage::addPage( new SpecialWiki2XML );
+}
+
+
+?>
Added: projects/wiki/extensions/wiki2xml/filter_named_entities.php
===================================================================
--- projects/wiki/extensions/wiki2xml/filter_named_entities.php
(rev 0)
+++ projects/wiki/extensions/wiki2xml/filter_named_entities.php 2007-07-04 19:14:29 UTC
(rev 3798)
@@ -0,0 +1,281 @@
+<?php
+/**
+ * This file is to compensate for a bug in PHP4 and early PHP5 versions
+ * which do not replace some entities correctly
+ */
+
+$html_named_entities_mapping_mine = array (
+ // Obtained with:
+ // less /usr/share/xml/entities/xhtml/*.ent|grep '^<!ENTITY'|sed -e
's/^<\!ENTITY[ \t]*\([A-Za-z0-9]*\)[
\t]*"&#\([0-9]*\);".*$/"\1"=>\2,/' >
/home/dom/data/2005/04/entities-table
+'nbsp'=>160,
+'iexcl'=>161,
+'cent'=>162,
+'pound'=>163,
+'curren'=>164,
+'yen'=>165,
+'brvbar'=>166,
+'sect'=>167,
+'uml'=>168,
+'copy'=>169,
+'ordf'=>170,
+'laquo'=>171,
+'not'=>172,
+'shy'=>173,
+'reg'=>174,
+'macr'=>175,
+'deg'=>176,
+'plusmn'=>177,
+'sup2'=>178,
+'sup3'=>179,
+'acute'=>180,
+'micro'=>181,
+'para'=>182,
+'middot'=>183,
+'cedil'=>184,
+'sup1'=>185,
+'ordm'=>186,
+'raquo'=>187,
+'frac14'=>188,
+'frac12'=>189,
+'frac34'=>190,
+'iquest'=>191,
+'Agrave'=>192,
+'Aacute'=>193,
+'Acirc'=>194,
+'Atilde'=>195,
+'Auml'=>196,
+'Aring'=>197,
+'AElig'=>198,
+'Ccedil'=>199,
+'Egrave'=>200,
+'Eacute'=>201,
+'Ecirc'=>202,
+'Euml'=>203,
+'Igrave'=>204,
+'Iacute'=>205,
+'Icirc'=>206,
+'Iuml'=>207,
+'ETH'=>208,
+'Ntilde'=>209,
+'Ograve'=>210,
+'Oacute'=>211,
+'Ocirc'=>212,
+'Otilde'=>213,
+'Ouml'=>214,
+'times'=>215,
+'Oslash'=>216,
+'Ugrave'=>217,
+'Uacute'=>218,
+'Ucirc'=>219,
+'Uuml'=>220,
+'Yacute'=>221,
+'THORN'=>222,
+'szlig'=>223,
+'agrave'=>224,
+'aacute'=>225,
+'acirc'=>226,
+'atilde'=>227,
+'auml'=>228,
+'aring'=>229,
+'aelig'=>230,
+'ccedil'=>231,
+'egrave'=>232,
+'eacute'=>233,
+'ecirc'=>234,
+'euml'=>235,
+'igrave'=>236,
+'iacute'=>237,
+'icirc'=>238,
+'iuml'=>239,
+'eth'=>240,
+'ntilde'=>241,
+'ograve'=>242,
+'oacute'=>243,
+'ocirc'=>244,
+'otilde'=>245,
+'ouml'=>246,
+'divide'=>247,
+'oslash'=>248,
+'ugrave'=>249,
+'uacute'=>250,
+'ucirc'=>251,
+'uuml'=>252,
+'yacute'=>253,
+'thorn'=>254,
+'yuml'=>255,
+'quot'=>34,
+'amp'=>38,
+'lt'=>60,
+'gt'=>62,
+'OElig'=>338,
+'oelig'=>339,
+'Scaron'=>352,
+'scaron'=>353,
+'Yuml'=>376,
+'circ'=>710,
+'tilde'=>732,
+'ensp'=>8194,
+'emsp'=>8195,
+'thinsp'=>8201,
+'zwnj'=>8204,
+'zwj'=>8205,
+'lrm'=>8206,
+'rlm'=>8207,
+'ndash'=>8211,
+'mdash'=>8212,
+'lsquo'=>8216,
+'rsquo'=>8217,
+'sbquo'=>8218,
+'ldquo'=>8220,
+'rdquo'=>8221,
+'bdquo'=>8222,
+'dagger'=>8224,
+'Dagger'=>8225,
+'permil'=>8240,
+'lsaquo'=>8249,
+'rsaquo'=>8250,
+'euro'=>8364,
+'fnof'=>402,
+'Alpha'=>913,
+'Beta'=>914,
+'Gamma'=>915,
+'Delta'=>916,
+'Epsilon'=>917,
+'Zeta'=>918,
+'Eta'=>919,
+'Theta'=>920,
+'Iota'=>921,
+'Kappa'=>922,
+'Lambda'=>923,
+'Mu'=>924,
+'Nu'=>925,
+'Xi'=>926,
+'Omicron'=>927,
+'Pi'=>928,
+'Rho'=>929,
+'Sigma'=>931,
+'Tau'=>932,
+'Upsilon'=>933,
+'Phi'=>934,
+'Chi'=>935,
+'Psi'=>936,
+'Omega'=>937,
+'alpha'=>945,
+'beta'=>946,
+'gamma'=>947,
+'delta'=>948,
+'epsilon'=>949,
+'zeta'=>950,
+'eta'=>951,
+'theta'=>952,
+'iota'=>953,
+'kappa'=>954,
+'lambda'=>955,
+'mu'=>956,
+'nu'=>957,
+'xi'=>958,
+'omicron'=>959,
+'pi'=>960,
+'rho'=>961,
+'sigmaf'=>962,
+'sigma'=>963,
+'tau'=>964,
+'upsilon'=>965,
+'phi'=>966,
+'chi'=>967,
+'psi'=>968,
+'omega'=>969,
+'thetasym'=>977,
+'upsih'=>978,
+'piv'=>982,
+'bull'=>8226,
+'hellip'=>8230,
+'prime'=>8242,
+'Prime'=>8243,
+'oline'=>8254,
+'frasl'=>8260,
+'weierp'=>8472,
+'image'=>8465,
+'real'=>8476,
+'trade'=>8482,
+'alefsym'=>8501,
+'larr'=>8592,
+'uarr'=>8593,
+'rarr'=>8594,
+'darr'=>8595,
+'harr'=>8596,
+'crarr'=>8629,
+'lArr'=>8656,
+'uArr'=>8657,
+'rArr'=>8658,
+'dArr'=>8659,
+'hArr'=>8660,
+'forall'=>8704,
+'part'=>8706,
+'exist'=>8707,
+'empty'=>8709,
+'nabla'=>8711,
+'isin'=>8712,
+'notin'=>8713,
+'ni'=>8715,
+'prod'=>8719,
+'sum'=>8721,
+'minus'=>8722,
+'lowast'=>8727,
+'radic'=>8730,
+'prop'=>8733,
+'infin'=>8734,
+'ang'=>8736,
+'and'=>8743,
+'or'=>8744,
+'cap'=>8745,
+'cup'=>8746,
+'int'=>8747,
+'there4'=>8756,
+'sim'=>8764,
+'cong'=>8773,
+'asymp'=>8776,
+'ne'=>8800,
+'equiv'=>8801,
+'le'=>8804,
+'ge'=>8805,
+'sub'=>8834,
+'sup'=>8835,
+'nsub'=>8836,
+'sube'=>8838,
+'supe'=>8839,
+'oplus'=>8853,
+'otimes'=>8855,
+'perp'=>8869,
+'sdot'=>8901,
+'lceil'=>8968,
+'rceil'=>8969,
+'lfloor'=>8970,
+'rfloor'=>8971,
+'lang'=>9001,
+'rang'=>9002,
+'loz'=>9674,
+'spades'=>9824,
+'clubs'=>9827,
+'hearts'=>9829,
+'diams'=>9830,
+'32'=>32,
+);
+
+/**
+ * Encode a Unicode code point as a UTF-8 byte string.
+ *
+ * @param int $code Code point.
+ * @return string 1 to 4 bytes; "" for code points of 2097152 (0x200000)
+ *                and above, which this encoder does not handle.
+ */
+function utf8_chr($code)
+{
+    if($code<128) return chr($code);
+    if($code<2048) return chr(($code>>6)+192).chr(($code&63)+128);
+    if($code<65536) return chr(($code>>12)+224).chr((($code>>6)&63)+128).chr(($code&63)+128);
+    # "+" binds tighter than ">>" and "&", so every shift and mask must be
+    # parenthesised before the lead/continuation marker is added; the
+    # original 4-byte branch lacked these parentheses and produced wrong bytes.
+    if($code<2097152) return chr(($code>>18)+240).chr((($code>>12)&63)+128).chr((($code>>6)&63)+128).chr(($code&63)+128);
+    # Out of range: the original fell off the end and returned NULL.
+    return '';
+}
+
+/**
+ * Replace named entities in $content, in place, with UTF-8 characters.
+ *
+ * Every key of $html_named_entities_mapping_mine is looked up as "&key;"
+ * and replaced by utf8_chr() of the mapped code point, one key after the
+ * other in array order.
+ *
+ * NOTE(review): the replacements run sequentially and the table contains
+ * 'amp' before 'lt'/'gt', so text such as "&amp;lt;" appears to be decoded
+ * twice -- confirm whether that is intended.
+ *
+ * @param string $content Text to filter; modified through the reference.
+ */
+function filter_named_entities(&$content) {
+ global $html_named_entities_mapping_mine;
+ foreach($html_named_entities_mapping_mine as $name => $value) {
+ $content=str_replace('&'.$name.';',utf8_chr ( $value
),$content);
+ }
+ # NOTE(review): the search string on the next line looks mis-encoded in
+ # this copy; check it against the upstream wiki2xml source.
+ $content=str_replace('�','i',$content); # Ugly hack
+}
+
+?>
Added: projects/wiki/extensions/wiki2xml/gfdl.xml
===================================================================
--- projects/wiki/extensions/wiki2xml/gfdl.xml (rev 0)
+++ projects/wiki/extensions/wiki2xml/gfdl.xml 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,446 @@
+<appendix id="gfdl">
+<appendixinfo>
+<title>GNU Free Documentation License</title>
+
+ <pubdate>Version 1.2, November 2002</pubdate>
+ <copyright><year>2000,2001,2002</year>
+ <holder>Free Software Foundation, Inc.</holder></copyright>
+ <legalnotice id="gfdl-legalnotice">
+ <para><address>Free Software Foundation, Inc.
+ <street>51 Franklin St, Fifth Floor</street>,
+ <city>Boston</city>,
+ <state>MA</state>
+ <postcode>02110-1301</postcode>
+ <country>USA</country>
+ </address></para>
+ <para>Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.</para>
+ </legalnotice>
+ <releaseinfo>Version 1.2, November 2002</releaseinfo>
+</appendixinfo>
+
+<title>GNU Free Documentation License</title>
+<section id="gfdl-0"><title>PREAMBLE</title>
+
+<para>The purpose of this License is to make a manual, textbook, or
+other functional and useful document "free" in the sense of freedom: to
+assure everyone the effective freedom to copy and redistribute it, with
+or without modifying it, either commercially or noncommercially.
+Secondarily, this License preserves for the author and publisher a way
+to get credit for their work, while not being considered responsible for
+modifications made by others.</para>
+
+<para>This License is a kind of "copyleft", which means that derivative
+works of the document must themselves be free in the same sense. It
+complements the GNU General Public License, which is a copyleft license
+designed for free software.</para>
+
+<para>We have designed this License in order to use it for manuals for
+free software, because free software needs free documentation: a free
+program should come with manuals providing the same freedoms that the
+software does. But this License is not limited to software manuals; it
+can be used for any textual work, regardless of subject matter or
+whether it is published as a printed book. We recommend this License
+principally for works whose purpose is instruction or reference.</para>
+</section>
+
+<section id="gfdl-1"><title>APPLICABILITY AND
DEFINITIONS</title>
+
+<para id="gfdl-doc">This License applies to any manual or other work, in
+any medium, that contains a notice placed by the copyright holder saying
+it can be distributed under the terms of this License. Such a notice
+grants a world-wide, royalty-free license, unlimited in duration, to use
+that work under the conditions stated herein. The "Document", below,
+refers to any such manual or work. Any member of the public is a
+licensee, and is addressed as "you". You accept the license if you
+copy, modify or distribute the work in a way requiring permission under
+copyright law.</para>
+
+<para id="gfdl-mod-ver">A "Modified Version" of the Document
means any
+work containing the Document or a portion of it, either copied verbatim,
+or with modifications and/or translated into another language.</para>
+
+<para id="gfdl-secnd-sect">A "Secondary Section" is a named
appendix or
+a front-matter section of the Document that deals exclusively with the
+relationship of the publishers or authors of the Document to the
+Document's overall subject (or to related matters) and contains nothing
+that could fall directly within that overall subject. (Thus, if the
+Document is in part a textbook of mathematics, a Secondary Section may
+not explain any mathematics.) The relationship could be a matter of
+historical connection with the subject or with related matters, or of
+legal, commercial, philosophical, ethical or political position
+regarding them.</para>
+
+<para id="gfdl-inv-sect">The "Invariant Sections" are certain
Secondary
+Sections whose titles are designated, as being those of Invariant
+Sections, in the notice that says that the Document is released under
+this License. If a section does not fit the above definition of
+Secondary then it is not allowed to be designated as Invariant. The
+Document may contain zero Invariant Sections. If the Document does not
+identify any Invariant Sections then there are none.</para>
+
+<para id="gfdl-cov-text">The "Cover Texts" are certain short
passages of
+text that are listed, as Front-Cover Texts or Back-Cover Texts, in the
+notice that says that the Document is released under this License. A
+Front-Cover Text may be at most 5 words, and a Back-Cover Text may be at
+most 25 words.</para>
+
+<para id="gfdl-transparent">A "Transparent" copy of the
Document means a
+machine-readable copy, represented in a format whose specification is
+available to the general public, that is suitable for revising the
+document straightforwardly with generic text editors or (for images
+composed of pixels) generic paint programs or (for drawings) some widely
+available drawing editor, and that is suitable for input to text
+formatters or for automatic translation to a variety of formats suitable
+for input to text formatters. A copy made in an otherwise Transparent
+file format whose markup, or absence of markup, has been arranged to
+thwart or discourage subsequent modification by readers is not
+Transparent. An image format is not Transparent if used for any
+substantial amount of text. A copy that is not "Transparent" is called
+"Opaque".</para>
+
+<para>Examples of suitable formats for Transparent copies include plain
+ASCII without markup, Texinfo input format, LaTeX input format, SGML or
+XML using a publicly available DTD, and standard-conforming simple HTML,
+PostScript or PDF designed for human modification. Examples of
+transparent image formats include PNG, XCF and JPG. Opaque formats
+include proprietary formats that can be read and edited only by
+proprietary word processors, SGML or XML for which the DTD and/or
+processing tools are not generally available, and the machine-generated
+HTML, PostScript or PDF produced by some word processors for output
+purposes only.</para>
+
+<para id="gfdl-title-page">The "Title Page" means, for a
printed book,
+the title page itself, plus such following pages as are needed to hold,
+legibly, the material this License requires to appear in the title page.
+For works in formats which do not have any title page as such, "Title
+Page" means the text near the most prominent appearance of the work's
+title, preceding the beginning of the body of the text.</para>
+
+<para id="gfdl-entitled">A section "Entitled XYZ" means a named
subunit
+of the Document whose title either is precisely XYZ or contains XYZ in
+parentheses following text that translates XYZ in another language.
+(Here XYZ stands for a specific section name mentioned below, such as
+"Acknowledgements", "Dedications", "Endorsements", or
"History".) To
+"Preserve the Title" of such a section when you modify the Document
+means that it remains a section "Entitled XYZ" according to this
+definition.</para>
+
+<para>The Document may include Warranty Disclaimers next to the notice
+which states that this License applies to the Document. These Warranty
+Disclaimers are considered to be included by reference in this License,
+but only as regards disclaiming warranties: any other implication that
+these Warranty Disclaimers may have is void and has no effect on the
+meaning of this License.</para>
+</section>
+
+<section id="gfdl-2"><title>VERBATIM COPYING</title>
+
+<para>You may copy and distribute the Document in any medium, either
+commercially or noncommercially, provided that this License, the
+copyright notices, and the license notice saying this License applies to
+the Document are reproduced in all copies, and that you add no other
+conditions whatsoever to those of this License. You may not use
+technical measures to obstruct or control the reading or further copying
+of the copies you make or distribute. However, you may accept
+compensation in exchange for copies. If you distribute a large enough
+number of copies you must also follow the conditions in section 3.
+</para>
+
+<para>You may also lend copies, under the same conditions stated above,
+and you may publicly display copies.</para>
+</section>
+
+<section id="gfdl-3"><title>COPYING IN QUANTITY</title>
+
+<para>If you publish printed copies (or copies in media that commonly
+have printed covers) of the Document, numbering more than 100, and the
+Document's license notice requires Cover Texts, you must enclose the
+copies in covers that carry, clearly and legibly, all these Cover Texts:
+Front-Cover Texts on the front cover, and Back-Cover Texts on the back
+cover. Both covers must also clearly and legibly identify you as the
+publisher of these copies. The front cover must present the full title
+with all words of the title equally prominent and visible. You may add
+other material on the covers in addition. Copying with changes limited
+to the covers, as long as they preserve the title of the Document and
+satisfy these conditions, can be treated as verbatim copying in other
+respects.</para>
+
+<para>If the required texts for either cover are too voluminous to fit
+legibly, you should put the first ones listed (as many as fit
+reasonably) on the actual cover, and continue the rest onto adjacent
+pages.</para>
+
+<para>If you publish or distribute Opaque copies of the Document
+numbering more than 100, you must either include a machine-readable
+Transparent copy along with each Opaque copy, or state in or with each
+Opaque copy a computer-network location from which the general
+network-using public has access to download using public-standard
+network protocols a complete Transparent copy of the Document, free of
+added material. If you use the latter option, you must take reasonably
+prudent steps, when you begin distribution of Opaque copies in quantity,
+to ensure that this Transparent copy will remain thus accessible at the
+stated location until at least one year after the last time you
+distribute an Opaque copy (directly or through your agents or retailers)
+of that edition to the public.</para>
+
+<para>It is requested, but not required, that you contact the authors of
+the Document well before redistributing any large number of copies, to
+give them a chance to provide you with an updated version of the
+Document.</para>
+</section>
+
+<section id="gfdl-4"><title>MODIFICATIONS</title>
+
+<para>You may copy and distribute a Modified Version of the Document
+under the conditions of sections 2 and 3 above, provided that you
+release the Modified Version under precisely this License, with the
+Modified Version filling the role of the Document, thus licensing
+distribution and modification of the Modified Version to whoever
+possesses a copy of it. In addition, you must do these things in the
+Modified Version:</para>
+
+<orderedlist id="gfdl-modif-cond" numeration="upperalpha">
+ <title>GNU FDL Modification Conditions</title>
+<listitem><simpara>Use in the Title Page (and on the covers, if any) a
+ title distinct from that of the Document, and from those of previous
+ versions (which should, if there were any, be listed in the History
+ section of the Document). You may use the same title as a previous
+ version if the original publisher of that version gives permission.
+</simpara></listitem>
+<listitem><simpara>List on the Title Page, as authors, one or more
+ persons or entities responsible for authorship of the modifications in
+ the Modified Version, together with at least five of the principal
+ authors of the Document (all of its principal authors, if it has fewer
+ than five), unless they release you from this requirement.
+</simpara></listitem>
+<listitem><simpara>State on the Title page the name of the publisher of
+ the Modified Version, as the publisher.</simpara></listitem>
+<listitem><simpara>Preserve all the copyright notices of the Document.
+</simpara></listitem>
+<listitem><simpara>Add an appropriate copyright notice for your
+ modifications adjacent to the other copyright notices.
+</simpara></listitem>
+<listitem><simpara>Include, immediately after the copyright notices, a
+ license notice giving the public permission to use the Modified
+ Version under the terms of this License, in the form shown in the
+ <link linkend="gfdl-addendum">Addendum</link> below.
+</simpara></listitem>
+<listitem><simpara>Preserve in that license notice the full lists of
+ Invariant Sections and required Cover Texts given in the Document's
+ license notice.</simpara></listitem>
+<listitem><simpara>Include an unaltered copy of this License.
+</simpara></listitem>
+<listitem><simpara>Preserve the section Entitled "History",
Preserve its
+ Title, and add to it an item stating at least the title, year, new
+ authors, and publisher of the Modified Version as given on the Title
+ Page. If there is no section Entitled "History" in the Document,
+ create one stating the title, year, authors, and publisher of the
+ Document as given on its Title Page, then add an item describing the
+ Modified Version as stated in the previous sentence.
+</simpara></listitem>
+<listitem><simpara>Preserve the network location, if any, given in the
+ Document for public access to a Transparent copy of the Document, and
+ likewise the network locations given in the Document for previous
+ versions it was based on. These may be placed in the "History"
+ section. You may omit a network location for a work that was
+ published at least four years before the Document itself, or if the
+ original publisher of the version it refers to gives permission.
+</simpara></listitem>
+<listitem><simpara>For any section Entitled "Acknowledgements" or
+ "Dedications", Preserve the Title of the section, and preserve in the
+ section all the substance and tone of each of the contributor
+ acknowledgements and/or dedications given therein.
+</simpara></listitem>
+<listitem><simpara>Preserve all the Invariant Sections of the Document,
+ unaltered in their text and in their titles. Section numbers or the
+ equivalent are not considered part of the section titles.
+</simpara></listitem>
+<listitem><simpara>Delete any section Entitled "Endorsements".
+ Such a section may not be included in the Modified Version.
+</simpara></listitem>
+<listitem><simpara>Do not retitle any existing section to be Entitled
+ "Endorsements" or to conflict in title with any Invariant Section.
+</simpara></listitem>
+<listitem><simpara>Preserve any Warranty Disclaimers.
+</simpara></listitem>
+</orderedlist>
+
+<para>If the Modified Version includes new front-matter sections or
+appendices that qualify as Secondary Sections and contain no material
+copied from the Document, you may at your option designate some or all
+of these sections as invariant. To do this, add their titles to the
+list of Invariant Sections in the Modified Version's license notice.
+These titles must be distinct from any other section titles.</para>
+
+<para>You may add a section Entitled "Endorsements", provided it
+contains nothing but endorsements of your Modified Version by various
+parties--for example, statements of peer review or that the text has
+been approved by an organization as the authoritative definition of a
+standard.</para>
+
+<para>You may add a passage of up to five words as a Front-Cover Text,
+and a passage of up to 25 words as a Back-Cover Text, to the end of the
+list of Cover Texts in the Modified Version. Only one passage of
+Front-Cover Text and one of Back-Cover Text may be added by (or through
+arrangements made by) any one entity. If the Document already includes
+a cover text for the same cover, previously added by you or by
+arrangement made by the same entity you are acting on behalf of, you may
+not add another; but you may replace the old one, on explicit permission
+from the previous publisher that added the old one.</para>
+
+<para>The author(s) and publisher(s) of the Document do not by this
+License give permission to use their names for publicity for or to
+assert or imply endorsement of any Modified Version.</para>
+</section>
+
+<section id="gfdl-5"><title>COMBINING DOCUMENTS</title>
+
+<para>You may combine the Document with other documents released under
+this License, under the terms defined in <link linkend="gfdl-4">section
+4</link> above for modified versions, provided that you include in the
+combination all of the Invariant Sections of all of the original
+documents, unmodified, and list them all as Invariant Sections of your
+combined work in its license notice, and that you preserve all their
+Warranty Disclaimers.</para>
+
+<para>The combined work need only contain one copy of this License, and
+multiple identical Invariant Sections may be replaced with a single
+copy. If there are multiple Invariant Sections with the same name but
+different contents, make the title of each such section unique by adding
+at the end of it, in parentheses, the name of the original author or
+publisher of that section if known, or else a unique number. Make the
+same adjustment to the section titles in the list of Invariant Sections
+in the license notice of the combined work.</para>
+
+<para>In the combination, you must combine any sections Entitled
+"History" in the various original documents, forming one section
+Entitled "History"; likewise combine any sections Entitled
+"Acknowledgements", and any sections Entitled "Dedications". You
must
+delete all sections Entitled "Endorsements".</para>
+</section>
+
+<section id="gfdl-6"><title>COLLECTIONS OF DOCUMENTS</title>
+
+<para>You may make a collection consisting of the Document and other
+documents released under this License, and replace the individual copies
+of this License in the various documents with a single copy that is
+included in the collection, provided that you follow the rules of this
+License for verbatim copying of each of the documents in all other
+respects.</para>
+
+<para>You may extract a single document from such a collection, and
+distribute it individually under this License, provided you insert a
+copy of this License into the extracted document, and follow this
+License in all other respects regarding verbatim copying of that
+document.</para>
+</section>
+
+<section id="gfdl-7"><title>AGGREGATION WITH INDEPENDENT
WORKS</title>
+
+<para>A compilation of the Document or its derivatives with other
+separate and independent documents or works, in or on a volume of a
+storage or distribution medium, is called an "aggregate" if the
+copyright resulting from the compilation is not used to limit the legal
+rights of the compilation's users beyond what the individual works
+permit. When the Document is included in an aggregate, this License does
+not apply to the other works in the aggregate which are not themselves
+derivative works of the Document.</para>
+
+<para>If the Cover Text requirement of section 3 is applicable to these
+copies of the Document, then if the Document is less than one half of
+the entire aggregate, the Document's Cover Texts may be placed on covers
+that bracket the Document within the aggregate, or the electronic
+equivalent of covers if the Document is in electronic form. Otherwise
+they must appear on printed covers that bracket the whole
+aggregate.</para>
+</section>
+
+<section id="gfdl-8"><title>TRANSLATION</title>
+
+<para>Translation is considered a kind of modification, so you may
+distribute translations of the Document under the terms of section 4.
+Replacing Invariant Sections with translations requires special
+permission from their copyright holders, but you may include
+translations of some or all Invariant Sections in addition to the
+original versions of these Invariant Sections. You may include a
+translation of this License, and all the license notices in the
+Document, and any Warranty Disclaimers, provided that you also include
+the original English version of this License and the original versions
+of those notices and disclaimers. In case of a disagreement between the
+translation and the original version of this License or a notice or
+disclaimer, the original version will prevail.</para>
+
+<para>If a section in the Document is Entitled "Acknowledgements",
+"Dedications", or "History", the requirement (section 4) to Preserve
its
+Title (section 1) will typically require changing the actual
+title.</para>
+</section>
+
+<section id="gfdl-9"><title>TERMINATION</title>
+
+<para>You may not copy, modify, sublicense, or distribute the Document
+except as expressly provided for under this License. Any other attempt
+to copy, modify, sublicense or distribute the Document is void, and will
+automatically terminate your rights under this License. However,
+parties who have received copies, or rights, from you under this License
+will not have their licenses terminated so long as such parties remain
+in full compliance.</para>
+</section>
+
+<section id="gfdl-10"><title>FUTURE REVISIONS OF THIS
LICENSE</title>
+
+<para>The Free Software Foundation may publish new, revised versions of
+the GNU Free Documentation License from time to time. Such new versions
+will be similar in spirit to the present version, but may differ in
+detail to address new problems or concerns. See
+http://www.gnu.org/copyleft/.</para>
+
+<para>Each version of the License is given a distinguishing version
+number. If the Document specifies that a particular numbered version of
+this License "or any later version" applies to it, you have the option
+of following the terms and conditions either of that specified version
+or of any later version that has been published (not as a draft) by the
+Free Software Foundation. If the Document does not specify a version
+number of this License, you may choose any version ever published (not
+as a draft) by the Free Software Foundation.</para>
+</section>
+
+<section id="gfdl-addendum"><title>ADDENDUM: How to use this
License for
+ your documents</title>
+
+<para>To use this License in a document you have written, include a copy
+of the License in the document and put the following copyright and
+license notices just after the title page:</para>
+
+<blockquote id="copyright-sample">
+ <title>Sample Invariant Sections list</title><para>
+ Copyright (c) YEAR YOUR NAME.
+ Permission is granted to copy, distribute and/or modify this document
+ under the terms of the GNU Free Documentation License, Version 1.2
+ or any later version published by the Free Software Foundation;
+ with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.
+ A copy of the license is included in the section entitled "GNU
+ Free Documentation License".
+</para></blockquote>
+
+<para>If you have Invariant Sections, Front-Cover Texts and Back-Cover
+Texts, replace the "with...Texts." line with this:</para>
+
+<blockquote id="inv-cover-sample">
+ <title>Sample Invariant Sections list</title>
+<para>
+ with the Invariant Sections being LIST THEIR TITLES, with the
+ Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.
+</para></blockquote>
+
+<para>If you have Invariant Sections without Cover Texts, or some other
+combination of the three, merge those two alternatives to suit the
+situation.</para>
+
+<para>If your document contains nontrivial examples of program code, we
+recommend releasing these examples in parallel under your choice of free
+software license, such as the GNU General Public License, to permit
+their use in free software.</para>
+</section>
+</appendix>
Added: projects/wiki/extensions/wiki2xml/global_functions.php
===================================================================
--- projects/wiki/extensions/wiki2xml/global_functions.php (rev
0)
+++ projects/wiki/extensions/wiki2xml/global_functions.php 2007-07-04 19:14:29 UTC (rev
3798)
@@ -0,0 +1,43 @@
+<?php
+
+# Add authors to global list
+#
+# Appends every entry of $authors that is not yet present to the global
+# $wiki2xml_authors array, keeping the order of first appearance.
+function add_authors ( $authors ) {
+	global $wiki2xml_authors ;
+	# The list may not have been initialised by the caller yet
+	if ( !is_array ( $wiki2xml_authors ) ) {
+		$wiki2xml_authors = array () ;
+	}
+	foreach ( $authors AS $author ) {
+		# Strict comparison: a loose in_array() treats numeric-looking names
+		# such as "1e3" and "1000" as the same author
+		if ( !in_array ( $author , $wiki2xml_authors , true ) ) {
+			$wiki2xml_authors[] = $author ;
+		}
+	}
+}
+
+# Convenience wrapper: registers a single author through add_authors()
+function add_author ( $author ) {
+	$single = array ( $author ) ;
+	add_authors ( $single ) ;
+}
+
+
+# For text file structure creation and browsing
+#
+# Computes where the text file of an article lives:
+#   <basedir>/<ns>/<1st md5 hex digit>/<2nd and 3rd md5 hex digits>/<file>
+# The md5 is taken over the urlencoded title.
+# Returns an object with the properties ->file, ->dir and ->fullname.
+# With $make_dirs set, each directory level is created (errors suppressed,
+# e.g. when the directory already exists).
+function get_file_location_global ( $basedir , $ns , $title , $make_dirs = false ) {
+	$title = urlencode ( $title ) ;
+	# urlencode() has already turned ":" into "%3A", so this replacement finds
+	# nothing; it is kept so that generated paths stay exactly as before
+	$title = str_replace ( ":" , "_" , $title ) ;
+	$m = md5 ( $title ) ;
+	# Must be a real object: assigning properties on a string ("") only worked
+	# through implicit object creation, which is an error in current PHP
+	$ret = new stdClass ;
+	$ret->file = $title ;
+	# Windows can't create files named "con.txt" (!), workaround
+	if ( $ret->file == "Con" ) $ret->file = "_Con" ;
+	$ret->dir = $basedir . "/" . $ns ;
+	if ( $make_dirs ) @mkdir ( $ret->dir ) ;
+	$ret->dir .= "/" . substr ( $m , 0 , 1 ) ;
+	if ( $make_dirs ) @mkdir ( $ret->dir ) ;
+	$ret->dir .= "/" . substr ( $m , 1 , 2 ) ;
+	if ( $make_dirs ) @mkdir ( $ret->dir ) ;
+	$ret->fullname = $ret->dir . "/" . $ret->file ;
+	return $ret ;
+}
+
+
+# Returns the 'xml_articles_header' entry of the global $xmlg settings array.
+# There is no fallback here: the key is read unconditionally.
+function xml_articles_header() {
+	global $xmlg ;
+	$header = $xmlg['xml_articles_header'] ;
+	return $header ;
+}
+
+?>
Added: projects/wiki/extensions/wiki2xml/mediawiki_converter.php
===================================================================
--- projects/wiki/extensions/wiki2xml/mediawiki_converter.php (rev
0)
+++ projects/wiki/extensions/wiki2xml/mediawiki_converter.php 2007-07-04 19:14:29 UTC (rev
3798)
@@ -0,0 +1,288 @@
+<?php
+
+# PHP4 and early PHP5 bug workaround:
+require_once ( "filter_named_entities.php" ) ;
+
+require_once ( "global_functions.php" ) ;
+require_once ( "wiki2xml.php" ) ;
+require_once ( "content_provider.php" ) ;
+
+# A function to remove directories and subdirectories
+# Modified from php.net
+#
+# Best effort: every filesystem call is silenced with "@", so entries that
+# cannot be removed are simply left behind and no error is reported.
+function SureRemoveDir($dir) {
+	if(!$dh = @opendir($dir)) return;
+	# Compare with false explicitly: an entry named "0" is falsy and would
+	# otherwise end the loop before the directory has been emptied
+	while (($obj = readdir($dh)) !== false) {
+		if($obj=='.' || $obj=='..') continue;
+		# Try to delete the entry as a file first; when that fails it is
+		# treated as a directory and removed recursively
+		if (!@unlink($dir.'/'.$obj)) {
+			SureRemoveDir($dir.'/'.$obj);
+		}
+	}
+	@closedir ( $dh ) ;
+	@rmdir($dir) ;
+}
+
+/**
+ * The main converter class
+ *
+ * article2xml() turns wikitext into an <article> XML fragment; the
+ * articles2*() methods turn such XML into plain text, XHTML, ODT XML,
+ * DocBook XML, or a file produced by an external DocBook converter.
+ */
+class MediaWikiConverter {
+
+	/**
+	 * Converts a single article in MediaWiki format to XML
+	 *
+	 * @param string $title  Article title; urlencoded for the title attribute.
+	 *                       A non-empty title is also registered with the
+	 *                       global $content_provider.
+	 * @param string $text   Wikitext, handed to wiki2xml::parse()
+	 * @param array  $params Optional keys: 'resolvetemplates' (copied to the
+	 *                       parser's auto_fill_templates) and 'templates'
+	 *                       (list of template names)
+	 * @return string The <article> element, including an <authors> list when
+	 *                the global $wiki2xml_authors is not empty
+	 */
+	function article2xml ( $title , &$text , $params = array () ) {
+		global $content_provider , $wiki2xml_authors ;
+		$ot = $title ;
+		$title = urlencode ( $title ) ;
+		$p = new wiki2xml ;
+		# Both keys are optional because $params defaults to an empty array
+		$p->auto_fill_templates = isset ( $params['resolvetemplates'] ) ? $params['resolvetemplates'] : null ;
+		$p->template_list = array () ;
+		$templates = ( isset ( $params['templates'] ) && is_array ( $params['templates'] ) ) ? $params['templates'] : array () ;
+		foreach ( $templates AS $x ) {
+			$x = trim ( ucfirst ( $x ) ) ;
+			if ( $x != "" ) $p->template_list[] = $x ;
+		}
+		$xml = '<article' ;
+		if ( $title != "" ) {
+			$xml .= " title='{$title}'" ;
+			$content_provider->add_article ( urldecode ( $ot ) ) ;
+		}
+		$xml .= '>' ;
+		$xml .= $p->parse ( $text ) ;
+		# The global list may still be unset when no caller initialised it
+		if ( is_array ( $wiki2xml_authors ) && count ( $wiki2xml_authors ) > 0 ) {
+			$xml .= "<authors>" ;
+			foreach ( $wiki2xml_authors AS $author )
+				$xml .= "<author>{$author}</author>" ;
+			$xml .= "</authors>" ;
+		}
+		$xml .= "</article>" ;
+		return $xml ;
+	}
+
+	/**
+	 * Converts XML to plain text
+	 *
+	 * Resets the global $wiki2xml_authors, parses $xml with xml2php (from
+	 * xml2txt.php) and appends a contributors footer when the author list is
+	 * not empty after parsing.
+	 *
+	 * @param string $xml    XML to convert
+	 * @param array  $params Optional flags 'plaintext_markup' (mark bold,
+	 *                       italics and underline with * / _) and
+	 *                       'plaintext_prelink' (prefix links with an arrow)
+	 * @return string
+	 */
+	function articles2text ( &$xml , $params = array () ) {
+		global $wiki2xml_authors ;
+		require_once ( "xml2txt.php" ) ;
+
+		$wiki2xml_authors = array () ;
+		$x2t = new xml2php ;
+		$tree = $x2t->scanString ( $xml ) ;
+		if ( !empty ( $params['plaintext_markup'] ) ) {
+			$tree->bold = '*' ;
+			$tree->italics = '/' ;
+			$tree->underline = '_' ;
+		}
+		if ( !empty ( $params['plaintext_prelink'] ) ) {
+			$tree->pre_link = "→" ;
+		}
+
+		$text = trim ( $tree->parse ( $tree ) ) ;
+
+		$authors = "" ;
+		if ( count ( $wiki2xml_authors ) > 0 ) {
+			asort ( $wiki2xml_authors ) ;
+			$authors = "\n--------------------\nTHE ABOVE TEXT IS LICENSED UNDER THE GFDL. CONTRIBUTORS INCLUDE:\n\n" .
+				implode ( ", " , $wiki2xml_authors ) ;
+		}
+
+		return $text . $authors ;
+	}
+
+	/**
+	 * Converts XML to XHTML
+	 *
+	 * Wraps the output of convert_xml_xhtml() (from xml2xhtml.php, read back
+	 * from the global $xml2xhtml->s) in a complete XHTML document.
+	 *
+	 * @param string $xml    XML to convert
+	 * @param array  $params Keys used: 'xhtml_source' (omit the xmlns
+	 *                       attribute on <html>), 'site_base_url' (host and
+	 *                       path prefix of the wiki, without scheme),
+	 *                       'page_title' or, as fallback, 'book_title'
+	 * @return string
+	 */
+	function articles2xhtml ( &$xml , $params = array () ) {
+		global $xml2xhtml ;
+		require_once ( "xml2xhtml.php" ) ;
+		$lang = "EN" ; # Dummy
+		$site_base_url = isset ( $params["site_base_url"] ) ? $params["site_base_url"] : "" ;
+
+		$ret = "" ;
+		$ret .= '<?xml version="1.0" encoding="UTF-8" ?>' ;
+		$ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//' . $lang . '" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' ;
+		if ( empty ( $params['xhtml_source'] ) )
+			$ret .= '<html xmlns="http://www.w3.org/1999/xhtml">' ;
+		else $ret .= '<html>' ;
+		$ret .= '<head>' ;
+		#$ret .= '<link rel="stylesheet" type="text/css" media="screen,projection" href="http://de.wikipedia.org/skins-1.5/monobook/main.css" />' ;
+		#$ret .= '<link rel="stylesheet" type="text/css" media="print" href="http://en.wikipedia.org/skins-1.5/common/commonPrint.css" />' ;
+		# The scheme must be "http://" ("href://" is not a URL scheme) and the
+		# ampersand must be escaped to keep the document well-formed
+		$ret .= '<link rel="stylesheet" type="text/css" href="http://' ;
+		$ret .= $site_base_url . 'index.php?title=MediaWiki:Common.css&amp;action=raw" />' ;
+		#$ret .= '<link rel="stylesheet" type="text/css" href="http://' ;
+		#$ret .= $site_base_url . 'index.php?title=MediaWiki:Monobook.css&amp;action=raw" />' ;
+		$ret .= '<title>' ;
+		if ( isset ( $params['page_title'] ) ) $ret .= $params['page_title'] ;
+		else if ( isset ( $params["book_title"] ) ) $ret .= $params["book_title"] ;
+		$ret .= '</title>' ;
+		$ret .= '</head>' ;
+		$ret .= '<body>' ;
+
+		convert_xml_xhtml ( $xml ) ;
+		$ret .= $xml2xhtml->s ;
+
+#		$xml2xhtml = new XML2XHTML ;
+#		$ret .= $xml2xhtml->scan_xml ( $xml ) ;
+
+		$ret .= '</body>' ;
+		$ret .= '</html>' ;
+		return $ret ;
+	}
+
+	/**
+	 * Converts XML to ODT XML
+	 *
+	 * Creates the global $xml2odt helper, resets the global author list and
+	 * returns the document-content XML. $params and $use_gfdl are accepted
+	 * but not used by this method.
+	 *
+	 * @param string $xml XML to convert
+	 * @return string
+	 */
+	function articles2odt ( &$xml , $params = array () , $use_gfdl = false ) {
+		global $wiki2xml_authors , $xml2odt ;
+		require_once ( "xml2odt.php" ) ;
+
+		# XML text to tree
+		$xml2odt = new XML2ODT ;
+		$wiki2xml_authors = array () ;
+		$x2t = new xml2php ;
+		$tree = $x2t->scanString ( $xml ) ;
+
+		# Tree to ODT
+		# The body is parsed before get_odt_start() is called
+		$out = "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
+		$body = $tree->parse ( $tree ) ;
+		$out .= $xml2odt->get_odt_start () ;
+		$out .= '<office:body><office:text>' ;
+		$out .= $body ;
+		$out .= '</office:text></office:body>' ;
+		$out .= "</office:document-content>" ;
+		return $out ;
+	}
+
+	/**
+	 * Converts XML to DocBook XML
+	 *
+	 * @param string $xml      XML to convert
+	 * @param array  $params   Keys used: 'book_title' and, optionally,
+	 *                         ['docbook']['dtd'] (system identifier of the
+	 *                         DTD; defaults to the DocBook 4.4 DTD URL)
+	 * @param bool   $use_gfdl Declare the external entity "gfdl" (gfdl.xml)
+	 *                         and reference it at the end of the book
+	 * @return string
+	 */
+	function articles2docbook_xml ( &$xml , $params = array () , $use_gfdl = false ) {
+		global $wiki2xml_authors ;
+		require_once ( "xml2docbook_xml.php" ) ;
+
+		$wiki2xml_authors = array () ;
+		$x2t = new xml2php ;
+		$tree = $x2t->scanString ( $xml ) ;
+
+		# Choosing DTD; parameter-given or default
+		$dtd = "" ;
+		if ( isset ( $params['docbook']['dtd'] ) )
+			$dtd = $params['docbook']['dtd'] ;
+		if ( $dtd == "" ) $dtd = 'http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd' ;
+
+		$out = "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
+		$out .= '<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN" "' . $dtd . '"' ;
+		if ( $use_gfdl ) {
+			$out .= "\n[<!ENTITY gfdl SYSTEM \"gfdl.xml\">]\n" ;
+		}
+		$out .= ">\n\n<book>\n" ;
+		# Parsed before <bookinfo> is written: the author list used below is
+		# only complete after the parse
+		$out2 = trim ( $tree->parse ( $tree ) ) ;
+
+		$out .= "<bookinfo>" ;
+		$out .= "<title>" . ( isset ( $params['book_title'] ) ? $params['book_title'] : "" ) . "</title>" ;
+		if ( count ( $wiki2xml_authors ) > 0 ) {
+			asort ( $wiki2xml_authors ) ;
+			$out .= "<authorgroup>" ;
+			foreach ( $wiki2xml_authors AS $author ) {
+				$out .= "<author><othername>{$author}</othername></author>" ;
+			}
+			$out .= "</authorgroup>" ;
+		}
+		$out .= "<legalnotice><para>" ;
+		$out .= "Permission to use, copy, modify and distribute this document under the GNU Free Documentation License (GFDL)." ;
+		$out .= "</para></legalnotice>" ;
+		$out .= "</bookinfo>" ;
+
+		$out .= $out2 ;
+/*
+		if ( count ( $wiki2xml_authors ) > 0 ) {
+			asort ( $wiki2xml_authors ) ;
+			$out .= "<appendix>" ;
+			$out .= "<title>List of contributors</title>" ;
+			$out .= "<para>All text in this document is licensed under the GFDL. The following is a list of contributors (anonymous editors are not listed).</para>" ;
+			$out .= "<para>" ;
+			$out .= implode ( ", " , $wiki2xml_authors ) ;
+			$out .= "</para>" ;
+			$out .= "</appendix>" ;
+		}
+*/
+		if ( $use_gfdl ) {
+			$out .= "\n&gfdl;\n" ;
+		}
+
+		$out .= "\n</book>\n" ;
+
+		return $out ;
+	}
+
+	/**
+	 * Converts XML to PDF via DocBook
+	 * Requires special parameters in local.php to be set (see sample_local.php)
+	 * Uses articles2docbook_xml
+	 *
+	 * The DocBook XML is written to a temporary project directory, the
+	 * configured external command is run on it, and the temporary directory
+	 * is removed again. Output goes to <out_dir or temp_dir>/<pdf|html>/<project>/.
+	 *
+	 * @param string $xml    XML to convert
+	 * @param array  $params Keys used: 'add_gfdl', ['docbook']['temp_dir'],
+	 *                       ['docbook']['out_dir'] (optional) and
+	 *                       ['docbook']['command_pdf'] / ['command_html'],
+	 *                       in which "%1" is replaced by the XML file name
+	 * @param string $mode   "PDF" or "HTML"
+	 * @return string|false Name of the expected output file (an error text is
+	 *                      printed when it does not exist), or false when
+	 *                      $mode is unsupported or the XML file could not be
+	 *                      written
+	 */
+	function articles2docbook_pdf ( &$xml , $params = array () , $mode = "PDF" ) {
+		global $xmlg ;
+
+		# Select the converter command; rejecting other modes here avoids
+		# running an undefined command and leaving temporary files behind
+		if ( $mode == "PDF" ) {
+			$command_key = 'command_pdf' ;
+			$out_subdir = 'pdf' ;
+		} else if ( $mode == "HTML" ) {
+			$command_key = 'command_html' ;
+			$out_subdir = 'html' ;
+		} else {
+			trigger_error ( "articles2docbook_pdf: unsupported mode '{$mode}'" , E_USER_WARNING ) ;
+			return false ;
+		}
+
+		$add_gfdl = !empty ( $params['add_gfdl'] ) ;
+		$docbook_xml = $this->articles2docbook_xml ( $xml , $params , $add_gfdl ) ;
+
+		# Create temporary directory
+		$temp_dir = "MWC" ;
+		$temp_dir .= substr ( mt_rand() , 0 , 4 ) ;
+		$temp_dir = tempnam ( $params['docbook']['temp_dir'], $temp_dir ) ;
+		$project = basename ( $temp_dir ) ;
+		unlink ( $temp_dir ) ; # It is currently a file, so...
+		mkdir ( $temp_dir ) ;
+
+		# Write XML file
+		$xml_file = $temp_dir . "/" . $project . ".xml" ;
+		$handle = fopen ( $xml_file , 'wb' ) ;
+		if ( $handle === false ) {
+			SureRemoveDir ( $temp_dir ) ;
+			trigger_error ( "articles2docbook_pdf: cannot write {$xml_file}" , E_USER_WARNING ) ;
+			return false ;
+		}
+		# NOTE(review): utf8_encode() converts from ISO-8859-1; if $docbook_xml
+		# is already UTF-8 this encodes it a second time - confirm the input encoding
+		fwrite ( $handle , utf8_encode ( $docbook_xml ) ) ;
+		fclose ( $handle ) ;
+		if ( $add_gfdl ) {
+			copy ( $xmlg['sourcedir'] . "/gfdl.xml" , $temp_dir . "/gfdl.xml" ) ;
+		}
+
+		if ( !empty ( $params['docbook']['out_dir'] ) ) {
+			$output_dir = $params['docbook']['out_dir'];
+		} else {
+			$output_dir = $params['docbook']['temp_dir'];
+		}
+
+		# Call converter
+		$command = str_replace ( "%1" , $xml_file , $params['docbook'][$command_key] ) ;
+
+		# PHP4 does not have recursive mkdir
+		$output_dir = $output_dir . '/' . $out_subdir ;
+		if ( ! file_exists( $output_dir ) ) {
+			mkdir ( $output_dir ) ;
+		}
+		$output_dir = $output_dir . '/' . $project;
+		if ( ! file_exists( $output_dir ) ) {
+			mkdir ( $output_dir ) ;
+		}
+
+		$command = $command . ' --nochunks --output ' . $output_dir;
+
+		exec ( $command ) ;
+
+		# Cleanup xml file
+		SureRemoveDir ( $temp_dir ) ;
+
+		# Check if everything is OK
+		$output_filename = $output_dir . '/' . $project . '.' . $out_subdir ;
+		if ( !file_exists ( $output_filename ) ) {
+			header('Content-type: text/html; charset=utf-8');
+			print "ERROR : Document was not created: Docbook creator has failed! Command was: $command. output_filename = $output_filename" ;
+		}
+
+		# Return pdf filename
+		return $output_filename ;
+	}
+}
+
+
+?>
Added: projects/wiki/extensions/wiki2xml/sample_local.php
===================================================================
--- projects/wiki/extensions/wiki2xml/sample_local.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/sample_local.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,74 @@
+<?php
+
+/**
+ * You can create your own local.php file, similar to this one, to configure
+ * your local installation.
+ * If you do not create a local.php file, the scripts will run with
+ * default settings.
+ */
+
+# Host and script path of the wiki, without scheme; the w2x.php form shows it
+# as "http://<site_base_url>/index.php".
+$xmlg["site_base_url"] = "127.0.0.1/phase3" ;
+# Enables Special:Export (see the note on this setting further down).
+$xmlg["use_special_export"] = 1 ;
+
+# Directory for temporary files:
+$xmlg["temp_dir"] = "C:/windows/temp" ;
+
+# DocBook export settings:
+#  command_pdf / command_html : converter command line; "%1" is replaced by the
+#                               path of the generated XML file, and
+#                               " --nochunks --output <dir>" is appended.
+#  temp_dir                   : directory in which a per-run working directory
+#                               is created.
+#  out_dir                    : base directory for the converter output; when
+#                               empty, temp_dir is used instead.
+#  dtd                        : presumably the DocBook DTD location written into
+#                               the generated XML - TODO confirm.
+$xmlg["docbook"] = array (
+ "command_pdf" => "C:/docbook/bat/docbook_pdf.bat %1" ,
+ "command_html" => "C:/docbook/bat/docbook_html.bat %1" ,
+ "temp_dir" => "C:/docbook/repository" ,
+ "out_dir" => "C:/docbook/output" ,
+ "dtd" => "file:/c:/docbook/dtd/docbookx.dtd"
+) ;
+
+/* To allow parameters passed as URL parameter ($_GET), set
+$xmlg['allow_get'] = true ;
+
+Parameters:
+doit=1
+text=lines_of_text_or_titles
+whatsthis=wikitext/articlelist
+site=en.wikipedia.org/w
+output_format=xml/text/xhtml/docbook_xml/odt_xml/odt
+
+Optional:
+use_templates=all/none/these/notthese
+templates=lines_of_templates
+document_title=
+add_gfdl=1
+keep_categories=1
+keep_interlanguage=1
+*/
+
+
+
+# To use the toolserver text access, set
+# $xmlg["use_toolserver_url"] = true ;
+# $xmlg["use_special_export"] = false ;
+
+# On Windows, set
+# $xmlg['is_windows'] = true ;
+
+### Special:Export gives access to the (potentially) automatic authors list,
+### but is a little slower. It is already enabled near the top of this file;
+### the commented-out line below only repeats that setting:
+#$xmlg["use_special_export"] = 1 ;
+
+
+### Uncomment and localize the following to offer ODT export
+
+# Path to the zip/unzip programs; can be omitted if in path:
+#$xmlg["zip_odt_path"] = "E:\\Program Files\\7-Zip" ;
+
+# Command to zip directory $1 to file $2;
+# NOTE THE '*' AFTER '$2' FOR WINDOWS ONLY!
+#$xmlg["zip_odt"] = '7z.exe a -r -tzip $1 $2*' ;
+
+# Command to unzip file $1 to directory $2:
+#$xmlg["unzip_odt"] = '7z.exe x $1 -o$2' ;
+
+
+# If you want to do text-file browsing, run "xmldump2files.php" once
+# (see settings there), then set this:
+# $base_text_dir = "C:/dewiki-20060327-pages-articles" ;
+
+?>
Added: projects/wiki/extensions/wiki2xml/template.odt
===================================================================
(Binary files differ)
Property changes on: projects/wiki/extensions/wiki2xml/template.odt
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: projects/wiki/extensions/wiki2xml/test.xml
===================================================================
--- projects/wiki/extensions/wiki2xml/test.xml (rev 0)
+++ projects/wiki/extensions/wiki2xml/test.xml 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,35 @@
+<?xml version='1.0' encoding='UTF-8' ?>
+<articles loadtime='0 sec' rendertime='0.0158550739288 sec'
totaltime='0.0158550739288 sec'>
+<article title='Main+Page'>
+<paragraph>
+You have reached
a<space/><italics>developper</italics><space/>mediawiki
installation :)</paragraph>
+<paragraph>
+Don't trust anything you might find here and expect crashes !</paragraph>
+<paragraph>
+<link><target>User:Hashar</target><part>Hashar</part></link><space/>23:48,
13 April 2006 (CEST)</paragraph>
+<paragraph>
+<link><target>TOTO</target></link></paragraph>
+<paragraph>
+<link><target>redircat</target></link></paragraph>
+<paragraph>
+number of articles:<space/><xhtml:div class='noarticletext'>There is
currently no text in this page, you
can<space/><link><target>Special:Search/Wiki2XML</target><part>search
for this page title</part></link><space/>in other pages
or<space/><link type='external'
href='http://twenkill.dyndns.org/wiki?title=Special:Wiki2XML&...
this page</link>.</xhtml:div></paragraph>
+<paragraph>
+ <xhtml:pre>Oh no this is some ugly code</xhtml:pre>
+</paragraph>
+<list type='numbered'>
+ <listitem>numb 1</listitem>
+ <listitem>numb 2</listitem>
+ <listitem>numb 3</listitem>
+</list>
+<paragraph>
+And unordered:</paragraph>
+<list type='bullet'>
+ <listitem>yeah 1</listitem>
+ <listitem><link type='external'
href='http://www.google.com/'>Google</link></listitem...
+ <listitem>yeah 3</listitem>
+</list>
+<paragraph>
+ <link><target>Tobedoom</target></link>
+</paragraph>
+</article>
+</articles>
Added: projects/wiki/extensions/wiki2xml/w2x.php
===================================================================
--- projects/wiki/extensions/wiki2xml/w2x.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/w2x.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,471 @@
+<?php
+# Copyright by Magnus Manske (2005 - 2006)
+# Released under GPL
+
+# Start-up: lift the execution time limit and set the user agent string.
+@set_time_limit ( 0 ) ; # No time limit
+ini_set('user_agent','MSIE 4\.0b2;'); # Fake user agent
+
+# When run outside MediaWiki (constant MEDIAWIKI undefined) the defaults are
+# loaded here.
+if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
+ include_once ( "default.php" ) ; # Which will include local.php, if available
+}
+
+# Provides the MediaWikiConverter class used by the main program below.
+require_once ( "mediawiki_converter.php" ) ;
+
+## TIMER FUNCTION
+
+# Returns the current Unix timestamp as a float with sub-second precision.
+# microtime() without arguments yields the string "<fraction> <seconds>";
+# both parts are converted to float and added.
+function microtime_float()
+{
+ $parts = explode ( " " , microtime () ) ;
+ $fraction = (float) $parts[0] ;
+ $seconds = (float) $parts[1] ;
+ return ( $fraction + $seconds ) ;
+}
+
+# Builds the HTML input form of the converter and returns it as a string.
+#
+# $as_extension : when true, the visible "Site" text box is replaced by a
+#                 hidden, empty 'site' field.
+#
+# The radio buttons for 'whatsthis' and 'output_format' are pre-checked from
+# the current request (defaults: 'articlelist' and 'xml'). DocBook PDF/HTML
+# and OpenOffice outputs are only offered when the matching $xmlg settings
+# exist. As a side effect, $xmlg["site_base_url"] is overwritten with the
+# 'site' request parameter when one is given.
+#
+# NOTE(review): the 'text' parameter and $xmlg["site_base_url"] are inserted
+# into the markup without HTML escaping - looks like reflected XSS; confirm
+# and escape.
+# NOTE(review): in this copy the example URL in the first $additional[] entry
+# ends in "&..." and its closing quote is missing; the literal appears to have
+# been truncated - restore it from the original file.
+function get_form ( $as_extension = false ) {
+ global $xmlg ;
+
+ $_wt = get_param ( 'whatsthis' , 'articlelist' ) ;
+ $wt['wikitext'] = $wt['articlelist'] = $wt['listpagename'] =
"" ;
+ $wt[$_wt] = 'checked' ;
+
+ $xmlg["site_base_url"] =
get_param('site',$xmlg["site_base_url"]) ;
+ $_out = get_param ( 'output_format' , 'xml' ) ;
+ $out['xml'] = $out['text'] = $out['translated_text'] =
$out['xhtml'] = $out['docbook_xml'] = $out['docbook_pdf'] =
$out['docbook_html'] =
+ $out['odt_xml'] = $out['odt'] = '' ;
+ $out[$_out] = 'checked' ;
+
+ $optional = array () ;
+ if ( isset ( $xmlg['docbook']['command_pdf'] ) ) {
+ $optional[] = "<INPUT {$out['docbook_pdf']} type='radio'
name='output_format' value='docbook_pdf'>DocBook PDF" ;
+ }
+ if ( isset ( $xmlg['docbook']['command_html'] ) ) {
+ $optional[] = "<INPUT {$out['docbook_html']} type='radio'
name='output_format' value='docbook_html'>DocBook HTML" ;
+ }
+ if ( isset ( $xmlg['zip_odt'] ) ) {
+ $optional[] = "<INPUT {$out['odt_xml']} type='radio'
name='output_format' value='odt_xml'>OpenOffice XML" ;
+ $optional[] = "<INPUT {$out['odt']} type='radio'
name='output_format' value='odt'>OpenOffice ODT" .
+ "<input type='checkbox' name='odt_footnote' value='1'
".(get_param('odt_footnote',false)?'checked':'').">References
as endnotes (instead of footnotes)" ;
+ }
+ $optional = "<br/>" . implode ( "<br/>" , $optional ) ;
+
+ if ( $as_extension ) $site = "<input type='hidden' name='site'
value=''/>" ;
+ else $site = "Site : http://<input type='text' name='site'
value='".$xmlg["site_base_url"]."'/>/index.php<br/>"
;
+
+ $additional = array() ;
+ if ( $xmlg['allow_get'] ) {
+ $additional[] = "This page can be called with parameters:
w2x.php?doit=1&whatsthis=articlelist&site=en.wikipedia.org/w&...
;
+ $additional[] = "For additional parameters, see <a
href='README'>here</a>" ;
+ }
+
+ # Plain text translation options
+ $a = array (
+ 'en' => 'English',
+ 'de' => 'German',
+ 'fr' => 'French',
+ 'es' => 'Spanish',
+ 'it' => 'Italian',
+ ) ;
+ asort ( $a ) ;
+ $tttlo = "" ;
+ foreach ( $a AS $b => $c ) {
+ $tttlo .= "<option value='{$b}'>{$c}</option>" ;
+ }
+
+ $additional = "<div style='text-align:center; border-top:1px solid
black;width:100%;font-size:12px'>" .
+ implode ( "<br/>" , $additional ) .
+ "</div>" ;
+
+return "<form method='post'>
+<h2>Paste article list or wikitext here</h2>
+<table border='0' width='100%'><tr>
+<td valign='top'><textarea rows='20' cols='80'
style='width:100%' name='text'>" .
+get_param ( 'text' , '' ) .
+"</textarea></td>
+<td width='200px' valign='top' nowrap>
+<INPUT checked type='radio' name='use_templates'
value='all'>Use all templates<br/>
+<INPUT type='radio' name='use_templates' value='none'>Do
not use templates<br/>
+<INPUT type='radio' name='use_templates' value='these'>Use
these templates<br/>
+<INPUT type='radio' name='use_templates'
value='notthese'>Use all but these templates<br/>
+<textarea rows='15' cols='30' style='width:100%'
name='templates'></textarea>
+</td></tr></table>
+<table border='0'><tr>
+<td valign='top'>
+This is<br/>
+<INPUT {$wt['wikitext']} type='radio' name='whatsthis'
value='wikitext'>raw wikitext <br/>
+<INPUT {$wt['articlelist']} type='radio' name='whatsthis'
value='articlelist'>a list of articles<br/>
+<INPUT {$wt['listpagename']} type='radio' name='whatsthis'
value='listpagename'>the name of an article with a list of pages<br/>
+
+{$site}
+Title : <input type='text' name='document_title' value=''
size=40/><br/>
+<input type='checkbox' name='add_gfdl' value='1'
checked>Include GFDL (for some output formats)</input><br/>
+<input type='checkbox' name='keep_categories' value='1'
checked>Keep categories</input><br/>
+<input type='checkbox' name='keep_interlanguage' value='1'
checked>Keep interlanguage links</input><br/>
+<input type='submit' name='doit' value='Convert'/>
+</td><td valign='top' style='border-left:1px black solid'>
+<b>Output</b>
+<br/><INPUT {$out['xml']} type='radio'
name='output_format' value='xml'>XML
+<br/><INPUT {$out['text']} type='radio'
name='output_format' value='text'>Plain text
+ <input type='checkbox' name='plaintext_markup' value='1'
".(get_param('plaintext_markup',true)?'checked':'').">Use
*_/ markup</input>
+ <input type='checkbox' name='plaintext_prelink' value='1'
".(get_param('plaintext_prelink',true)?'checked':'').">Put
→ before internal links</input>
+<br/><INPUT {$out['translated_text']} type='radio'
name='output_format' value='translated_text'>Plain text,
google-translated to
+ <select name='translated_text_target_language'>{$tttlo}</select>
(works only for wikipedia/wikibooks)
+<br/><INPUT {$out['xhtml']} type='radio'
name='output_format' value='xhtml'>XHTML
+ <input type='checkbox' name='xhtml_justify' value='1'
".(get_param('xhtml_justify',true)?'checked':'').">Align
paragraphs as 'justify'</input>
+ <input type='checkbox' name='xhtml_logical_markup' value='1'
".(get_param('xhtml_logical_markup',true)?'checked':'').">Use
logical markup (e.g., 'strong' instead of 'b')</input>
+ <input type='checkbox' name='xhtml_source' value='1'
".(get_param('xhtml_source',false)?'checked':'').">Return
source XHTML</input>
+<br/><INPUT {$out['docbook_xml']} type='radio'
name='output_format' value='docbook_xml'>DocBook XML
+{$optional}
+</tr></table>
+</form>
+<p>
+Known issues:
+<ul>
+<li>In templates, {{{variables}}} used within <nowiki> tags will be
replaced as well (too lazy to strip them)</li>
+<li>HTML comments are removed (instead of converted into XML tags)</li>
+</ul>{$additional}
+</p>" ;
+}
+
+# Looks up request parameter $s and returns its value, or $default when the
+# parameter is absent. With $xmlg['allow_get'] enabled the lookup uses
+# $_REQUEST, otherwise only $_POST is consulted.
+function get_param ( $s , $default = NULL ) {
+ global $xmlg ;
+ $source = $xmlg['allow_get'] ? $_REQUEST : $_POST ;
+ if ( !isset ( $source[$s] ) ) return $default ;
+ return $source[$s] ;
+}
+
+# Queues one article title for conversion: the title is trimmed, underscores
+# become spaces, and the result is appended to $aArticles. Titles that are
+# empty after trimming are ignored.
+function push_article ( &$aArticles, $article ) {
+ $title = trim ( $article ) ;
+ if ( $title == "" ) return ;
+ $aArticles[] = str_replace ( '_' , ' ' , $title ) ;
+}
+
+# Adds the XML of one article to $xml.
+# Without temp files ($xmlg["use_xml_temp_files"] off) $xml is a string and
+# $new_xml is concatenated to it. With temp files, $new_xml is written to a
+# fresh file in $xmlg["temp_dir"] and $xml becomes (or stays) an array that
+# collects the file names.
+function append_to_xml ( &$xml , $new_xml ) {
+ global $xmlg ;
+
+ if ( !$xmlg["use_xml_temp_files"] ) { # Do not use temp files
+  $xml .= $new_xml ;
+  return ;
+ }
+
+ # Use temp files
+ if ( !is_array ( $xml ) ) $xml = array () ;
+
+ # Keep asking for a temp file name until one can be opened for writing
+ $handle = false ;
+ while ( $handle === false ) {
+  $path = tempnam ( $xmlg["temp_dir"] , "XMLTMP" ) ;
+  $handle = fopen ( $path , 'wb' ) ;
+ }
+ fwrite ( $handle , $new_xml ) ;
+ fclose ( $handle ) ;
+
+ $xml[] = $path ;
+}
+
+# Hands out the next chunk of article XML, or false when nothing is left.
+# Array mode (temp files): the first file name is removed from $xml, the file
+# is read, deleted, and its contents returned.
+# String mode: the whole string is returned once and $xml is reset to an empty
+# array, so that the following call reports false.
+function xml_shift ( &$xml ) {
+ if ( is_array ( $xml ) ) { # Use temp files
+  if ( count ( $xml ) == 0 ) return false ;
+  $path = array_shift ( $xml ) ;
+  $contents = file_get_contents ( $path ) ;
+  unlink ( $path ) ;
+  return $contents ;
+ }
+
+ # Do not use temp files
+ if ( $xml == '' ) return false ;
+ $whole = $xml ;
+ $xml = array () ;
+ return $whole ;
+}
+
+# Deletes any temporary XML files still listed in $xml and empties the list.
+# Normally unnecessary when everything was consumed through xml_shift.
+# Returns nothing, except false when temp files are enabled but $xml is not
+# an array.
+function xml_cleanup ( &$xml ) {
+ global $xmlg ;
+ if ( !$xmlg["use_xml_temp_files"] ) return ; # not using temp files
+ if ( !is_array ( $xml ) ) return false ;
+ while ( count ( $xml ) > 0 ) {
+  unlink ( array_shift ( $xml ) ) ;
+ }
+}
+
+
+## MAIN PROGRAM
+
+# Entry point. When the 'doit' parameter is set, the submitted text is
+# converted, the result is sent in the requested output format, and the script
+# exits. Otherwise the input form is printed (stand-alone) or only built into
+# $out (MediaWiki extension).
+if ( get_param('doit',false) ) { # Process
+ # NOTE(review): stripslashes presumably undoes magic_quotes_gpc escaping -
+ # confirm; where magic quotes are off it removes genuine backslashes.
+ $wikitext = stripslashes ( get_param('text') ) ;
+
+ if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
+ $content_provider = new ContentProviderHTTP ;
+ } else { # MediaWiki extension
+ $content_provider = new ContentProviderMySQL ;
+ }
+ $converter = new MediaWikiConverter ;
+
+ # Copy the form settings into the global configuration array
+ $xmlg["book_title"] = get_param('document_title');
+ $xmlg["site_base_url"] = get_param('site') ;
+ $xmlg["resolvetemplates"] = get_param('use_templates','all')
;
+ $xmlg['templates'] = explode ( "\n" ,
get_param('templates','') ) ;
+ $xmlg['add_gfdl'] = get_param('add_gfdl',false) ;
+ $xmlg['keep_interlanguage'] = get_param('keep_interlanguage',false) ;
+ $xmlg['keep_categories'] = get_param('keep_categories',false) ;
+
+ # the article list
+ $aArticles = array () ;
+
+ $t = microtime_float() ;
+ $xml = "" ;
+
+ $format = get_param('output_format') ;
+ $whatsthis = get_param('whatsthis') ;
+
+ # Catch listnamepage
+ # The first input line names a wiki page. Of that page, only lines starting
+ # with '*', '#' or ':' are kept, and from each the content of the first
+ # [[...]] is taken. The result is then processed like an article list.
+ if ( $whatsthis == "listpagename" ) {
+ $listpage = trim ( array_shift ( explode ( "\n" , $wikitext ) ) ) ;
+ $wikitext = $content_provider->get_wiki_text ( $listpage ) ;
+ $lines = explode ( "\n" , $wikitext ) ;
+ $wikitext = array () ;
+ foreach ( $lines AS $l ) {
+ $l1 = substr ( $l , 0 , 1 ) ;
+ if ( $l1 != '*' && $l1 != '#' && $l1 != ':' )
continue ;
+ $l = explode ( '[[' , $l , 2 ) ;
+ $l = trim ( array_shift ( explode ( ']]' , array_pop ( $l ) , 2 ) ) ) ;
+ if ( $l == '' ) continue ;
+ $wikitext[] = $l ;
+ }
+ $wikitext = implode ( "\n" , $wikitext ) ;
+ $whatsthis = 'articlelist' ;
+ }
+
+ # For these formats temp-file buffering is switched off; the flag is read
+ # further down, where 'use_xml_temp_files' is turned on for article lists.
+ # QUICK HACK! NEEDS TO WORK!
+ if ( $format == "odt" || $format == "odt_xml" || $format ==
"docbook_pdf" || $format == "docbook_html" || $format ==
"docbook_xml" ) {
+ $xmlg["allow_xml_temp_files"] = false ;
+ }
+
+ if ( $whatsthis == "wikitext" ) {
+ $content_provider->first_title = "Raw wikitext page" ;
+ $wiki2xml_authors = array () ;
+ $xml = $converter->article2xml ( "" , $wikitext , $xmlg ) ;
+ } else {
+ if ( $xmlg['allow_xml_temp_files'] ) $xmlg['use_xml_temp_files'] = true
;
+
+ foreach ( explode ( "\n" , $wikitext ) AS $a ) {
+ push_article( $aArticles, $a );
+ }
+
+ # set the first article name as the default title
+ if ($xmlg["book_title"] == '') {
+ $xmlg["book_title"] = $aArticles[0];
+ }
+ # as long as we have articles to convert (this might change in between!)
+ while ( $a = array_shift( $aArticles ) ) {
+ $wiki2xml_authors = array () ;
+
+ # Article page|Article name
+ $a = explode ( '|' , $a ) ;
+ if ( count ( $a ) == 1 ) $a[] = $a[0] ;
+ $title_page = trim ( array_shift ( $a ) ) ;
+ $title_name = trim ( array_pop ( $a ) ) ;
+
+ $wikitext = $content_provider->get_wiki_text ( $title_page ) ;
+ add_authors ( $content_provider->authors ) ;
+ append_to_xml ( $xml , $converter->article2xml ( $title_name , $wikitext , $xmlg,
$aArticles ) ) ;
+ #$xml .= $converter->article2xml ( $title_name , $wikitext , $xmlg, &$aArticles
) ;
+ }
+ }
+ # Timing: $tt = total elapsed, $lt = load time reported by the content
+ # provider, $t = the remainder (reported as render time).
+ $t = microtime_float() - $t ;
+ $tt = round( $t, 3 ) ;
+ $lt = round( $content_provider->load_time, 3 ) ;
+ $t = round( $t - $lt, 3) ;
+
+ $xmlg['xml_articles_header'] = "<articles xmlns:xhtml=\" \"
loadtime='{$lt} sec' rendertime='{$t} sec' totaltime='{$tt}
sec'>" ;
+
+ # Output format
+ if ( $format == "xml" ) {
+ header('Content-type: text/xml; charset=utf-8');
+ print "<?xml version='1.0' encoding='UTF-8' ?>\n" ;
+ print xml_articles_header() ;
+ while ( $x = xml_shift ( $xml ) ) print $x ;
+ print "</articles>" ;
+ } else if ( $format == "text" ) {
+ $xmlg['plaintext_markup'] = get_param('plaintext_markup',false) ;
+ $xmlg['plaintext_prelink'] = get_param('plaintext_prelink',false) ;
+ $out = $converter->articles2text ( $xml , $xmlg ) ;
+ $out = str_replace ( "\n" , "<br/>" , $out ) ;
+ header('Content-type: text/html; charset=utf-8');
+ print $out ;
+
+ } else if ( $format == "translated_text" ) {
+ # Plain text without markup; the first line is dropped and the rest is
+ # sent to the Google translate URL. The source language is the first
+ # dot-separated label of the site host.
+ $xmlg['plaintext_markup'] = false ;
+ $xmlg['plaintext_prelink'] = false ;
+ $out = $converter->articles2text ( $xml , $xmlg ) ;
+ #$out = str_replace ( "\n" , "<br/>" , $out ) ;
+ #header('Content-type: text/html; charset=utf-8');
+ #print $out ;
+ $out = explode ( "\n" , $out ) ;
+ array_shift ( $out ) ;
+ $out = trim ( implode ( "\n" , $out ) ) ;
+ $source_language = array_shift ( explode ( '.' ,
$xmlg["site_base_url"] ) ) ;
+ $target_language = get_param ( 'translated_text_target_language' , 'en'
) ;
+ $langpair = urlencode ( "{$source_language}|{$target_language}" ) ;
+ $url = "http://www.google.com/translate_t?langpair={$langpair}&text=" .
urlencode ( utf8_decode ( $out ) ) ;
+ echo file_get_contents ( $url ) ;
+
+ } else if ( $format == "xhtml" ) {
+ $xmlg['xhtml_justify'] = get_param ( 'xhtml_justify' , false ) ;
+ $xmlg['xhtml_logical_markup'] = get_param ( 'xhtml_logical_markup' ,
false ) ;
+ $xmlg['xhtml_source'] = get_param ( 'xhtml_source' , false ) ;
+
+ if ( $xmlg['xhtml_source'] ) {
+ header('Content-type: text/xml; charset=utf-8');
+ #header('Content-type: text/html; charset=utf-8');
+ $s = $converter->articles2xhtml ( $xml , $xmlg ) ;
+ $s = str_replace ( '>' , ">\n" , $s ) ;
+ $s = str_replace ( '<' , "\n<" , $s ) ;
+ $s = str_replace ( "\n\n" , "\n" , $s ) ;
+ echo trim ( $s ) ;
+ #echo str_replace ( "\n" , '<br/>' , htmlentities ( trim ( $s
) ) ) ;
+ } else {
+ # NOTE(review): $_SERVER["HTTP_ACCEPT"] is read without an isset() check;
+ # a request without an Accept header raises a notice here.
+ # Header hack for IE
+ if ( stristr($_SERVER["HTTP_ACCEPT"],"application/xhtml+xml") ) {
+ header("Content-type: application/xhtml+xml");
+ } else {
+ header("Content-type: text/html");
+ }
+ echo $converter->articles2xhtml ( $xml , $xmlg ) ;
+ }
+
+ } else if ( $format == "odt" || $format == "odt_xml" ) {
+ # ODT export: unzip template.odt into "<temp file>-DIR", write the
+ # converted content.xml into it, zip the directory, and stream the
+ # result (odt) or print the raw content XML (odt_xml).
+ if ( isset ( $_REQUEST['odt_footnote'] ) ) $xmlg["odt_footnote"] =
'endnote' ;
+ if ( $xmlg['sourcedir'] == '.' ) $cwd = getcwd() ;
+ else $cwd = $xmlg['sourcedir'] ;
+ $template_file = $cwd . '/template.odt' ;
+
+ $dir_file = tempnam($xmlg["temp_dir"], "ODD");
+ $dir = $dir_file . "-DIR" ;
+ $xmlg['image_destination'] = $dir . "/Pictures" ;
+
+ $zipdir = $cwd ;
+ if ( isset ( $xmlg["zip_odt_path"] ) ) # Windows strange bug workaround
+ $zipdir = $xmlg["zip_odt_path"] ;
+
+ chdir ( $zipdir ) ;
+
+ # Unzip template
+ $cmd = $xmlg['unzip_odt'] ;
+ $cmd = str_replace ( '$1' , escapeshellarg ( $template_file ) , $cmd ) ;
+ $cmd = str_replace ( '$2' , escapeshellarg ( $dir ) , $cmd ) ;
+ exec ( $cmd ) ;
+
+ # Convert XML to ODT
+ chdir ( $cwd ) ;
+ if ( $format == "odt_xml" ) $content_provider->block_file_download = true
;
+ $out = $converter->articles2odt ( $xml , $xmlg ) ;
+ chdir ( $zipdir ) ;
+
+ # Create ODT structure
+ $handle = fopen ( $dir . "/content.xml" , "w" ) ;
+ if ($handle) {
+ fwrite ( $handle , $out ) ;
+ fclose ( $handle ) ;
+ # NOTE(review): tempnam() is called with an empty directory here, unlike
+ # the other calls which use $xmlg["temp_dir"] - confirm this is intended.
+ # Generate temporary ODT file
+ $out_file = tempnam('', "ODT");
+ $cmd = $xmlg['zip_odt'] ;
+ $cmd = str_replace ( '$1' , escapeshellarg ( $out_file ) , $cmd ) ;
+
+ if ( $xmlg['is_windows'] ) {
+ $cmd = str_replace ( '$2' , escapeshellarg ( $dir . "/" ) , $cmd )
;
+ } else {
+ $cmd = str_replace ( '$2' , escapeshellarg ( './' ) , $cmd ) ;
+ # linux/unix zip needs to be in the directory, otherwise it will
+ # include needless parts into the directory structure
+ chdir ($dir);
+ # remove the output if it for some reason already exists
+ }
+
+ @unlink ( $out_file ) ;
+ exec ( $cmd ) ;
+
+ if ( $format == "odt" ) { # Return ODT file
+ # NOTE(review): the user-supplied book title is placed into the
+ # Content-Disposition header without any escaping - confirm and sanitise.
+ $filename = $xmlg["book_title"] ;
+ if (!preg_match('/\.[a-zA-Z]{3}$/',$filename)) { $filename .= '.odt';
}
+ if (!preg_match('/\.[a-zA-Z]{3}$/',$out_file)) { $out_file .= '.zip';
}
+ header('Content-type: application/vnd.oasis.opendocument.text;
charset=utf-8');
+ header('Content-Disposition: inline;
filename="'.$filename.'"');
+ # XXX TODO: error handling here
+ $handle = fopen($out_file, 'rb');
+ fpassthru ( $handle ) ;
+ fclose ( $handle ) ;
+ } else { # Return XML
+ header('Content-type: text/xml; charset=utf-8');
+ print str_replace ( ">" , ">\n" , $out ) ;
+ }
+
+ # Cleanup
+ SureRemoveDir ( $dir ) ;
+ @rmdir ( $dir ) ;
+ @unlink ( $dir_file ) ;
+ @unlink ( $out_file ) ;
+ chdir ( $cwd ) ;
+ } # error occured
+
+ } else if ( $format == "docbook_xml" ) {
+ $out = $converter->articles2docbook_xml ( $xml , $xmlg ) ;
+ header('Content-type: text/xml; charset=utf-8');
+ print $out ;
+ } else if ( $format == "docbook_pdf" || $format == "docbook_html" )
{
+ # "docbook_pdf" / "docbook_html" -> "pdf" / "html"; the upper-cased type
+ # is passed to the converter, which returns the path of the output file.
+ $filetype = substr ( $format , 8 ) ;
+ $filename = $converter->articles2docbook_pdf ( $xml , $xmlg , strtoupper ( $filetype
) ) ;
+
+ if ( file_exists ( $filename ) ) {
+ $fp = fopen($filename, 'rb');
+ # NOTE(review): as in the ODT branch, the book title goes into the
+ # Content-Disposition header unescaped.
+ if ( $format == "docbook_pdf" ) {
+ header('Content-Type: application/pdf');
+ header("Content-Length: " . (string) filesize($filename));
+ header('Content-Disposition: attachment;
filename="'.$xmlg["book_title"].'.pdf"');
+ } else if ( $format == "docbook_html" ) {
+ header('Content-Type: text/html');
+ header("Content-Length: " . (string) filesize($filename));
+ header('Content-Disposition: inline;
filename="'.$xmlg["book_title"].'.html"');
+ }
+ fpassthru($fp);
+ fclose ( $fp ) ;
+ }
+
+ # NOTE(review): this removes the directory two levels above the output
+ # file, not just the directory containing it - confirm that nothing else
+ # (e.g. output of a concurrent run) lives there.
+ # Cleanup
+ $pdf_dir = dirname ( dirname ( $filename ) ) ;
+ SureRemoveDir ( $pdf_dir ) ;
+ @rmdir ( $pdf_dir ) ;
+ }
+ xml_cleanup ( $xml ) ;
+ exit ;
+} else { # Show the form
+ if( !defined( 'MEDIAWIKI' ) ) { # Stand-alone
+ header('Content-type: text/html; charset=utf-8');
+ print "
+<html><head></head><body>
+<h1>Magnus' magic MediaWiki-to-XML-to-stuff converter</h1>
+<p>All written in PHP - so portable, <s>so incredibly slow...</s>
<i>about as fast as the original MediaWiki parser!</i> <small>(For the
source, see <a
href='http://www.mediawiki.org/wiki/Subversion'>here</a>, trunk
'wiki2xml', directory 'php')</small></p>" ;
+ print get_form () ;
+ print "</body></html>" ;
+ } else { # MediaWiki extension
+ $out = get_form ( true ) ;
+ }
+
+}
+
+#<input type='checkbox' name='resolvetemplates' value='1'
checked>Automatically resolve templates</input><br/>
+
+?>
Added: projects/wiki/extensions/wiki2xml/wiki2xml.php
===================================================================
--- projects/wiki/extensions/wiki2xml/wiki2xml.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/wiki2xml.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,1452 @@
+<?php
+# Copyright by Magnus Manske (2005)
+# Released under GPL
+
+$wiki2xml_authors = array () ;
+
+class wiki2xml
+ {
+ var $cnt = 0 ; # For debugging purposes
+ var $protocols = array ( "http" , "https" , "news" ,
"ftp" , "irc" , "mailto" ) ;
+ var $errormessage = "ERROR!" ;
+ var $compensate_markup_errors = true;
+ var $auto_fill_templates = 'all' ; # Will try and replace templates right
inline, instead of using <template> tags; requires global $content_provider
+ var $use_space_tag = true ; # Use <space/> instead of spaces before and after
tags
+ var $allowed = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890
+-#:;.,%="\'\\' ;
+ var $directhtmltags = array (
+ "b" => "xhtml:b",
+ "i" => "xhtml:i",
+ "u" => "xhtml:u",
+ "s" => "xhtml:s",
+ "p" => "xhtml:p",
+ "br" => "xhtml:br",
+ "em" => "xhtml:em",
+ "div" => "xhtml:div",
+ "span" => "xhtml:span",
+ "big" => "xhtml:big",
+ "small" => "xhtml:small",
+ "sub" => "xhtml:sub",
+ "sup" => "xhtml:sup",
+ "font" => "xhtml:font",
+ "center" => "xhtml:center",
+ "table" => "xhtml:table",
+ "tr" => "xhtml:tr",
+ "th" => "xhtml:th",
+ "td" => "xhtml:td",
+ "pre" => "xhtml:pre",
+ "code" => "xhtml:code",
+ "caption" => "xhtml:caption",
+ "cite" => "xhtml:cite",
+ "ul" => "xhtml:ul",
+ "ol" => "xhtml:ol",
+ "li" => "xhtml:li",
+ "tt" => "xhtml:tt",
+ "h1" => "xhtml:h1",
+ "h2" => "xhtml:h2",
+ "h3" => "xhtml:h3",
+ "h4" => "xhtml:h4",
+ "h5" => "xhtml:h5",
+ "h6" => "xhtml:h6",
+ "h7" => "xhtml:h7",
+ "h8" => "xhtml:h8",
+ "h9" => "xhtml:h9",
+ ) ;
+
+ var $w ; # The wiki text
+ var $wl ; # The wiki text length
+ var $bold_italics ;
+ var $tables = array () ; # List of open tables
+ var $profile = array () ;
+
+ # Some often used functions
+
+ /**
+  * Runs the parser method "p_<$f>" at position $a, passing $a and $xml on by
+  * reference.
+  *
+  * With $many set, the method is called again for as long as it keeps
+  * matching; otherwise it is called exactly once.
+  * Returns true when at least one call matched, or unconditionally when
+  * $atleastonce is false.
+  */
+ function once ( &$a , &$xml , $f , $atleastonce = true , $many = false )
+ {
+  $method = "p_{$f}" ;
+  $hits = 0 ;
+  while ( $this->$method ( $a , $xml ) ) {
+   $hits++ ;
+   if ( !$many ) break ;
+  }
+  return ( !$atleastonce || $hits > 0 ) ;
+ }
+
+ /**
+  * Applies parser method "p_<$f>" repeatedly; true when it matched at least
+  * once.
+  */
+ function onceormore ( &$a , &$xml , $f )
+ {
+  $atleastonce = true ;
+  $many = true ;
+  return $this->once ( $a , $xml , $f , $atleastonce , $many ) ;
+ }
+
+ /**
+  * Tells whether the wiki text continues with the string $t at position $a.
+  * On a match, $a is moved past $t unless $movecounter is false.
+  */
+ function nextis ( &$a , $t , $movecounter = true )
+ {
+  $len = strlen ( $t ) ;
+  $found = ( substr ( $this->w , $a , $len ) == $t ) ;
+  if ( $found && $movecounter ) $a += $len ;
+  return $found ;
+ }
+
+ /**
+  * Appends the HTML-escaped byte at position $a of the wiki text to $x and
+  * advances $a by one.
+  *
+  * Returns false (leaving $a and $x untouched) at the end of the text,
+  * true otherwise.
+  */
+ function nextchar ( &$a , &$x )
+ {
+  if ( $a >= $this->wl ) return false ;
+  # The text is consumed one byte at a time. With the UTF-8 default used by
+  # newer PHP versions, a single byte of a multi-byte character is an invalid
+  # sequence and would be dropped or replaced. ENT_COMPAT / ISO-8859-1 (the
+  # historical defaults) pass every byte through and escape only & < > ".
+  $x .= htmlspecialchars ( $this->w[$a] , ENT_COMPAT , 'ISO-8859-1' ) ;
+  $a++ ;
+  return true ;
+ }
+
+ /**
+  * True when $c compares within 'A'..'Z' or 'a'..'z' (string comparison).
+  */
+ function ischaracter ( $c )
+ {
+  $upper = ( $c >= 'A' && $c <= 'Z' ) ;
+  $lower = ( $c >= 'a' && $c <= 'z' ) ;
+  return ( $upper || $lower ) ;
+ }
+
+ /**
+  * Advances $a past any run of the character $blank (a space by default),
+  * stopping at the first different character or at the end of the text.
+  */
+ function skipblanks ( &$a , $blank = " " )
+ {
+  while ( $a < $this->wl && $this->w[$a] == $blank ) $a++ ;
+ }
+
+ ##############
+
+
+ /**
+  * Parses the target segment of a link or template call; a thin wrapper
+  * around p_internal_link_text with $istarget switched on.
+  */
+ function p_internal_link_target ( &$a , &$xml , $closeit = "]]" )
+ {
+  $istarget = true ;
+  return $this->p_internal_link_text ( $a , $xml , $istarget , $closeit ) ;
+ }
+
+ /**
+  * Parses link text without wrapping it in <part> tags ($mark off).
+  * For "]]"-terminated links the match is rejected whenever bold/italics
+  * state is currently open.
+  */
+ function p_internal_link_text2 ( &$a , &$xml , $closeit )
+ {
+  $matched = $this->p_internal_link_text ( $a , $xml , false , $closeit , false ) ;
+  # Dirty hack to ensure good XML; FIXME!!!
+  if ( $closeit == ']]' && $this->bold_italics != '' ) return false ;
+  return $matched ;
+ }
+
+ /**
+  * Parses one segment of a link ("[[...]]") or template call ("{{...}}"),
+  * starting at $a and ending before the next "|" or before $closeit.
+  *
+  * $istarget : true for the first segment (the target); only nested
+  *             templates are expanded inside it. For text segments nested
+  *             links, templates, HTML, bold/italics and free links are parsed.
+  * $closeit  : the closing token, "]]" or "}}".
+  * $mark     : wrap the result in <target>/<part> tags when true.
+  *
+  * On success the result is appended to $xml, $a is moved to the terminating
+  * "|" or closing token (which is not consumed), and true is returned.
+  * Returns false, leaving $a and $xml untouched, when the text ends first or,
+  * for anything but "}}", when a newline is met. The caller's bold/italics
+  * state is restored on every exit path after it has been saved.
+  */
+ function p_internal_link_text ( &$a , &$xml , $istarget = false , $closeit = "]]" , $mark = true )
+ {
+  $b = $a ;
+  $x = "" ;
+  if ( $b >= $this->wl ) return false ;
+  # Nested markup is parsed with a clean bold/italics state
+  $bi = $this->bold_italics ;
+  $this->bold_italics = '' ;
+  $closeit1 = $closeit[0] ;
+  while ( 1 )
+  {
+   if ( $b >= $this->wl ) {
+    $this->bold_italics = $bi ;
+    return false ;
+   }
+   $c = $this->w[$b] ;
+   if ( $closeit != "}}" && $c == "\n" ) {
+    $this->bold_italics = $bi ;
+    return false ;
+   }
+   if ( $c == "|" ) break ;
+   if ( $c == $closeit1 && $this->nextis ( $b , $closeit , false ) ) break ;
+   if ( !$istarget ) {
+    if ( $c == "[" && $this->once ( $b , $x , "internal_link" ) ) continue ;
+    if ( $c == "[" && $this->once ( $b , $x , "external_link" ) ) continue ;
+    if ( $c == "{" && $this->once ( $b , $x , "template_variable" ) ) continue ;
+    if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
+    if ( $c == "<" && $this->once ( $b , $x , "html" ) ) continue ;
+    if ( $c == "'" && $this->p_bold ( $b , $x , "internal_link_text2" , $closeit ) ) { break ; }
+    if ( $c == "'" && $this->p_italics ( $b , $x , "internal_link_text2" , $closeit ) ) { break ; }
+    # NOTE(review): this pre-check looks at offsets from $a (segment start),
+    # not from $b (current position), and at +5/+7 - it looks like a cheap
+    # "xxxx://" test; left unchanged, confirm the intended offsets.
+    if ( $b + 10 < $this->wl &&
+     ( $this->w[$a+5] == '/' && $this->w[$a+7] == '/' ) &&
+     $this->once ( $b , $x , "external_freelink" ) ) continue ;
+   } else {
+    if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
+   }
+   # Escape byte-wise. The explicit ENT_COMPAT / ISO-8859-1 keeps the bytes
+   # of multi-byte UTF-8 characters intact; with a UTF-8 default a lone byte
+   # is an invalid sequence and would be dropped or replaced.
+   $x .= htmlspecialchars ( $c , ENT_COMPAT , 'ISO-8859-1' ) ;
+   $b++ ;
+  }
+
+  # Template arguments are handed back as raw source text
+  if ( $closeit == "}}" && !$istarget ) {
+   $xml .= substr ( $this->w , $a , $b - $a ) ;
+   $a = $b ;
+   $this->bold_italics = $bi ;
+   return true ;
+  }
+
+  $x = trim ( str_replace ( "\n" , "" , $x ) ) ;
+  if ( $mark )
+  {
+   if ( $istarget ) $xml .= "<target>{$x}</target>" ;
+   else $xml .= "<part>{$x}</part>" ;
+  }
+  else $xml .= $x ;
+  $a = $b ;
+  $this->bold_italics = $bi ;
+  return true ;
+ }
+
+ /**
+  * Parses a link trail: the run of letters (as judged by ischaracter) that
+  * directly follows a link. When there is one, "<trail>...</trail>" is
+  * appended to $xml, $a is moved past the letters and true is returned;
+  * otherwise nothing changes and false is returned.
+  */
+ function p_internal_link_trail ( &$a , &$xml )
+ {
+  $end = $a ;
+  while ( $end < $this->wl && $this->ischaracter ( $this->w[$end] ) ) $end++ ;
+  if ( $end == $a ) return false ; # No link trail
+  $xml .= "<trail>" . substr ( $this->w , $a , $end - $a ) . "</trail>" ;
+  $a = $end ;
+  return true ;
+ }
+
+ /**
+  * Parses an internal link "[[target|part|...]]" plus an optional trail.
+  * On success "<link>...</link>" is appended to $xml, $a is moved past the
+  * link and true is returned; on failure $a and $xml are left untouched.
+  */
+ function p_internal_link ( &$a , &$xml )
+ {
+  $pos = $a ;
+  $inner = "" ;
+  if ( !$this->nextis ( $pos , "[[" ) ) return false ;
+  if ( !$this->p_internal_link_target ( $pos , $inner , "]]" ) ) return false ;
+  # Every further segment must be introduced by "|" until "]]" closes the link
+  while ( !$this->nextis ( $pos , "]]" ) )
+  {
+   if ( !$this->nextis ( $pos , "|" ) ) return false ;
+   if ( !$this->p_internal_link_text ( $pos , $inner , false , "]]" ) ) return false ;
+  }
+  $this->p_internal_link_trail ( $pos , $inner ) ;
+  $xml .= "<link>{$inner}</link>" ;
+  $a = $pos ;
+  return true ;
+ }
+
+ /**
+  * Parses a magic variable of the form "__NAME__". The name is everything up
+  * to the next underscore. On success a <magic_variable> element is appended
+  * to $xml and $a is moved past the closing "__".
+  */
+ function p_magic_variable ( &$a , &$xml )
+ {
+  $pos = $a ;
+  if ( !$this->nextis ( $pos , "__" ) ) return false ;
+  $name = "" ;
+  while ( $pos < $this->wl && $this->w[$pos] != '_' ) {
+   $name .= $this->w[$pos] ;
+   $pos++ ;
+  }
+  if ( !$this->nextis ( $pos , "__" ) ) return false ;
+  $xml .= "<magic_variable>{$name}</magic_variable>" ;
+  $a = $pos ;
+  return true ;
+ }
+
+ /**
+  * Template and template variable, utilizing parts of the internal link
+  * methods.
+  *
+  * Parses a template call "{{name|arg|key=value|...}}" at position $a.
+  * Arguments are collected into $variables under their name (or their
+  * 1-based position when unnamed); additionally, each argument is stored
+  * under its position unless that key is already taken.
+  *
+  * Depending on the template name and $this->auto_fill_templates:
+  *  - names starting with "#" go through process_template_logic;
+  *  - templates selected for replacement are fetched via the global
+  *    $content_provider, stripped of <noinclude> sections and HTML comments,
+  *    and have their {{{variables}}} substituted;
+  *  - in both cases the call is replaced IN THE SOURCE TEXT ($this->w) and
+  *    $a is left where it is, so parsing continues on the substituted text;
+  *  - otherwise a <template> element is appended to $xml and $a is moved
+  *    past the call.
+  * Returns false when the text at $a is not a well-formed template call.
+  */
+ function p_template ( &$a , &$xml )
+ {
+  global $content_provider ;
+  $x = "" ;
+  $b = $a ;
+  if ( !$this->nextis ( $b , "{{" ) ) return false ;
+# if ( $this->nextis ( $b , "{" , false ) ) return false ; # Template names may not start with "{"
+  if ( !$this->p_internal_link_target ( $b , $x , "}}" ) ) return false ;
+  $target = $x ;
+  $variables = array () ;
+  $vcount = 1 ;
+  while ( 1 )
+  {
+   if ( $this->nextis ( $b , "}}" ) ) break ;
+   if ( !$this->nextis ( $b , "|" ) ) return false ;
+   $l1 = strlen ( $x ) ;
+   if ( !$this->p_internal_link_text ( $b , $x , false , "}}" ) ) return false ;
+   $v = substr ( $x , $l1 ) ;
+   # Split at the FIRST "=" only, so that values which themselves contain
+   # "=" (URLs, nested markup) are kept complete
+   $v = explode ( "=" , $v , 2 ) ;
+   if ( count ( $v ) < 2 ) $vk = $vcount ;
+   else $vk = trim ( array_shift ( $v ) ) ;
+   $vv = array_shift ( $v ) ;
+   $variables[$vk] = $vv ;
+   if ( !isset ( $variables[$vcount] ) ) $variables[$vcount] = $vv ;
+   $vcount++ ;
+  }
+
+  # Strip the surrounding <target>...</target> tags: keep what follows the
+  # first ">" and precedes the next "<". Temporaries are used because
+  # array_pop/array_shift take their argument by reference.
+  $pieces = explode ( ">" , $target , 2 ) ;
+  $target = array_pop ( $pieces ) ;
+  $pieces = explode ( "<" , $target , 2 ) ;
+  $target = array_shift ( $pieces ) ;
+  if ( $this->auto_fill_templates == 'all' ) $replace_template = true ;
+  else if ( $this->auto_fill_templates == 'none' ) $replace_template = false ;
+  else {
+   $found = in_array ( ucfirst ( $target ) , $this->template_list ) ;
+   if ( $found AND $this->auto_fill_templates == 'these' ) $replace_template = true ;
+   else if ( !$found AND $this->auto_fill_templates == 'notthese' ) $replace_template = true ;
+   else $replace_template = false ;
+  }
+
+  if ( substr ( $target , 0 , 1 ) == '#' ) { # Try template logic
+   $between = $this->process_template_logic ( $target , $variables ) ;
+   # Change source (!)
+   $w1 = substr ( $this->w , 0 , $a ) ;
+   $w2 = substr ( $this->w , $b ) ;
+   $this->w = $w1 . $between . $w2 ;
+   $this->wl = strlen ( $this->w ) ;
+  } else if ( $replace_template ) { # Do not generate <template> sections, but rather replace the template call with the template text
+
+   # Get template text
+   $between = trim ( $content_provider->get_template_text ( $target ) ) ;
+   add_authors ( $content_provider->authors ) ;
+
+   # Removing <noinclude> stuff
+   $between = preg_replace( '?<noinclude>.*</noinclude>?msU', '', $between);
+   $between = str_replace ( "<include>" , "" , $between ) ;
+   $between = str_replace ( "</include>" , "" , $between ) ;
+   $between = str_replace ( "<includeonly>" , "" , $between ) ;
+   $between = str_replace ( "</includeonly>" , "" , $between ) ;
+
+   # Remove HTML comments
+   $between = preg_replace( '?<!--.*-->?msU', '', $between) ;
+
+   # Replacing template variables.
+   # ATTENTION: Template variables within <nowiki> sections of templates will be replaced as well!
+
+   # A table opening ("{|") is moved onto a line of its own unless the
+   # template call is at the very start of the text
+   if ( $a > 0 && substr ( $between , 0 , 2 ) == '{|' )
+    $between = "\n" . $between ;
+
+   $this->replace_template_variables ( $between , $variables ) ;
+
+   # Change source (!)
+   $w1 = substr ( $this->w , 0 , $a ) ;
+   $w2 = substr ( $this->w , $b ) ;
+   $this->w = $w1 . $between . $w2 ;
+   $this->wl = strlen ( $this->w ) ;
+  } else {
+   $xml .= "<template>{$x}</template>" ;
+   $a = $b ;
+  }
+  return true ;
+ }
+
+ # Evaluates the parser functions "#if:" and "#switch:" of a template call.
+ # $title     : the call target, e.g. "#if: foo" or "#switch: bar"
+ # $variables : the parameters given to the call
+ # Returns the text that replaces the call; for any other target (and for
+ # a "#switch:" without a matching key) the remaining $title is returned.
+ function process_template_logic ( $title , $variables ) {
+
+     # TODO : Process title and variables for sub-template-replacements
+
+     if ( substr ( $title , 0 , 4 ) == "#if:" ) {
+         $title = trim ( substr ( $title , 4 ) ) ;
+         if ( $title == '' ) {
+             # ELSE. Without a second parameter there is no else text, so the
+             # result is empty rather than the THEN text.
+             # NOTE(review): assumes $variables holds one entry per parameter - confirm against the caller
+             if ( count ( $variables ) < 2 ) return '' ;
+             return array_pop ( $variables ) ;
+         }
+         return array_shift ( $variables ) ; # THEN
+     }
+
+     if ( substr ( $title , 0 , 8 ) == "#switch:" ) {
+         # array_pop() takes its argument by reference, so the explode() result needs a variable
+         $parts = explode ( ':' , $title , 2 ) ;
+         $title = trim ( array_pop ( $parts ) ) ;
+         foreach ( $variables AS $v ) {
+             $v = explode ( '=' , $v , 2 ) ;
+             $key = trim ( array_shift ( $v ) ) ;
+             if ( $key != $title ) continue ; # Wrong key
+             return array_pop ( $v ) ; # Correct key, return value
+         }
+     }
+
+     # BAD PARSER FUNCTION! Ignoring...
+     return $title ;
+ }
+
+ # Walks through $text and, at every '{', keeps replacing the template
+ # variable found there until none is left at that offset.
+ # $text is modified in place.
+ function replace_template_variables ( &$text , &$variables ) {
+     $pos = 0 ;
+     while ( $pos + 3 < strlen ( $text ) ) {
+         if ( $text[$pos] == '{' ) {
+             do {
+                 $replaced = $this->p_template_replace_single_variable ( $text , $pos , $variables ) ;
+             } while ( $replaced ) ;
+         }
+         $pos++ ;
+     }
+ }
+
+ # Replaces one template variable, "{{{name}}}" or "{{{name|default}}}",
+ # that starts at offset $a of $text.
+ # $text      : modified in place when a variable was replaced
+ # $a         : offset to look at (passed by value)
+ # $variables : values keyed by variable name
+ # Returns true if a replacement was made; false if there is no "{{{" at $a
+ # or the end of $text is reached before the closing "}}}".
+ function p_template_replace_single_variable ( &$text , $a , &$variables ) {
+ if ( substr ( $text , $a , 3 ) != '{{{' ) return false ;
+ $b = $a + 3 ;
+
+ # Name: runs up to the first '|' or '}}}' that is not inside nested braces
+ $start = $b ;
+ $count = 0 ;
+ while ( $b < strlen ( $text ) && ( $text[$b] != '|' || $count > 0
) && ( substr ( $text , $b , 3 ) != '}}}' || $count > 0 ) ) {
+ # Nested variables are replaced first; $b stays put so the new text is rescanned
+ if ( $this->p_template_replace_single_variable ( $text , $b , $variables ) )
continue ;
+ if ( $text[$b] == '{' ) $count++ ;
+ if ( $text[$b] == '}' ) $count-- ;
+ $b++ ;
+ }
+ if ( $b >= strlen ( $text ) ) return false ;
+ $name = trim ( substr ( $text , $start , $b - $start ) ) ;
+
+ # Default value: the text between '|' and the closing '}}}'
+ $value = "" ;
+ if ( $text[$b] == '|' ) {
+ $b++ ;
+ $start = $b ;
+ $count = 0 ;
+ while ( $b < strlen ( $text ) && ( substr ( $text , $b , 3 ) !=
'}}}' || $count > 0 ) ) {
+ if ( $this->p_template_replace_single_variable ( $text , $b , $variables ) )
continue ;
+ if ( $text[$b] == '{' ) $count++ ;
+ if ( $text[$b] == '}' ) $count-- ;
+ $b++ ;
+ }
+ if ( $b >= strlen ( $text ) ) return false ;
+ $value = trim ( substr ( $text , $start , $b - $start ) ) ;#$start - $b - 1 ) ) ;
+ }
+
+ // Replace: a passed value wins over the default; with neither, the variable becomes ""
+ $b += 3 ; # }}}
+ if ( isset ( $variables[$name] ) ) {
+ $value = $variables[$name] ;
+ }
+ $text = substr ( $text , 0 , $a ) . $value . substr ( $text , $b ) ;
+
+ return true ;
+ }
+
+ # Parses a "{{{...}}}" template variable at $a and appends it to $xml
+ # as a <templatevar> element. $a is only moved on success.
+ function p_template_variable ( &$a , &$xml )
+ {
+     $cursor = $a ;
+     $inner = "" ;
+     # Opening braces, content, closing braces - evaluated in this order, stopping at the first failure
+     $matched = $this->nextis ( $cursor , "{{{" )
+         && $this->p_internal_link_text ( $cursor , $inner , false , "}}}" )
+         && $this->nextis ( $cursor , "}}}" ) ;
+     if ( !$matched ) return false ;
+     $xml .= "<templatevar>{$inner}</templatevar>" ;
+     $a = $cursor ;
+     return true ;
+ }
+
+ # Bold / italics
+ # Parses bold markup (''') by delegating to p_intwined
+ function p_bold ( &$a , &$xml , $recurse = "restofline" , $end = "" )
+ {
+     $result = $this->p_intwined ( $a , $xml , "bold" , "'''" , $recurse , $end ) ;
+     return $result ;
+ }
+
+ # Parses italics markup ('') by delegating to p_intwined
+ function p_italics ( &$a , &$xml , $recurse = "restofline" , $end = "" )
+ {
+     $result = $this->p_intwined ( $a , $xml , "italics" , "''" , $recurse , $end ) ;
+     return $result ;
+ }
+
+ # Shared implementation for bold and italics, which may be nested in each other.
+ # $tag     : XML tag name to emit ("bold" or "italics")
+ # $markup  : wiki markup that toggles the tag
+ # $recurse : name of the parser that handles the text following the markup
+ # $end     : if not "", p_{$recurse} is called directly with $end as third argument
+ # $this->bold_italics is used as a stack of the currently open tags, one letter per tag.
+ # Returns true and advances $a on success.
+ function p_intwined ( &$a , &$xml , $tag , $markup , $recurse , $end )
+ {
+ $b = $a ;
+ if ( !$this->nextis ( $b , $markup ) ) return false ;
+ # First letter of the tag, upper-cased, identifies it on the stack
+ $id = substr ( ucfirst ( $tag ) , 0 , 1 ) ;
+ # Remember the stack so it can be restored after the recursion below
+ $bi = $this->bold_italics ;
+ $open = false ;
+ if ( substr ( $this->bold_italics , -1 ) == $id )
+ {
+ # This tag was opened last, so the markup closes it
+ $x = "</{$tag}>" ;
+ $this->bold_italics = substr ( $this->bold_italics , 0 , -1 ) ;
+ }
+ else
+ {
+ $pos = strpos ( $this->bold_italics , $id ) ;
+ if ( false !== $pos ) return false ; # Can't close a tag that ain't open
+ $open = true ;
+ $x = "<{$tag}>" ;
+ $this->bold_italics .= $id ;
+ }
+
+ # Parse whatever follows the markup into $x
+ if ( $end == "" )
+ {
+ $res = $this->once ( $b , $x , $recurse ) ;
+ }
+ else
+ {
+ $r = "p_{$recurse}" ;
+ $res = $this->$r ( $b , $x , $end ) ;
+ }
+
+ $this->bold_italics = $bi ;
+ if ( !$res )
+ {
+ return false ;
+ }
+ $xml .= $x ;
+ $a = $b ;
+ return true ;
+ }
+
+ # Scans text from $a, escaping it for XML, until one of the stop markers is met.
+ # $goodstop : array of markers that end the scan successfully
+ # $badstop  : array of markers that make the scan fail
+ # A '{' is first offered to the "template" parser via once().
+ # On success $a is set to the stop position and the text is appended to $xml.
+ # Returns false on a bad stop, or if good stops were given but the end of
+ # the text was reached first.
+ function scanplaintext ( &$a , &$xml , $goodstop , $badstop )
+ {
+     $b = $a ;
+     $x = "" ;
+     while ( $b < $this->wl )
+     {
+         if ( $this->w[$b] == "{" && $this->once ( $b , $x , "template" ) ) continue ;
+         foreach ( $goodstop AS $s )
+             if ( $this->nextis ( $b , $s , false ) ) break 2 ;
+         foreach ( $badstop AS $s )
+             if ( $this->nextis ( $b , $s , false ) ) return false ;
+         $c = $this->w[$b] ;
+         $x .= htmlspecialchars ( $c ) ;
+         $b++ ;
+     }
+     if ( count ( $goodstop ) > 0 && $b >= $this->wl ) return false ; # Reached end; not good
+     $a = $b ;
+     $xml .= $x ;
+     return true ;
+ }
+
+ # External link
+ # Parses a bare URL ("protocol://...") starting at $a.
+ # $mark : if true, a <link type='external' href='...'/> element is appended
+ #         to $xml; if false, only the URL text is appended and ']' also ends the URL
+ # The URL ends at a newline, blank or '|'; a trailing '.' or ',' is not part of it.
+ # Returns true and advances $a on success.
+ function p_external_freelink ( &$a , &$xml , $mark = true )
+ {
+     if ( $this->wl <= $a + 10 ) return false ; # Can't have an URL shorter than that
+     if ( $this->w[$a+5] != '/' && $this->w[$a+7] != '/' ) return false ; # Shortcut for protocols 3-6 chars length
+     $protocol = "" ;
+     $b = $a ;
+ #   while ( $this->w[$b] == "{" && $this->once ( $b , $x , "template" ) ) $b = $a ;
+     foreach ( $this->protocols AS $p )
+     {
+         if ( $this->nextis ( $b , $p . "://" ) )
+         {
+             $protocol = $p ;
+             break ;
+         }
+     }
+     if ( $protocol == "" ) return false ;
+     $x = "{$protocol}://" ;
+     while ( $b < $this->wl )
+     {
+         $c = $this->w[$b] ;
+         if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
+         if ( $c == "\n" || $c == " " || $c == '|' ) break ;
+         if ( !$mark && $c == "]" ) break ;
+         $x .= htmlspecialchars ( $c ) ;
+         $b++ ;
+     }
+     if ( substr ( $x , -1 ) == "." || substr ( $x , -1 ) == "," )
+     {
+         # Punctuation directly after the URL belongs to the sentence, not the link
+         $x = substr ( $x , 0 , -1 ) ;
+         $b-- ;
+     }
+     $a = $b ;
+     # NOTE(review): characters were already escaped one by one above, so this escapes them a second time - confirm this is intended
+     $x = htmlspecialchars ( $x , ENT_QUOTES ) ;
+     if ( $mark ) $xml .= "<link type='external' href='$x'/>" ;
+     else $xml .= $x ;
+     return true ;
+ }
+
+ # Parses a bracketed external link, "[url text]", starting at $a, and
+ # appends <link type='external' href='url'>text</link> to $xml.
+ # The link text must end with ']' on the same line.
+ # Returns true and advances $a past the ']' on success.
+ function p_external_link ( &$a , &$xml , $mark = true )
+ {
+     $b = $a ;
+     if ( !$this->nextis ( $b , "[" ) ) return false ;
+     $url = "" ;
+     $c = $b ;
+     $x = "" ;
+     # Run the "template" parser on a '{' right behind the '[', restarting at the same offset after each success
+     while ( $c < $this->wl && $this->w[$c] == "{" && $this->once ( $c , $x , "template" ) ) $c = $b ;
+     if ( $c >= $this->wl ) return false ;
+     $x = "" ;
+     if ( !$this->p_external_freelink ( $b , $url , false ) ) return false ;
+     $this->skipblanks ( $b ) ;
+     if ( !$this->scanplaintext ( $b , $x , array ( "]" ) , array ( "\n" ) ) ) return false ;
+     $a = $b + 1 ; # Skip the closing ']'
+     $xml .= "<link type='external' href='{$url}'>{$x}</link>" ;
+     return true ;
+ }
+
+ # Heading
+ # Parses a heading line ("== Title ==") starting at $a and appends
+ # <heading level='n'>Title</heading> to $xml, n being the number of
+ # leading '=' characters. The line must end with the same number of '='.
+ # Returns true and advances $a on success.
+ function p_heading ( &$a , &$xml )
+ {
+     if ( $a >= $this->wl || $this->w[$a] != '=' ) return false ;
+     $b = $a ;
+     $level = 0 ;
+     $h = "" ;
+     $x = "" ;
+     while ( $this->nextis ( $b , "=" ) )
+     {
+         $level++ ;
+         $h .= "=" ;
+     }
+     $this->skipblanks ( $b ) ;
+     if ( !$this->once ( $b , $x , "restofline" ) ) return false ;
+     if ( $this->compensate_markup_errors ) $x = trim ( $x ) ;
+     else if ( $x != trim ( $x ) ) $xml .= "<error type='heading' reason='trailing blank'/>" ;
+     if ( substr ( $x , -$level ) != $h ) return false ; # No match
+
+     $x = trim ( substr ( $x , 0 , -$level ) ) ;
+     $a = $b ;
+     $xml .= "<heading level='" . $level . "'>{$x}</heading>" ;
+     return true ;
+ }
+
+ # Line
+ # Often used function for parsing the rest of a text line
+ # Parses inline markup from $a up to and including the next newline.
+ # Magic variables, links, templates, tables, HTML tags and bold/italics are
+ # handed to their parsers; everything else is escaped and copied.
+ # Fails if bold/italics markup is still open at the end of the line.
+ # Returns true, appends to $xml and advances $a on success.
+ function p_restofline ( &$a , &$xml , $closeit = array() )
+ {
+     $b = $a ;
+     $x = "" ;
+     $override = false ;
+     while ( $b < $this->wl && !$override )
+     {
+         $c = $this->w[$b] ;
+         if ( $c == "\n" ) { $b++ ; break ; }
+         # NOTE(review): this break only leaves the foreach, so $closeit has no effect on the scan - confirm intent
+         foreach ( $closeit AS $z )
+             if ( $this->nextis ( $b , $z , false ) ) break ;
+         if ( $c == "_" && $this->once ( $b , $x , "magic_variable" ) ) continue ;
+         if ( $c == "[" && $this->once ( $b , $x , "internal_link" ) ) continue ;
+         if ( $c == "[" && $this->once ( $b , $x , "external_link" ) ) continue ;
+         if ( $c == "{" && $this->once ( $b , $x , "template_variable" ) ) continue ;
+         if ( $c == "{" && $this->once ( $b , $x , "template" ) ) continue ;
+         if ( $c == "{" && $this->p_table ( $b , $x ) ) continue ;
+         if ( $c == "<" && $this->once ( $b , $x , "html" ) ) continue ;
+         if ( $c == "'" && $this->once ( $b , $x , "bold" ) ) { $override = true ; break ; }
+         if ( $c == "'" && $this->once ( $b , $x , "italics" ) ) { $override = true ; break ; }
+         # Cheap pre-check for "protocol://" at the current position: with a
+         # protocol of 3-6 characters one of the slashes sits at offset 5 or 7.
+         # It has to look at $b (the current position), not at the line start.
+         if ( $b + 10 < $this->wl &&
+             ( $this->w[$b+5] == '/' || $this->w[$b+7] == '/' ) &&
+             $this->once ( $b , $x , "external_freelink" ) ) continue ;
+
+         # Just an ordinary character
+         $x .= htmlspecialchars ( $c ) ;
+         $b++ ;
+         if ( $b >= $this->wl ) break ;
+     }
+     if ( !$override && $this->bold_italics != "" )
+     {
+         return false ;
+     }
+     $xml .= $x ;
+     $a = $b ;
+     return true ;
+ }
+
+ # Parses one line of ordinary paragraph text starting at $a.
+ # $force : if true, the line is taken as text even if it begins with
+ #          markup that would normally start another kind of block
+ # Resets the bold/italics state, then parses with the "restofline" parser.
+ function p_line ( &$a , &$xml , $force )
+ {
+ if ( $a >= $this->wl ) return false ; # Already at the end of the text
+ $c = $this->w[$a] ;
+ if ( !$force )
+ {
+ # List, preformatted and blank lines are not paragraph text
+ if ( $c == '*' || $c == ':' || $c == '#' || $c == ';'
|| $c == ' ' || $c == "\n" ) return false ; # Not a suitable beginning
+ if ( $this->nextis ( $a , "{|" , false ) ) return false ; # Table
+ # '|' and '!' are table markup only while a table is open
+ if ( count ( $this->tables ) > 0 && $this->nextis ( $a ,
"|" , false ) ) return false ; # Table
+ if ( count ( $this->tables ) > 0 && $this->nextis ( $a ,
"!" , false ) ) return false ; # Table
+ if ( $this->nextis ( $a , "=" , false ) ) return false ; # Heading
+ if ( $this->nextis ( $a , "----" , false ) ) return false ; # <hr>
+ }
+ $this->bold_italics = "" ;
+ return $this->once ( $a , $xml , "restofline" ) ;
+ }
+
+ # Consumes a single newline at $a; $xml is not touched
+ function p_blankline ( &$a , &$xml )
+ {
+     return $this->nextis ( $a , "\n" ) ? true : false ;
+ }
+
+ # Parses a run of text lines, plus the blank lines after it, into one <paragraph>.
+ # $force is handed to p_line for the first line only.
+ function p_block_lines ( &$a , &$xml , $force = false )
+ {
+     $pos = $a ;
+     $body = "" ;
+     if ( !$this->p_line ( $pos , $body , $force ) ) return false ;
+     # Any further lines belong to the same paragraph
+     do {
+         $more = $this->p_line ( $pos , $body , false ) ;
+     } while ( $more ) ;
+     # Skip coming blank lines
+     do {
+         $blank = $this->p_blankline ( $pos , $body ) ;
+     } while ( $blank ) ;
+     $xml .= "<paragraph>" . $body . "</paragraph>" ;
+     $a = $pos ;
+     return true ;
+ }
+
+
+
+ # PRE block
+ # Parses a line starting with ' '
+ # Parses one preformatted line (a line beginning with a blank) into <preline>.
+ # The leading blank is skipped before the rest of the line is parsed.
+ function p_preline ( &$a , &$xml )
+ {
+     # At the end of the text, or not a preline
+     if ( $a >= $this->wl || $this->w[$a] != ' ' ) return false ;
+
+     $a++ ;
+     $this->bold_italics = "" ;
+     $line = "" ;
+     $parsed = $this->once ( $a , $line , "restofline" ) ;
+     if ( $parsed ) $xml .= "<preline>" . $line . "</preline>" ;
+     return $parsed ;
+ }
+
+ # Parses a block of lines each starting with ' '
+ # Wraps the preformatted lines found at $a in a <preblock> element.
+ # Returns true and advances $a on success.
+ # NOTE(review): the 4th and 5th arguments of once() are not visible here;
+ # presumably they control repetition - confirm against once().
+ function p_block_pre ( &$a , &$xml )
+ {
+ $x = "" ;
+ $b = $a ;
+ if ( !$this->once ( $b , $x , "preline" , true , true ) ) return false ;
+ # Blank lines after the block; the result of this call is ignored
+ $this->once ( $b , $x , "blankline" , false , true ) ;
+ $xml .= "<preblock>{$x}</preblock>" ;
+ $a = $b ;
+ return true ;
+ }
+
+ # LIST block
+ # Returns a list tag depending on the wiki markup
+ # Builds the list tag for one wiki list character.
+ # $c    : '#', '*', ':' or ';'
+ # $open : true for the opening tag (with type attribute), false for "</list>"
+ function listtag ( $c , $open = true )
+ {
+     if ( !$open ) return "</list>" ;
+     switch ( $c ) {
+         case '#' : $type = "numbered" ; break ;
+         case '*' : $type = "bullet" ; break ;
+         case ':' : $type = "ident" ; break ;
+         case ';' : $type = "def" ; break ;
+         default  : $type = "" ;
+     }
+     $attr = ( $type != "" ) ? " type='{$type}'" : "" ;
+     return "<list{$attr}>" ;
+ }
+
+ # Opens/closes list tags
+ # Returns the tags needed to get from the list prefix of the previous
+ # line ($last, e.g. "*#") to the prefix of the current line ($cur).
+ # Levels both prefixes share are left alone; the remaining levels of $last
+ # are closed and the remaining levels of $cur are opened.
+ function fixlist ( $last , $cur )
+ {
+ $r = "" ;
+ $olast = $last ;
+ $ocur = $cur ;
+ $ocommon = "" ;
+
+ # Remove matching parts
+ while ( $last != "" && $cur != "" && $last[0] ==
$cur[0] )
+ {
+ $ocommon = $cur[0] ;
+ $cur = substr ( $cur , 1 ) ;
+ $last = substr ( $last , 1 ) ;
+ }
+
+ # Close old tags, innermost level first
+ $fixitemtag = false ;
+ # Levels are closed while a common level stays open: that level needs a new item afterwards
+ if ( $last != "" && $ocommon != "" ) $fixitemtag = true ;
+ while ( $last != "" )
+ {
+ $r .= "</listitem>" . $this->listtag ( substr ( $last , -1 ) ,
false ) ;
+ $last = substr ( $last , 0 , -1 ) ;
+ }
+ if ( $fixitemtag ) $r .= "</listitem><listitem>" ;
+
+ # Open new tags, outermost level first
+ while ( $cur != "" )
+ {
+ $r .= $this->listtag ( $cur[0] ) . "<listitem>" ;
+ $cur = substr ( $cur , 1 ) ;
+ }
+
+ return $r ;
+ }
+
+ # Parses a single list line
+ # Parses one list line at $a.
+ # $last : list prefix of the previous line; updated to this line's prefix
+ # The tags from fixlist() are appended to $xml even when the line is not a
+ # list line; that is what closes the lists left open by the previous line.
+ # Returns false if the line has no list prefix.
+ function p_list_line ( &$a , &$xml , &$last )
+ {
+ # Collect the prefix; the outer loop repeats until a pass adds nothing, so the characters may come in any order
+ $cur = "" ;
+ do {
+ $lcur = $cur ;
+ while ( $this->nextis ( $a , "*" ) ) $cur .= "*" ;
+ while ( $this->nextis ( $a , "#" ) ) $cur .= "#" ;
+ while ( $this->nextis ( $a , ":" ) ) $cur .= ":" ;
+ while ( $this->nextis ( $a , ";" ) ) $cur .= ";" ;
+ } while ( $cur != $lcur ) ;
+
+ $unchanged = false ;
+# if ( substr ( $cur , 0 , strlen ( $last ) ) == $last ) $unchanged = true ;
+ if ( $last == $cur ) $unchanged = true ;
+ $xml .= $this->fixlist ( $last , $cur ) ;
+
+ if ( $cur == "" ) return false ; # Not a list line
+ $last = $cur ;
+ $this->skipblanks ( $a ) ;
+
+ # Same prefix as the line before: next item of the same list
+ if ( $unchanged ) $xml .= "</listitem><listitem>" ;
+ if ( $cur == ";" ) # Definition
+ {
+ # Look for a ':' on the same line, which separates key and value
+ $b = $a ;
+ while ( $b < $this->wl && $this->w[$b] != "\n" &&
$this->w[$b] != ':' ) $b++ ;
+ if ( $b >= $this->wl || $this->w[$b] == "\n" )
+ {
+ $xml .= "<defkey>" ;
+ $this->p_restofline ( $a , $xml ) ;
+ $xml .= "</defkey>" ;
+ }
+ else
+ {
+ $xml .= "<defkey>" ;
+ # The ':' is overwritten with a newline in the source text so that key and value parse as two lines
+ $this->w[$b] = "\n" ;
+ $this->p_restofline ( $a , $xml ) ;
+ $xml .= "</defkey>" ;
+ $xml .= "<defval>" ;
+ $this->p_restofline ( $a , $xml ) ;
+ $xml .= "</defval>" ;
+ }
+ }
+ else $this->p_restofline ( $a , $xml ) ;
+ return true ;
+ }
+
+ # Checks for a list block (those nasty things starting with '*', '#', or the like...)
+ # Parses consecutive list lines at $a; true if at least one was parsed
+ function p_block_list ( &$a , &$xml )
+ {
+     $prefix = "" ; # List prefix of the previous line, maintained by p_list_line
+     $lines = 0 ;
+     while ( $this->p_list_line ( $a , $xml , $prefix ) ) $lines++ ;
+     return $lines > 0 ;
+ }
+
+ # HTML
+ # This function detects a HTML tag, finds the matching close tag,
+ # parses everything in between, and returns everything as an extension.
+ # Returns false otherwise.
+ # Parses an HTML or extension tag at $a together with its content.
+ # Tags listed in $this->directhtmltags are emitted under the mapped name,
+ # all others as <extension extension_name='tag'>. The content of pre,
+ # nowiki and math is escaped; any other content is run through a new
+ # wiki2xml parser, and escaped instead if that parser reports an error.
+ # Returns true, appends to $xml and advances $a on success; false if this
+ # is not a tag or the tag is never closed.
+ function p_html ( &$a , &$xml )
+ {
+     if ( !$this->nextis ( $a , "<" , false ) ) return false ;
+
+     $b = $a ;
+     $x = "" ;
+     $tag = "" ;
+     $closing = false ;
+     $selfclosing = false ;
+
+     if ( !$this->p_html_tag ( $b , $x , $tag , $closing , $selfclosing ) ) return false ;
+
+     if ( isset ( $this->directhtmltags[$tag] ) )
+     {
+         $tag_open = "<" . $this->directhtmltags[$tag] ;
+         $tag_close = "</" . $this->directhtmltags[$tag] . ">" ;
+     }
+     else
+     {
+         $tag_open = "<extension extension_name='{$tag}'" ;
+         $tag_close = "</extension>" ;
+     }
+
+     # Is this tag self-closing?
+     if ( $selfclosing )
+     {
+         $a = $b ;
+         $xml .= $tag_open . $x . ">" . $tag_close ;
+         return true ;
+     }
+
+     # Find the matching close tag
+     # TODO : The simple open/close counter should be replaced with a
+     #        stack to allow for tolerating half-broken HTML,
+     #        such as unclosed <li> tags
+     $begin = $b ;
+     $cnt = 1 ;
+     $tag2 = "" ;
+     while ( $cnt > 0 && $b < $this->wl )
+     {
+         $x2 = "" ;
+         $last = $b ; # Start of the tag examined in this round; marks the end of the content once the loop is done
+         if ( !$this->p_html_tag ( $b , $x2 , $tag2 , $closing , $selfclosing ) )
+         {
+             # Template, doesn't alter $b or $x
+             if ( $tag != "nowiki" && $this->w[$b] == '{' && $this->p_template ( $b , $x ) )
+                 continue ;
+             $b++ ;
+             continue ;
+         }
+         if ( $tag != $tag2 ) continue ;
+         if ( $selfclosing ) continue ;
+         if ( $closing ) $cnt-- ;
+         else $cnt++ ;
+     }
+
+     if ( $cnt > 0 ) return false ; # Tag was never closed
+
+     # What happens in between?
+     $between = substr ( $this->w , $begin , $last - $begin ) ;
+
+     if ( $tag != "pre" && $tag != "nowiki" && $tag != "math" )
+     {
+         if ( $tag == 'gallery' ) {
+             # A gallery is turned into a wiki table and emitted without a surrounding tag
+             $this->gallery2wiki ( $between ) ;
+             $tag_open = "" ;
+             $tag_close = "" ;
+         }
+
+         # Parse the part in between the tags
+         $subparser = new wiki2xml ;
+         $between2 = $subparser->parse ( $between ) ;
+
+         # Was the parsing correct?
+         if ( $between2 != $this->errormessage )
+             $between = $this->strip_single_paragraph ( $between2 ) ; # No <paragraph> for inline HTML tags
+         else
+             $between = htmlspecialchars ( $between ) ; # Incorrect markup, use safe wiki source instead
+     }
+     else $between = htmlspecialchars ( $between ) ; # No wiki parsing in here
+
+     $a = $b ;
+     if ( $tag_open != "" ) $xml .= $tag_open . $x . ">" ;
+     $xml .= $between ;
+     if ( $tag_close != "" ) $xml .= $tag_close ;
+     return true ;
+ }
+
+ /**
+ * Converts the lines within a <gallery> to wiki tables
+ */
+ # Converts the lines within a <gallery> into a wiki table, four images per row.
+ # $text : one "Image:Name.ext|caption" entry per line; replaced in place by
+ #         the table markup. Without a caption, the file name without
+ #         namespace and extension is used. Blank lines are skipped; a gallery
+ #         without any entry becomes an empty string.
+ function gallery2wiki ( &$text ) {
+     $lines = explode ( "\n" , trim ( $text ) ) ;
+     $rows = "" ;
+     $cnt = 0 ;
+     foreach ( $lines AS $line ) {
+         if ( trim ( $line ) == '' ) continue ; # Not an image entry
+         if ( $cnt >= 4 ) {
+             $cnt = 0 ;
+             $rows .= "|--\n" ;
+         }
+         $a = explode ( "|" , $line , 2 ) ;
+         if ( count ( $a ) == 1 ) { # Generate caption from file name
+             $b = $a[0] ;
+             $b = explode ( ":" , $b , 2 ) ;
+             $b = array_pop ( $b ) ;
+             $b = explode ( "." , $b ) ;
+             array_pop ( $b ) ;
+             $a[] = implode ( "." , $b ) ;
+         }
+         $link = array_shift ( $a ) ;
+         $caption = array_pop ( $a ) ;
+         # Cell attributes must stay on one line, or the table parser no longer sees them
+         $rows .= "|valign=top align=left|[[{$link}|thumb|center|]]<br/>{$caption}\n" ;
+         $cnt++ ;
+     }
+     if ( $rows == "" ) {
+         # A table without cells is rejected by the table parser
+         $text = "" ;
+         return ;
+     }
+     $text = "{| style='border-collapse: collapse; border: 1px solid grey;'\n" . $rows . "|}\n" ;
+ }
+
+ # Removes the <paragraph> element around $s if $s consists of exactly one
+ # paragraph; any other string is returned unchanged.
+ function strip_single_paragraph ( $s )
+ {
+     $open = "<paragraph>" ;
+     $close = "</paragraph>" ;
+     # Exactly one opening and one closing tag ...
+     if ( substr_count ( $s , "paragraph>" ) != 2 ) return $s ;
+     # ... which sit at the very beginning and the very end
+     if ( substr ( $s , 0 , strlen ( $open ) ) != $open ) return $s ;
+     if ( substr ( $s , -strlen ( $close ) ) != $close ) return $s ;
+     return substr ( $s , strlen ( $open ) , -strlen ( $close ) ) ;
+ }
+
+ # This function checks for and parses a HTML tag
+ # Only to be called from p_html, as it returns only a partial extension tag!
+ # Parses a single HTML tag ("<tag attrs>", "</tag>" or "<tag/>") at $a.
+ # $xml         : set to " attr=value ..." if the tag has attributes
+ # $tag         : receives the lower-cased tag name
+ # $closing     : receives true for a closing tag
+ # $selfclosing : receives true for a self-closing tag; <br> always counts as one
+ # Returns true and advances $a past the '>' on success.
+ function p_html_tag ( &$a , &$xml , &$tag , &$closing , &$selfclosing )
+ {
+     if ( $this->w[$a] != '<' ) return false ;
+     $b = $a + 1 ;
+     $this->skipblanks ( $b ) ;
+     $tag = "" ;
+     $attrs = array () ;
+     if ( !$this->scanplaintext ( $b , $tag , array ( " " , ">" ) , array ( "\n" ) ) ) return false ;
+
+     $this->skipblanks ( $b ) ;
+     if ( $b >= $this->wl ) return false ;
+
+     $tag = trim ( strtolower ( $tag ) ) ;
+     $closing = false ;
+     $selfclosing = false ;
+
+     # Is closing tag?
+     if ( substr ( $tag , 0 , 1 ) == "/" )
+     {
+         $tag = substr ( $tag , 1 ) ;
+         $closing = true ;
+         $this->skipblanks ( $b ) ;
+         if ( $b >= $this->wl ) return false ;
+     }
+
+     # A '/' directly behind the name, as in <br/>
+     if ( substr ( $tag , -1 ) == "/" )
+     {
+         $tag = substr ( $tag , 0 , -1 ) ;
+         $selfclosing = true ;
+     }
+
+     # Parsing attributes: find the end of the tag, ignoring '>' and '/' inside quotes
+     $ob = $b ;
+     $q = "" ;
+     while ( $b < $this->wl && ( $q != "" || ( $this->w[$b] != '>' && $this->w[$b] != '/' ) ) ) {
+         if ( $this->w[$b] == '"' || $this->w[$b] == "'" ) {
+             if ( $q == "" ) $q = $this->w[$b] ;
+             else if ( $this->w[$b] == $q ) $q = "" ;
+         }
+         $b++ ;
+     }
+     if ( $b >= $this->wl ) return false ;
+     $attrs = $this->preparse_attributes ( substr ( $this->w , $ob , $b - $ob + 1 ) ) ;
+
+     # Is self closing?
+     if ( $tag == 'br' ) $selfclosing = true ; # Always regard <br> as <br/>
+     if ( $this->w[$b] == '/' )
+     {
+         $b++ ;
+         $this->skipblanks ( $b ) ;
+         $selfclosing = true ;
+     }
+
+     $this->skipblanks ( $b ) ;
+     if ( $b >= $this->wl ) return false ;
+     if ( $this->w[$b] != '>' ) return false ;
+
+     $a = $b + 1 ;
+     if ( count ( $attrs ) > 0 )
+     {
+         $xml = " " . implode ( " " , $attrs ) ;
+     }
+     return true ;
+ }
+
+ # This function replaces templates and separates HTML attributes.
+ # It is used for both HTML tags and wiki tables
+ # Splits an attribute string into single attributes.
+ # $x : the attribute text, ending in '>' or '/'
+ # Templates in $x are run through p_template of a temporary parser, and
+ # '<' and '>' inside quoted values are replaced by entities.
+ # Returns an array of key="value" strings indexed by attribute name. Only
+ # the first occurrence of a name is kept and attributes with an empty
+ # double-quoted value are dropped. Returns an empty array if $x cannot be
+ # parsed completely.
+ function preparse_attributes ( $x )
+ {
+     # Creating a temporary new parser to run the attribute list in
+     $np = new wiki2xml ;
+     $np->w = $x ;
+     $np->wl = strlen ( $x ) ;
+
+     # Replacing templates, and '<' and '>' in parameters
+     $c = 0 ;
+     $q = "" ;
+     while ( $q != "" || ( $c < $np->wl && $np->w[$c] != '>' && $np->w[$c] != '/' ) )
+     {
+         $y = $np->w[$c] ;
+         if ( $np->nextis ( $c , "{{" , false ) ) {
+             $xx = "" ;
+             if ( $np->p_template ( $c , $xx ) ) continue ;
+             else $c++ ;
+         } else if ( $y == "'" || $y == '"' ) {
+             if ( $q == "" ) $q = $y ;
+             else if ( $y == $q ) $q = "" ;
+             $c++ ;
+         } else if ( $q != "" && ( $y == '<' || $y == '>' ) ) {
+             # $c is not advanced here; the next round steps over the entity character by character
+             $y = htmlentities ( $y ) ;
+             $np->w = substr ( $np->w , 0 , $c ) . $y . substr ( $np->w , $c + 1 ) ;
+             $np->wl += strlen ( $y ) - 1 ;
+         } else $c++ ;
+         if ( $c >= $np->wl ) return array () ;
+     }
+
+     $attrs = array () ;
+     $c = 0 ;
+
+     # Seeking attributes
+     while ( $np->w[$c] != '>' && $np->w[$c] != '/' )
+     {
+         $attr = "" ;
+         if ( !$np->p_html_attr ( $c , $attr ) ) break ;
+         if ( $attr != "" ) {
+             # array_shift() takes its argument by reference, so the explode() result needs a variable
+             $parts = explode ( "=" , $attr , 2 ) ;
+             $key = array_shift ( $parts ) ;
+             if ( !isset ( $attrs[$key] ) && substr ( $attr , -3 , 3 ) != '=""' ) {
+                 $attrs[$key] = $attr ;
+             }
+         }
+         $np->skipblanks ( $c ) ;
+         if ( $c >= $np->wl ) return array () ;
+     }
+     # Anything but the final '>' or '/' left over means the list was not parsed completely
+     if ( substr ( $np->w , $c ) != ">" AND substr ( $np->w , $c ) != "/" ) return array() ;
+
+     return $attrs ;
+ }
+
+
+ # This function scans a single HTML tag attribute and returns it as name="value"
+ # Scans a single HTML tag attribute at $a.
+ # $xml : set (not appended) to name="value", or name='value' if the value
+ #        was given in single quotes
+ # The name is lower-cased and reduced to the characters a-z and '_'.
+ # Returns true and advances $a on success; false for a missing or reserved
+ # name, or a value that is broken or cut off by a line break.
+ function p_html_attr ( &$a , &$xml )
+ {
+     $b = $a ;
+     $this->skipblanks ( $b ) ;
+     if ( $b >= $this->wl ) return false ;
+     $name = "" ;
+     if ( !$this->scanplaintext ( $b , $name , array ( " " , "=" , ">" , "/" ) , array ( "\n" ) ) ) return false ;
+
+     $this->skipblanks ( $b ) ;
+     if ( $b >= $this->wl ) return false ;
+     $name = trim ( strtolower ( $name ) ) ;
+
+     # Trying to catch illegal names; should be replaced with regexp
+     $n2 = "" ;
+     for ( $i = 0 ; $i < strlen ( $name ) ; $i++ ) {
+         if ( $name[$i] == '_' OR ( $name[$i] >= 'a' AND $name[$i] <= 'z' ) )
+             $n2 .= $name[$i] ;
+     }
+     $name = trim ( $n2 ) ;
+     if ( $name == 'extension_name' ) return false ; # Not allowed, because used internally
+     if ( $name == '' ) return false ;
+
+     # Determining value
+     $value = "" ;
+     $q = "" ; # Quote character of the value; stays "" for unquoted or missing values
+     if ( $this->w[$b] == "=" )
+     {
+         $b++ ;
+         $this->skipblanks ( $b ) ;
+         if ( $b >= $this->wl ) return false ;
+         $is_q = false ;
+         if ( $this->w[$b] == '"' || $this->w[$b] == "'" )
+         {
+             $q = $this->w[$b] ;
+             $b++ ;
+             if ( $b >= $this->wl ) return false ;
+             $is_q = true ;
+         }
+         while ( $b < $this->wl )
+         {
+             $c = $this->w[$b] ;
+             if ( $c == $q )
+             {
+                 $b++ ;
+                 if ( $is_q ) break ;
+                 return false ; # Broken attribute value
+             }
+             if ( $this->nextis ( $b , "\\{$q}" ) ) # Ignore escaped quotes
+             {
+                 $value .= "\\{$q}" ;
+                 continue ;
+             }
+             if ( $c == "\n" ) return false ; # Line break before value end
+             if ( !$is_q && ( $c == ' ' || $c == '>' || $c == '/' ) ) break ;
+             $value .= htmlspecialchars ( $c ) ;
+             $b++ ;
+         }
+     }
+
+     $a = $b ;
+     # Single quotes are kept, everything else is written with double quotes
+     if ( $q != "'" ) $q = '"' ;
+     $xml = "{$name}={$q}{$value}{$q}" ;
+     #$xml .= "<attr name='{$name}'>{$value}</attr>" ;
+     return true ;
+ }
+
+ # Horizontal ruler (<hr> / ----)
+ # Parses a horizontal ruler ("----", possibly longer) at $a into <hr/>
+ function p_hr ( &$a , &$xml )
+ {
+     $is_rule = $this->nextis ( $a , "----" ) ;
+     if ( $is_rule ) {
+         $this->skipblanks ( $a , "-" ) ; # Any further dashes
+         $this->skipblanks ( $a ) ;
+         $xml .= "<hr/>" ;
+         return true ;
+     }
+     return false ;
+ }
+
+ # TABLE
+ # Scans the rest of the line as HTML attributes and returns them as a string of key="value" pairs
+ # Reads from $a to the end of the line and parses that text as attributes.
+ # $a is left on the newline (or at the end of the text).
+ # Returns " key=value ..." with a leading blank, or "" without attributes.
+ function scanattributes ( &$a )
+ {
+     $line = "" ;
+     for ( ; $a < $this->wl && $this->w[$a] != "\n" ; $a++ )
+         $line .= $this->w[$a] ;
+
+     # preparse_attributes expects the closing '>' of a tag
+     $attrs = $this->preparse_attributes ( $line . ">" ) ;
+
+     if ( count ( $attrs ) == 0 ) return "" ;
+     return " " . implode ( " " , $attrs ) ;
+ }
+
+ # Finds the first of the given items; does *not* alter $a
+ # Returns the first position at or after $a where one of the strings in
+ # $matches begins, or -1 if there is none.
+ function scanahead ( $a , $matches )
+ {
+     for ( $pos = $a ; $pos < $this->wl ; $pos++ )
+     {
+         foreach ( $matches AS $candidate )
+             if ( $this->nextis ( $pos , $candidate , false ) ) return $pos ;
+     }
+     return -1 ; # Not found
+ }
+
+
+ # The main table parsing function
+ # The main table parsing function.
+ # "{|" at $a opens a table. While a table is open, a line starting with
+ # '|' or '!' (after optional blanks) is a table close ("|}") or a table element.
+ # Returns the result of the parser the markup was handed to, or false if
+ # there is no table markup at $a.
+ function p_table ( &$a , &$xml )
+ {
+     if ( $a >= $this->wl ) return false ;
+     $c = $this->w[$a] ;
+     if ( $c == "{" && $this->nextis ( $a , "{|" , false ) )
+         return $this->p_table_open ( $a , $xml ) ;
+
+ #   print "p_table for " . htmlentities ( substr ( $this->w , $a ) ) . "<br/><br/>" ; flush () ;
+
+     if ( count ( $this->tables ) == 0 ) return false ; # No tables open, nothing to do
+
+     # Compatability for table cell lines starting with blanks; *evil MediaWiki parser!*
+     $b = $a ;
+     $this->skipblanks ( $b ) ;
+     if ( $b >= $this->wl ) return false ;
+     $c = $this->w[$b] ;
+
+     if ( $c != "|" && $c != "!" ) return false ; # No possible table markup
+
+     # NOTE(review): the two calls below advance the local $b only; $a is not moved on success - confirm this is intended
+     if ( $c == "|" && $this->nextis ( $b , "|}" , false ) ) return $this->p_table_close ( $b , $xml ) ;
+
+     #if ( $this->nextis ( $a , "|" , false ) || $this->nextis ( $a , "!" , false ) )
+     return $this->p_table_element ( $b , $xml , true ) ;
+ }
+
+ # Returns the entry for the innermost open table
+ function lasttable ()
+ {
+     $top = count ( $this->tables ) - 1 ;
+     return $this->tables[$top] ;
+ }
+
+ # Returns the attributes for table cells
+ # Returns the attributes for table cells.
+ # A cell may begin with "attributes|" (or "attributes!"). If the characters
+ # from $a on are all in $this->allowed and are followed by a single '|' or
+ # '!', they are parsed as attributes and $a is moved behind the separator.
+ # Returns the attribute string, or "" (with $a untouched) if there is no
+ # attribute part.
+ function tryfindparams ( &$a )
+ {
+     $n = strspn ( $this->w , $this->allowed , $a ) ; # PHP 4.3.0 and above
+ #   $n = strspn ( substr ( $this->w , $a ) , $this->allowed ) ; # PHP < 4.3.0
+     if ( $n == 0 ) return "" ; # None found
+
+     $b = $a + $n ;
+     if ( $b >= $this->wl ) return "" ;
+     if ( $this->w[$b] != "|" && $this->w[$b] != "!" ) return "" ;
+     if ( $this->nextis ( $b , "||" , false ) ) return "" ; # Reached a ||, so return blank string
+     if ( $this->nextis ( $b , "!!" , false ) ) return "" ; # Reached a !!, so return blank string
+     # scanattributes reads up to the next newline, so the separator is
+     # temporarily replaced by one and put back afterwards ('|' or '!')
+     $separator = $this->w[$b] ;
+     $this->w[$b] = "\n" ;
+     $ret = $this->scanattributes ( $a ) ;
+     $this->w[$b] = $separator ;
+     $a = $b + 1 ;
+     return $ret ;
+ }
+
+ # Parses one table element at $a: a row ("|-"), a caption ("|+"), a cell
+ # ('|') or a header cell ('!').
+ # $newline : true if $a is at the beginning of a line; rows and captions
+ #            are only recognized there
+ # Opens and closes <tablerow> as needed and records had_row / had_cell on
+ # the innermost open table.
+ # Returns true, appends to $xml and advances $a on success.
+ function p_table_element ( &$a , &$xml , $newline = false )
+ {
+ #   print "p_table_element for " . htmlentities ( substr ( $this->w , $a ) ) . "<br/><br/>" ; flush () ;
+     $b = $a ;
+     $this->skipblanks ( $b ) ; # Compatability for table cells starting with blanks; *evil MediaWiki parser!*
+     if ( $b >= $this->wl ) return false ; # End of the game
+     $t = count ( $this->tables ) - 1 ; # Index of the innermost open table
+     $x = "" ;
+     if ( $newline && $this->nextis ( $b , "|-" ) ) # Table row
+     {
+         $this->skipblanks ( $b , "-" ) ;
+         $this->skipblanks ( $b ) ;
+
+         $attrs = $this->scanattributes ( $b ) ;
+         if ( $this->tables[$t]->is_row_open ) $x .= "</tablerow>" ;
+         else $this->tables[$t]->is_row_open = true ;
+         $this->tables[$t]->had_row = true ;
+         $x .= "<tablerow{$attrs}>" ;
+         # Whatever else is on the row line is parsed but not used
+         $y = "" ;
+         $this->p_restofcell ( $b , $y ) ;
+     }
+     else if ( $newline && $this->nextis ( $b , "|+" ) ) # Table caption
+     {
+         $this->skipblanks ( $b ) ;
+         $attrs = $this->tryfindparams ( $b ) ;
+         $this->skipblanks ( $b ) ;
+         if ( $this->tables[$t]->is_row_open ) $x .= "</tablerow>" ;
+         $this->tables[$t]->is_row_open = false ;
+
+         $y = "" ;
+         if ( !$this->p_restofcell ( $b , $y ) ) return false ;
+         $x .= "<tablecaption{$attrs}>{$y}</tablecaption>" ;
+     }
+     else # TD or TH
+     {
+         $c = $this->w[$b] ;
+         $b++ ;
+         $tag = "error" ;
+         if ( $c == '|' ) $tag = "tablecell" ;
+         else if ( $c == '!' ) $tag = "tablehead" ;
+         $attrs = $this->tryfindparams ( $b ) ;
+         $this->skipblanks ( $b ) ;
+         if ( !$this->p_restofcell ( $b , $x ) ) return false ;
+
+         # Crude fix to compensate for MediaWiki "tolerant" parsing
+         if ( substr ( $x , 0 , 1 ) == "|" )
+             $x = substr ( $x , 1 ) ;
+         $x = "<{$tag}{$attrs}>{$x}</{$tag}>" ;
+         $this->tables[$t]->had_cell = true ;
+         if ( !$this->tables[$t]->is_row_open )
+         {
+             # A cell without a preceding "|-" opens a row by itself
+             $this->tables[$t]->is_row_open = true ;
+             $this->tables[$t]->had_row = true ;
+             $x = "<tablerow>{$x}" ;
+         }
+     }
+
+     $a = $b ;
+     $xml .= $x ;
+     return true ;
+ }
+
+ # Finds the substring that composes the table cell,
+ # then runs a new parser on it
+ # Parses the content of one table cell starting at $a.
+ # The cell ends at the next line that starts with '|' or '!' (optionally
+ # after one blank), or at "||" / "!!" while still on the first line.
+ # Markup of tables nested inside the cell is skipped over.
+ # Returns true, appends the parsed cell to $xml and moves $a one
+ # character past the end of the cell; false if the cell content cannot be parsed.
+ function p_restofcell ( &$a , &$xml )
+ {
+ # Get substring for cell
+ $b = $a ;
+ $sameline = true ;
+ $x = "" ;
+ # Number of nested tables currently open inside this cell
+ $itables = 0 ;
+ while ( $b < $this->wl )
+ {
+ $c = $this->w[$b] ;
+ if ( $c == "<" && $this->once ( $b , $x , "html" ) )
continue ; # Up front to catch pre and nowiki
+ if ( $c == "\n" ) { $sameline = false ; }
+ if ( $c == "\n" && $this->nextis ( $b , "\n{|" ) ) {
$itables++ ; continue ; }
+ if ( $c == "\n" && $itables > 0 && $this->nextis ( $b
, "\n|}" ) ) { $itables-- ; continue ; }
+
+ # Start of the next table element?
+ if ( ( $c == "\n" && $this->nextis ( $b , "\n|" , false
) ) OR
+ ( $c == "\n" && $this->nextis ( $b , "\n!" , false )
) OR
+ ( $c == "\n" && $this->nextis ( $b , "\n |" , false )
) OR # MediaWiki parser madness compensator
+ ( $c == "\n" && $this->nextis ( $b , "\n !" , false )
) OR # MediaWiki parser madness compensator
+ ( $c == "|" && $sameline && $this->nextis ( $b ,
"||" , false ) ) OR
+ ( $c == "!" && $sameline && $this->nextis ( $b ,
"!!" , false ) ) )
+ {
+ # Inside a nested table the element belongs to that table; skip it
+ if ( $itables == 0 ) break ;
+ $b += 2 ;
+ }
+
+ # Links and templates are skipped as a whole, so a '|' inside them does not end the cell
+ if ( $c == "[" && $this->once ( $b , $x ,
"internal_link" ) ) continue ;
+ if ( $c == "{" && $this->once ( $b , $x ,
"template_variable" ) ) continue ;
+ if ( $c == "{" && $this->once ( $b , $x , "template" )
) continue ;
+ $b++ ;
+ }
+
+# if ( $itables > 0 ) return false ;
+
+ # Parse cell substring
+ $s = substr ( $this->w , $a , $b - $a ) ;
+ $p = new wiki2xml ;
+ $x = $p->parse ( $s ) ;
+ if ( $x == $this->errormessage ) return false ;
+
+ $a = $b + 1 ;
+ $xml .= $this->strip_single_paragraph ( $x ) ;
+ return true ;
+ }
+
+ # Parses the end of a table ("|}") at $a.
+ # Fails if no table is open, or if the innermost table has had no row or
+ # no cell. On success an open row is closed, the table is removed from
+ # $this->tables, "</table>" is appended to $xml and $a is moved past the
+ # markup and any newlines after it.
+ function p_table_close ( &$a , &$xml )
+ {
+     if ( count ( $this->tables ) == 0 ) return false ;
+     $b = $a ;
+     if ( !$this->nextis ( $b , "|}" ) ) return false ;
+     if ( !$this->tables[count($this->tables)-1]->had_row ) return false ; # Table but no row was used
+     if ( !$this->tables[count($this->tables)-1]->had_cell ) return false ; # Table but no cell was used
+     $x = "" ;
+     if ( $this->tables[count($this->tables)-1]->is_row_open ) $x .= "</tablerow>" ;
+     unset ( $this->tables[count($this->tables)-1] ) ;
+     $x .= "</table>" ;
+     $xml .= $x ;
+     $a = $b ;
+     while ( $this->nextis ( $a , "\n" ) ) ;
+     return true ;
+ }
+
+ # Parses a whole table starting with "{|" at $a, up to the matching "|}".
+ # A state object (is_row_open, had_row, had_cell) is pushed onto
+ # $this->tables for the duration of the table; nested tables are parsed
+ # recursively.
+ # Returns true, appends the table to $xml and advances $a on success. On
+ # failure the state object is removed again and false is returned.
+ function p_table_open ( &$a , &$xml )
+ {
+     $b = $a ;
+     if ( !$this->nextis ( $b , "{|" ) ) return false ;
+
+     $this->is_row_open = false ;
+
+     # Add table to stack
+     $nt = new stdClass ; # Must be created explicitly before properties are assigned
+     $nt->is_row_open = false ;
+     $nt->had_row = false ;
+     $nt->had_cell = false ;
+     $this->tables[count($this->tables)] = $nt ;
+
+     $x = "<table" ;
+     $x .= $this->scanattributes ( $b ) . ">" ;
+     while ( $this->nextis ( $b , "\n" ) ) ;
+
+     while ( !$this->p_table_close ( $b , $x ) )
+     {
+         if ( $b >= $this->wl )
+         {
+             unset ( $this->tables[count($this->tables)-1] ) ;
+             return false ;
+         }
+         if ( $this->p_table_open ( $b , $x ) ) continue ;
+         if ( !$this->p_table_element ( $b , $x , true ) ) # No |} and no table element
+         {
+             unset ( $this->tables[count($this->tables)-1] ) ;
+             return false ;
+         }
+     }
+     $a = $b ;
+     $xml .= $x ;
+     return true ;
+ }
+
+ #-----------------------------------
+ # Parse the article
+ # Parses the whole text from $a to its end, trying the block parsers in a
+ # fixed order at every position. A character that no parser accepts is
+ # escaped and copied, preceded by an <error> element unless
+ # $this->compensate_markup_errors is set.
+ # Always returns true; the result is appended to $xml and $a is moved to
+ # the end of the text.
+ function p_article ( &$a , &$xml )
+ {
+     $x = "" ;
+     $b = $a ;
+     while ( $b < $this->wl )
+     {
+         if ( $this->onceormore ( $b , $x , "heading" ) ) continue ;
+         if ( $this->onceormore ( $b , $x , "block_lines" ) ) continue ;
+         if ( $this->onceormore ( $b , $x , "block_pre" ) ) continue ;
+         if ( $this->onceormore ( $b , $x , "block_list" ) ) continue ;
+         if ( $this->onceormore ( $b , $x , "hr" ) ) continue ;
+         if ( $this->onceormore ( $b , $x , "table" ) ) continue ;
+         if ( $this->onceormore ( $b , $x , "blankline" ) ) continue ;
+         if ( $this->p_block_lines ( $b , $x , true ) ) continue ;
+         # The last resort! It should never come to this!
+         # Written to $x like everything else, so it keeps its place in the output
+         if ( !$this->compensate_markup_errors ) $x .= "<error type='general' reason='no matching markup'/>" ;
+         $x .= htmlspecialchars ( $this->w[$b] ) ;
+         $b++ ;
+     }
+     $a = $b ;
+     $xml .= $x ;
+
+ #   asort ( $this->profile ) ;
+ #   $p = "" ;
+ #   foreach ( $this->profile AS $k => $v ) $p .= "<p>{$k} : {$v}</p>" ;
+ #   $xml = "<debug>{$this->cnt}{$p}</debug>" . $xml ;
+     return true ;
+ }
+
+ # The only function to be called directly from outside the class
+ # Converts wiki text to XML.
+ # The only function to be called directly from outside the class.
+ # $wiki : the wiki source text (not modified)
+ # Returns the XML string, or $this->errormessage if the text could not be parsed.
+ function parse ( &$wiki )
+ {
+     $this->w = rtrim ( $wiki ) ;
+
+     # Fix line endings
+     $cc = count_chars ( $wiki , 0 ) ;
+     # Text with CR but no LF uses CR as its line break; turn those into LF
+     # before the remaining CRs (the CR of CR LF) are removed
+     if ( $cc[13] > 0 && $cc[10] == 0 )
+         $this->w = str_replace ( "\r" , "\n" , $this->w ) ;
+     $this->w = str_replace ( "\r" , "" , $this->w ) ;
+
+     # Remove HTML comments
+     $this->w = preg_replace( '?<!--.*-->?msU', '', $this->w);
+
+     # Run the thing!
+ #   $this->tables = array () ;
+     $this->wl = strlen ( $this->w ) ;
+     $xml = "" ;
+     $a = 0 ;
+     if ( !$this->p_article ( $a , $xml ) ) return $this->errormessage ;
+
+     # XML cleanup
+     # Leading blanks of a preformatted line become <space/> tags, one blank per round
+     $ol = -1 ;
+     while ( $ol != strlen ( $xml ) ) {
+         $ol = strlen ( $xml ) ;
+         $xml = str_replace ( "<preline> " , "<preline><space/>" , $xml ) ;
+         $xml = str_replace ( "<space/> " , "<space/><space/>" , $xml ) ;
+     }
+     # Collapse runs of blanks into a single blank; repeated until the length stops changing.
+     # NOTE(review): the search string is restored as two blanks - the archived copy shows a single blank, which makes this loop a no-op
+     $ol = -1 ;
+     while ( $ol != strlen ( $xml ) ) {
+         $ol = strlen ( $xml ) ;
+         $xml = str_replace ( "  " , " " , $xml ) ;
+     }
+     # Optionally make blanks next to tags explicit
+     $ol = -1 ;
+     while ( $this->use_space_tag && $ol != strlen ( $xml ) ) {
+         $ol = strlen ( $xml ) ;
+         $xml = str_replace ( "> " , "><space/>" , $xml ) ;
+         $xml = str_replace ( " <" , "<space/><" , $xml ) ;
+     }
+     $xml = str_replace ( '<tablerow></tablerow>' , '' , $xml ) ;
+
+     return $xml ;
+ }
+
+ }
+
+?>
Added: projects/wiki/extensions/wiki2xml/xhtml.xslt
===================================================================
--- projects/wiki/extensions/wiki2xml/xhtml.xslt (rev 0)
+++ projects/wiki/extensions/wiki2xml/xhtml.xslt 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,123 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ XSLT for wiki2xml
+
+ usage: /usr/bin/xsltproc xhtml.xslt yourfile.xml
+
+ Given a wiki syntax article, use wiki2xml by Magnus Manske to convert it
+ into an XML document. Save the XML in a file (e.g. yourfile.xml), then launch
+ xsltproc, which will happily apply this stylesheet to the XML document
+ and output some XHTML.
+
+
+ Author:
+ Ashar Voultoiz <hashar(a)altern.org>
+ License:
+
http://www.gnu.org/copyleft/gpl.html GNU General Public Licence 2.0 or later
+
+ Copyright © 2006 Ashar Voultoiz
+
+-->
+<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!-- Serialise as indented UTF-8 HTML carrying the XHTML 1.0 Transitional doctype. -->
+<xsl:output
+ method="html" indent="yes"
+ encoding="utf-8"
+ doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"
+ doctype-public="-//W3C//DTD XHTML 1.0 Transitional//EN"
+/>
+
+<!-- The document root and the <articles> wrapper produce no markup of
+     their own; processing simply descends into their children. -->
+<xsl:template match="/ | /articles">
+ <xsl:apply-templates select="node()"/>
+</xsl:template>
+
+<!-- One complete HTML page per <article>: the @title attribute is used for
+     both the <title> and the first heading, and the page imports the
+     MonoBook stylesheet from en.wikipedia.org. -->
+<xsl:template match="/articles/article">
+<html>
+<head>
+ <title><xsl:value-of select="@title" /></title>
+ <style type="text/css" media="screen,projection">@import
"http://en.wikipedia.org/w/skins-1.5/monobook/main.css";</style>
+</head>
+<body class="ns-0 ltr">
+<div id="globalWrapper">
+ <div id="column-content">
+ <div id="content">
+ <h1 class="firstHeading"><xsl:value-of select="@title"
/></h1>
+ <div id="bodyContent">
+ <h3 id="siteSub">Generated with xhtml.xslt</h3>
+ <div id="contentSub"></div>
+ </div>
+ <!-- The converted article body is emitted after the bodyContent div, inside div#content -->
+ <xsl:apply-templates />
+ </div>
+ </div>
+</div>
+</body>
+</html>
+</xsl:template>
+
+<!-- A wiki paragraph maps directly onto an HTML p element. -->
+<xsl:template match="paragraph">
+ <p>
+  <xsl:apply-templates select="node()"/>
+ </p>
+</xsl:template>
+
+<!-- Lists: type "numbered" becomes an ordered list; "bullet" and every
+     other type become an unordered list.
+     The type names must be quoted: an unquoted name in an XPath test is a
+     child-element node-set, so the comparison would never be true. -->
+<xsl:template match="list">
+ <xsl:choose>
+  <xsl:when test="@type = 'numbered'">
+   <ol><xsl:apply-templates/></ol>
+  </xsl:when>
+  <xsl:otherwise>
+   <ul><xsl:apply-templates/></ul>
+  </xsl:otherwise>
+ </xsl:choose>
+</xsl:template>
+
+<!-- Each list item becomes an HTML li element. -->
+<xsl:template match="listitem">
+ <li>
+  <xsl:apply-templates select="node()"/>
+ </li>
+</xsl:template>
+
+<!-- An explicit <space/> becomes a non-breaking space. The character is
+     written as a numeric reference because the nbsp entity is not declared
+     in a stylesheet; the html output method serialises U+00A0 itself. -->
+<xsl:template match="space">
+ <xsl:text>&#160;</xsl:text><xsl:apply-templates />
+</xsl:template>
+
+<!-- Italic wiki markup becomes an HTML i element. -->
+<xsl:template match="italics">
+ <i>
+  <xsl:apply-templates select="node()"/>
+ </i>
+</xsl:template>
+
+<!-- Links.
+     External links (type 'external') point at @href and use the element's
+     string value as link text.
+     All other links point at http://yourhost/wiki/ followed by the rendered
+     target; the link text is the part children when present, otherwise the
+     target.
+     The anchor is built as a real result element instead of writing raw
+     tag text with disable-output-escaping, so the stylesheet stays
+     well-formed and attribute values are escaped by the serialiser. -->
+<xsl:template match="link">
+ <xsl:choose>
+  <xsl:when test="@type='external'">
+   <a href="{@href}"><xsl:value-of select="."/></a>
+  </xsl:when>
+  <xsl:otherwise>
+   <a>
+    <xsl:attribute name="href">
+     <xsl:text>http://yourhost/wiki/</xsl:text>
+     <xsl:apply-templates select="target"/>
+    </xsl:attribute>
+    <xsl:choose>
+     <xsl:when test="child::part">
+      <xsl:apply-templates select="part"/>
+     </xsl:when>
+     <xsl:otherwise>
+      <xsl:apply-templates select="target"/>
+     </xsl:otherwise>
+    </xsl:choose>
+   </a>
+  </xsl:otherwise>
+ </xsl:choose>
+</xsl:template>
+
+<!-- Link targets and link parts add no markup; only their content is rendered. -->
+<xsl:template match="target | part">
+ <xsl:apply-templates select="node()"/>
+</xsl:template>
+
+</xsl:stylesheet>
Added: projects/wiki/extensions/wiki2xml/xml2docbook_xml.php
===================================================================
--- projects/wiki/extensions/wiki2xml/xml2docbook_xml.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/xml2docbook_xml.php 2007-07-04 19:14:29 UTC (rev
3798)
@@ -0,0 +1,456 @@
+<?php
+
+/**
+ * This file contains the /element/ class needed by xml2tree.php
+ * to create a tree which is then converted into DocBook XML
+ */
+
+class element {
+ var $name = '';
+ var $attrs = array ();
+ var $children = array ();
+
+ # Temporary variables for link tags
+ var $link_target = "" ;
+ var $link_trail = "" ;
+ var $link_parts = array () ;
+
+ # Variables only used by $tree root
+ var $list = array () ;
+ var $opentags = array () ;
+ var $sections = array () ;
+
+ /**
+  * Render every child of this node and concatenate the results.
+  *
+  * String children are buffered and passed through add_temp_text() as soon
+  * as an element child (or the end of the list) is reached. Children named
+  * ATTRS are skipped. For LINK nodes the rendered TARGET, PART and TRAIL
+  * children are also stored in the link_* properties.
+  */
+ function sub_parse(& $tree) {
+     $out = '' ;
+     $pending = "" ;
+     foreach ($this->children as $child) {
+         if (is_string($child)) {
+             $pending .= $child ;
+             continue ;
+         }
+         if ($child->name == 'ATTRS') continue ;
+
+         $out .= $this->add_temp_text ( $pending ) ;
+         $rendered = $child->parse ( $tree , "" , $this ) ;
+         if ( $this->name == 'LINK' ) {
+             switch ( $child->name ) {
+                 case 'TARGET' : $this->link_target = $rendered ; break ;
+                 case 'PART' : $this->link_parts[] = $rendered ; break ;
+                 case 'TRAIL' : $this->link_trail = $rendered ; break ;
+             }
+         }
+         $out .= $rendered ;
+     }
+     return $out . $this->add_temp_text ( $pending ) ;
+ }
+
+ /**
+  * Prepare character data for inclusion in the DocBook output.
+  *
+  * HTML entities are decoded first, then the three XML-special characters
+  * are escaped again so the text cannot break the generated markup, and
+  * finally the string is passed through utf8_decode().
+  *
+  * @param string $s Raw text.
+  * @return string Escaped text.
+  */
+ function fix_text ( $s ) {
+     $s = html_entity_decode ( $s ) ;
+     # Defined outside this file; presumably rewrites $s by reference - TODO confirm
+     filter_named_entities ( $s ) ;
+     # "&" goes first, otherwise the entities produced below would be escaped twice
+     $s = str_replace ( "&" , "&amp;" , $s ) ;
+     $s = str_replace ( "<" , "&lt;" , $s ) ;
+     $s = str_replace ( ">" , "&gt;" , $s ) ;
+     return utf8_decode ( $s ) ;
+ }
+
+ /**
+  * Flush the text buffer: return its content run through fix_text() and
+  * reset the caller's buffer to the empty string.
+  */
+ function add_temp_text ( &$temp ) {
+     $fixed = $this->fix_text ( $temp ) ;
+     $temp = "" ;
+     return $fixed ;
+ }
+
+ /**
+  * Open $tag unconditionally: push it on the open-tag stack and return
+  * the opening markup followed by a newline.
+  */
+ function add_new ( $tag , &$tree ) {
+     $opening = "<{$tag}>\n" ;
+     return $this->ensure_new ( $tag , $tree , $opening ) ;
+ }
+
+ /**
+  * Make sure $tag is open.
+  *
+  * Without $opttag nothing is emitted when the tag is already on the
+  * open-tag stack; otherwise the tag is pushed and "<tag>\n" returned.
+  * With $opttag the tag is always pushed and $opttag itself is returned.
+  */
+ function ensure_new ( $tag , &$tree , $opttag = "" ) {
+     $use_default = ( $opttag == "" ) ;
+     if ( $use_default && in_array ( $tag , $tree->opentags ) ) return "" ; # Already open
+     $tree->opentags[] = $tag ;
+     return $use_default ? "<{$tag}>\n" : $opttag ;
+ }
+
+ /**
+  * Close the innermost open $tag together with everything opened after it.
+  *
+  * Returns "" when $tag is not open. With $all set, the call repeats until
+  * no $tag is left on the stack.
+  */
+ function close_last ( $tag , &$tree , $all = false ) {
+     if ( !in_array ( $tag , $tree->opentags ) ) return "" ; # Already closed
+     $closed = "\n" ;
+     while ( count ( $tree->opentags ) > 0 ) {
+         $top = array_pop ( $tree->opentags ) ;
+         $closed .= "</{$top}>\n" ;
+         if ( $top != $tag ) continue ;
+         if ( !$all ) return $closed ;
+         return $closed . $this->close_last ( $tag , $tree , true ) ;
+     }
+ }
+
+ /**
+  * Render an extension tag.
+  *
+  * The children are rendered against an empty open-tag stack; whatever is
+  * still open afterwards is closed, then the caller's stack is restored.
+  * A "ref" extension is wrapped in <footnote> (with a paragraph opened
+  * first); any other extension just yields its rendered content.
+  */
+ function handle_extensions ( &$tree ) {
+     $is_ref = ( strtolower ( $this->attrs['EXTENSION_NAME'] ) == 'ref' ) ;
+     $outer_tags = $tree->opentags ;
+     $tree->opentags = array () ;
+
+     $body = $is_ref ? $this->ensure_new ( 'para' , $tree ) : "" ;
+     $body .= $this->sub_parse ( $tree ) ;
+     while ( count ( $tree->opentags ) > 0 ) {
+         $body .= "</" . array_pop ( $tree->opentags ) . ">\n" ;
+     }
+     $tree->opentags = $outer_tags ;
+
+     return $is_ref ? '<footnote>' . $body . '</footnote>' : $body ;
+ }
+
+ /**
+  * Derive an id from a title: every byte that is not an ASCII letter or
+  * digit is replaced by an underscore.
+  */
+ function internal_id ( $title ) {
+     return preg_replace ( '/[^A-Za-z0-9]/' , '_' , $title ) ;
+ }
+
+ /**
+  * Render a LINK node as DocBook markup.
+  *
+  * The children are rendered first (which fills link_target, link_parts and
+  * link_trail via sub_parse()); the open-tag stack is saved before and
+  * restored after that. External links become <ulink>. Internal links are
+  * dispatched on the namespace id reported by $content_provider: 6 is
+  * treated as an image, -9 as an interlanguage link, -8 as a category;
+  * anything else becomes a <link linkend=...> when the target is a known
+  * article and stays plain text otherwise.
+  */
+ function handle_link ( &$tree ) {
+ global $content_provider ;
+ $ot = $tree->opentags ;
+ $sub = $this->sub_parse ( $tree ) ;
+ $tree->opentags = $ot ;
+ $link = "" ;
+ if ( isset ( $this->attrs['TYPE'] ) AND strtolower (
$this->attrs['TYPE'] ) == 'external' ) { # External link
+ $href = htmlentities ( $this->attrs['HREF'] ) ;
+ # No link text given: fall back to the part of the URL between "://" and the next "/"
+ if ( trim ( $sub ) == "" ) {
+ $sub = $href ;
+ $sub = explode ( '://' , $sub , 2 ) ;
+ $sub = explode ( '/' , array_pop ( $sub ) , 2 ) ;
+ $sub = array_shift ( $sub ) ;
+ }
+ $sub = $this->fix_text ( $sub ) ;
+ $link = "<ulink
url=\"{$href}\"><citetitle>{$sub}</citetitle></ulink>"
;
+ } else { # Internal link
+ # Link text is the last PART if there is one, else the target; the trail is appended
+ if ( count ( $this->link_parts ) > 0 ) {
+ $link = array_pop ( $this->link_parts ) ;
+ array_push ( $this->link_parts , $link ) ; # Compensating array_pop
+ }
+ $link_text = $link ;
+ if ( $link == "" ) $link = $this->link_target ;
+ $link .= $this->link_trail ;
+
+ $ns = $content_provider->get_namespace_id ( $this->link_target ) ;
+
+
+ if ( $ns == 6 ) { # Image
+ $nstext = explode ( ":" , $this->link_target , 2 ) ;
+ $target = array_pop ( $nstext ) ;
+ $nstext = array_shift ( $nstext ) ;
+
+ # The last part is the caption; it is removed from link_parts here
+ $text = array_pop ( $this->link_parts ) ;
+ $is_thumb = false ;
+ $align = '' ;
+ $width = '' ;
+ foreach ( $this->link_parts AS $s ) {
+ $s = trim ( $s ) ;
+ if ( $s == 'thumb' ) {
+ $is_thumb = true ;
+ if ( $align == '' ) $align = 'right' ;
+ if ( $width == '' ) $width = '200px' ;
+ }
+ }
+
+ $href = $content_provider->get_image_url ( $target ) ;
+
+ $link = "<mediaobject>\n<imageobject>\n<imagedata" ;
+ $link .= " fileref=\"{$href}\"" ;
+# if ( $align != '' ) $link .= " align='{$align}'" ; #
Deactivated until DocBook supports floating images; meanwhile:
+ if ( $align == 'center' ) $link .= " align='{$align}'" ;
+ if ( $width != '' ) $link .= " width='$width'
scalefit='1'" ; # depth='$width'
+ $link .= "/>\n</imageobject>\n" ;
+ $link .= "<textobject>\n" ;
+ $link .= "<phrase>{$text}</phrase>\n" ;
+ $link .= "</textobject>\n" ;
+ if ( $is_thumb ) {
+ $link .= "<caption>\n" ;
+ if ( substr ( $text , 0 , 5 ) == '<para' ) $link .= $text ; # Para-noia!
+ else $link .= "<para>{$text}</para>\n" ;
+ $link .= "</caption>\n" ;
+ }
+ $link .= "</mediaobject>\n" ;
+ } else if ( $ns == -9 ) { # Interlanguage link
+ $sub = $this->link_target ;
+ $nstext = explode ( ":" , $sub , 2 ) ;
+ $name = array_pop ( $nstext ) ;
+ $nstext = array_shift ( $nstext ) ;
+
+ # The prefix before ":" is used as the wikipedia.org subdomain
+ $href = "http://{$nstext}.wikipedia.org/wiki/" . htmlentities ( $name ) ;
+ $link = "<ulink
url=\"{$href}\"><citetitle>{$sub}</citetitle></ulink>"
;
+ } else if ( $ns == -8 ) { # Category link
+ # A link text of "!" or "*" is dropped, any other text is shown in parentheses
+ if ( $link_text == "!" || $link_text == '*' ) $link = ""
;
+ else $link = " ({$link})" ;
+ $link = "" . $this->link_target . $link . "" ;
+ } else {
+ if ( $content_provider->is_an_article ( $this->link_target ) ) {
+ $lt = $this->internal_id ( trim ( $this->link_target ) ) ;
+ $lt = str_replace ( "+" , "_" , $lt ) ;
+ $link = "<link linkend='{$lt}'>{$link}</link>" ;
+ } else {
+ # Not a known article: $link stays as plain text
+ #$link = "<link linkend='{$lt}'>{$link}</link>" ;
+ }
+ }
+ }
+ return $link ;
+ }
+
+ /**
+  * Build the <title> and opening <tgroup> of a DocBook table.
+  *
+  * Scans the TABLEROW children to find the widest row, counting each
+  * TABLECELL / TABLEHEAD once or by its COLSPAN attribute, and renders
+  * every TABLECAPTION child (in "DOCAPTION" mode) into the title.
+  */
+ function make_tgroup ( &$tree ) {
+ $num_rows = 0 ;
+ $max_num_cols = 0 ;
+ $caption = "" ;
+ foreach ($this->children AS $key1 => $row) {
+ if (is_string($row)) continue ;
+ elseif ($row->name == 'TABLECAPTION') {
+ $caption .= $row->parse ( $tree , "DOCAPTION" , $this ) ;
+ continue ;
+ } elseif ($row->name != 'TABLEROW') continue ;
+ $num_rows++ ;
+ $num_cols = 0 ;
+ foreach ( $row->children AS $key2 => $col ) {
+ if (is_string($col)) continue ;
+ if ($col->name != 'TABLECELL' && $col->name !=
'TABLEHEAD') continue ;
+ if ( isset ( $col->attrs['COLSPAN'] ) ) $num_cols +=
$col->attrs['COLSPAN'] ;
+ else $num_cols++ ;
+ }
+ if ( $num_cols > $max_num_cols )
+ $max_num_cols = $num_cols ;
+ }
+ # $num_rows is counted but not used in the result
+ return "<title>{$caption}</title><tgroup
cols='{$max_num_cols}'>" ;
+ }
+
+ /**
+  * Return the most recently opened tag without removing it, or "" when
+  * no tag is open.
+  */
+ function top_tag ( &$tree ) {
+     if ( count ( $tree->opentags ) == 0 ) return "" ;
+     $stack = $tree->opentags ; # Work on a copy so the real stack is untouched
+     return array_pop ( $stack ) ;
+ }
+
+ /**
+  * Map XHTML list tags onto their wiki equivalents.
+  *
+  * XHTML:LI is renamed to LISTITEM (the function still returns false).
+  * XHTML:UL / XHTML:OL are renamed to LIST only when a "para" is open:
+  * all tags down to and including that para are closed, the closing
+  * markup is appended to $ret, this node's TYPE attribute is set, and
+  * true is returned. In every other case nothing is changed and false
+  * is returned.
+  */
+ function convert_xhtml_tags ( &$oldtag , &$tree , &$ret ) {
+     if ( substr ( $oldtag , 0 , 6 ) != 'XHTML:' ) return false ;
+     $tag = substr ( $oldtag , 6 ) ;
+
+     if ( $tag == 'LI' ) {
+         $oldtag = 'LISTITEM' ;
+         return false ;
+     }
+     if ( $tag != 'UL' && $tag != 'OL' ) return false ; # No match
+
+     # Unwind a copy of the stack until the enclosing paragraph is closed
+     $remaining = $tree->opentags ;
+     $closing = "" ;
+     $closed_para = false ;
+     while ( !$closed_para && count ( $remaining ) > 0 ) {
+         $x = array_pop ( $remaining ) ;
+         $closing .= "</{$x}>\n" ;
+         $closed_para = ( $x == 'para' ) ;
+     }
+     if ( !$closed_para ) return false ;
+
+     $tree->opentags = $remaining ;
+     $this->attrs['TYPE'] = ( $tag == 'UL' ) ? "bullet" : "numbered" ;
+     $oldtag = 'LIST' ;
+     $ret .= $closing ;
+     return true ;
+ }
+
+ /**
+  * Convert this node and, recursively, its children to DocBook XML.
+  *
+  * Works in three phases: emit the opening markup for the tag, render the
+  * children (skipped for MAGIC_VARIABLE and TEMPLATE), emit the closing
+  * markup.
+  *
+  * @param element $tree  Root element holding the shared opentags / sections stacks.
+  * @param string  $param "DOCAPTION" lets a TABLECAPTION render; without it
+  *                       a caption yields "".
+  * @param mixed   $root  Parent node as passed by sub_parse(); not used here.
+  * @return string DocBook XML fragment.
+  */
+ function parse ( &$tree , $param = "" , $root = "" ) {
+     $ret = '' ;
+     $tag = $this->name ;
+     $close_tag = "" ;
+     $list_type = "" ; # Only set for LIST; initialised so the closing phase never reads an undefined variable
+
+     # Inline markup: tag => ( opening markup , name of the closing tag )
+     $inline = array (
+         'BOLD' => array ( '<emphasis role="bold">' , 'emphasis' ) , # <b> or '''
+         'XHTML:STRONG' => array ( '<emphasis role="bold">' , 'emphasis' ) ,
+         'XHTML:B' => array ( '<emphasis role="bold">' , 'emphasis' ) ,
+         'ITALICS' => array ( '<emphasis>' , 'emphasis' ) , # <i> or ''
+         'XHTML:EM' => array ( '<emphasis>' , 'emphasis' ) ,
+         'XHTML:I' => array ( '<emphasis>' , 'emphasis' ) ,
+         'XHTML:TT' => array ( '<literal>' , 'literal' ) ,
+         'XHTML:SUB' => array ( '<subscript>' , 'subscript' ) ,
+         'XHTML:SUP' => array ( '<superscript>' , 'superscript' ) ,
+         'PRELINE' => array ( '<programlisting>' , 'programlisting' ) ,
+         'XHTML:PRE' => array ( '<programlisting>' , 'programlisting' ) ,
+     ) ;
+
+     # Pre-fixing XHTML to wiki tags (may rename $tag and append to $ret)
+     $xhtml_conversion = $this->convert_xhtml_tags ( $tag , $tree , $ret ) ;
+
+     # --- Opening markup ---
+     if ( $tag == 'SPACE' ) {
+         return ' ' ; # Speedup
+     } else if ( $tag == 'ARTICLES' || $tag == 'AUTHORS' ) {
+         # dummy, to prevent default action to be called
+     } else if ( $tag == 'AUTHOR' ) {
+         add_author ( $this->sub_parse ( $tree ) ) ;
+         return "" ;
+     } else if ( $tag == 'ARTICLE' ) {
+         $title = isset ( $this->attrs["TITLE"] ) ? $this->attrs["TITLE"] : "Untiteled" ;
+         $id = $this->internal_id ( $title ) ;
+         $ret .= "<article id='{$id}'>\n";
+         $ret .= "<title>" . urldecode ( $title ) . "</title>\n" ;
+     } else if ( $tag == 'LINK' ) {
+         return $this->handle_link ( $tree ) ; # Shortcut
+     } else if ( $tag == 'EXTENSION' ) {
+         return $this->handle_extensions ( $tree ) ; # Shortcut
+     } else if ( $tag == 'HEADING' ) {
+         # $tree->sections holds one entry per heading level: 1 where a
+         # <section> was actually opened, 0 for a skipped level.
+         $level = count ( $tree->sections ) ;
+         $wanted = $this->attrs["LEVEL"] ;
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         while ( $level >= $wanted ) {
+             $x = array_pop ( $tree->sections ) ;
+             if ( $x == 1 ) {
+                 $ret .= $this->close_last ( "section" , $tree ) ;
+             }
+             $level-- ;
+         }
+         while ( $level < $wanted ) {
+             $level++ ;
+             if ( $level < $wanted ) {
+                 array_push ( $tree->sections , 0 ) ;
+             } else {
+                 $ret .= $this->ensure_new ( "section" , $tree , "<section>" ) ;
+                 array_push ( $tree->sections , 1 ) ;
+             }
+         }
+         $ret .= "<title>" ;
+     } else if ( $tag == 'PARAGRAPH' || $tag == 'XHTML:P' ) { # Paragraph
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         $ret .= $this->ensure_new ( "para" , $tree ) ;
+     } else if ( $tag == 'LIST' ) { # List
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         if ( isset ( $this->attrs['TYPE'] ) ) $list_type = strtolower ( $this->attrs['TYPE'] ) ;
+         if ( $list_type == 'bullet' || $list_type == 'ident' || $list_type == 'def' ) $ret .= '<itemizedlist mark="opencircle">' ;
+         else if ( $list_type == 'numbered' ) $ret .= '<orderedlist numeration="arabic">' ;
+     } else if ( $tag == 'LISTITEM' ) { # List item
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         $ret .= "<listitem>\n" ;
+         $ret .= $this->ensure_new ( "para" , $tree ) ;
+     } else if ( $tag == 'TABLE' ) { # Table
+         $ret .= $this->add_new ( "table" , $tree ) ;
+         $ret .= $this->make_tgroup ( $tree ) ;
+         $ret .= "<tbody>" ;
+     } else if ( $tag == 'TABLEROW' ) { # Tablerow
+         # Remember the lengths so an empty row can be removed again below
+         $retl_before = strlen ( $ret ) ;
+         $ret .= $this->add_new ( "row" , $tree ) ;
+         $retl_after = strlen ( trim ( $ret ) ) ;
+     } else if ( $tag == 'TABLEHEAD' || $tag == 'TABLECELL' ) { # Tablehead / Tablecell
+         $ret .= $this->add_new ( "entry" , $tree ) ;
+     } else if ( $tag == 'TABLECAPTION' ) { # Tablecaption
+         if ( $param != "DOCAPTION" ) return "" ;
+     } else if ( isset ( $inline[$tag] ) ) { # Inline formatting
+         $ret .= $this->ensure_new ( "para" , $tree ) ;
+         $ret .= $inline[$tag][0] ;
+         $close_tag = $inline[$tag][1] ;
+     } else if ( $tag == 'DEFVAL' ) {
+         $ret .= $this->ensure_new ( "para" , $tree ) ;
+         $ret .= " : " ;
+     } else { # Default : normal text
+         $ret .= $this->ensure_new ( "para" , $tree ) ;
+     }
+
+     # --- Get the sub-items ---
+     if ( $tag != 'MAGIC_VARIABLE' && $tag != 'TEMPLATE' ) {
+         $ret .= $this->sub_parse ( $tree ) ;
+     }
+
+     # --- Close tags ---
+     if ( $tag == 'LIST' ) {
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         if ( $list_type == 'bullet' || $list_type == 'ident' || $list_type == 'def' ) $ret .= "</itemizedlist>\n" ;
+         else if ( $list_type == 'numbered' ) $ret .= "</orderedlist>\n" ;
+         # An XHTML list closed the surrounding paragraph; reopen one
+         if ( $xhtml_conversion )
+             $ret .= $this->ensure_new ( "para" , $tree ) ;
+     } else if ( $tag == 'LISTITEM' ) {
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         $ret .= "</listitem>\n" ;
+     } else if ( $close_tag != "" ) {
+         $ret .= "</{$close_tag}>" ;
+     } else if ( $tag == 'HEADING' ) {
+         $ret .= "</title>\n" ;
+     } else if ( $tag == 'TABLE' ) { # Table
+         $ret .= "</tbody>" ;
+         $ret .= "</tgroup>" ;
+         $ret .= $this->close_last ( "table" , $tree ) ;
+     } else if ( $tag == 'TABLEROW' ) { # Tablerow
+         if ( strlen ( trim ( $ret ) ) == $retl_after ) {
+             # Nothing was rendered inside the row: drop it, but still pop it from the stack
+             $ret = substr ( $ret , 0 , $retl_before ) ;
+             $this->close_last ( "row" , $tree ) ;
+         } else $ret .= $this->close_last ( "row" , $tree ) ;
+     } else if ( $tag == 'TABLEHEAD' || $tag == 'TABLECELL' ) { # Tablehead / Tablecell
+         $ret .= $this->close_last ( "entry" , $tree ) ;
+     } else if ( $tag == 'ARTICLE' ) {
+         $ret .= $this->close_last ( "section" , $tree , true ) ;
+         $ret .= $this->close_last ( "para" , $tree ) ;
+         $ret .= "</article>";
+     }
+
+     return $ret;
+ }
+}
+
+require_once ( "xml2tree.php" ) ; # Uses the "element" class defined
above
+
+?>
Added: projects/wiki/extensions/wiki2xml/xml2odt.php
===================================================================
--- projects/wiki/extensions/wiki2xml/xml2odt.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/xml2odt.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,667 @@
+<?php
+
+# Describes one automatic style (a name plus bold / italics / underline
+# flags) collected by XML2ODT.
+class TextStyle {
+ # Style name; XML2ODT assigns "T0", "T1", ...
+ var $name = "" ;
+ var $bold = false ;
+ var $italics = false ;
+ var $underline = false ;
+ # Incremented by XML2ODT::get_text_style() on every match; styles still
+ # at 0 are written as paragraph styles by get_styles_xml()
+ var $count = 0 ;
+}
+
+class XML2ODT {
+ var $tags ;
+ var $textstyle_current ;
+ var $textstyles = array () ;
+ var $listcode = "" ;
+ var $list_is_open = false ;
+ var $list_list = array () ;
+ var $list_item_name = array () ;
+ var $image_counter = 0 ;
+ var $image_frames = array () ;
+ var $table_counter = 0 ;
+ var $open_tables = array () ;
+ var $table_styles = array () ;
+ var $col_styles = array () ;
+ var $cell_styles = array () ;
+ var $col_counter = array () ;
+ var $row_counter = array () ;
+ var $footnote_counter = 0 ;
+ var $article_counter = 0 ;
+ var $footnote_index = array () ;
+ var $footnote_text = array () ;
+
+ /**
+  * Register the default text style "T0" and start with an empty tag stack.
+  */
+ function __construct () {
+     $this->textstyle_current = new TextStyle ;
+     $this->textstyle_current->name = "T0" ;
+     $this->textstyles['T0'] = $this->textstyle_current ;
+     $this->tags = array () ;
+ }
+
+ /**
+  * PHP 4 style constructor name, kept for code that calls it explicitly.
+  * A method named after the class is no longer run as constructor by
+  * PHP 8, hence the real work lives in __construct().
+  */
+ function XML2ODT () {
+     $this->__construct () ;
+ }
+
+ /**
+  * Build the index.php URL of a wiki page on the configured site
+  * ($xmlg["site_base_url"]); the title is URL-encoded.
+  */
+ function get_url ( $title ) {
+     global $xmlg ;
+     $encoded = urlencode ( $title ) ;
+     return "http://" . $xmlg["site_base_url"] . "/index.php?title=" . $encoded ;
+ }
+
+ /**
+  * Hand out the next footnote number.
+  *
+  * Every call increments the counter and returns the new value. A named
+  * footnote seen for the first time has its number and text remembered;
+  * when the same name comes back with blank text, $text is filled in
+  * from the remembered text.
+  */
+ function get_footnote_id ( $name , &$text ) {
+     $name = trim ( strtolower ( $name ) ) ;
+     $this->footnote_counter++ ;
+     if ( $name != "" ) {
+         if ( isset ( $this->footnote_index[$name] ) ) {
+             if ( trim ( $text ) == "" ) $text = $this->footnote_text[$name] ;
+         } else {
+             $this->footnote_index[$name] = $this->footnote_counter ;
+             $this->footnote_text[$name] = $text ;
+         }
+     }
+     return $this->footnote_counter ;
+ }
+
+ /**
+  * Return one <style:style style:family="graphic"> definition per frame
+  * registered in $this->image_frames, using each frame's name, margins
+  * and horizontal alignment.
+  */
+ function get_image_frames () {
+ $ret = "" ;
+ foreach ( $this->image_frames AS $f ) {
+ $name = $f->name ;
+ $align = $f->align ;
+ $ret .= '<style:style style:name="' . $name . '"
style:family="graphic" style:parent-style-name="Graphics">' .
+ '<style:graphic-properties ' .
+ ' fo:margin-left="' . $f->left .
+ '" fo:margin-right="' . $f->right .
+ '" fo:margin-top="' . $f->top .
+ '" fo:margin-bottom="' . $f->bottom .
+ '" style:run-through="foreground" style:wrap="parallel"
style:number-wrapped-paragraphs="no-limit" ' .
+ 'style:wrap-contour="false" style:vertical-pos="top"
style:vertical-rel="paragraph" style:horizontal-pos="' .
+ $align . '" style:horizontal-rel="paragraph" ' .
+ 'style:mirror="none" fo:clip="rect(0cm 0cm 0cm 0cm)"
draw:luminance="0%" draw:contrast="0%" draw:red="0%"
draw:green="0%" draw:blue="0%" ' .
+ 'draw:gamma="100%" draw:color-inversion="false"
draw:image-opacity="100%"
draw:color-mode="standard"/></style:style>' ;
+ }
+ return $ret ;
+ }
+
+ /**
+  * Register a frame style for the current image and return its name
+  * ("fr" followed by the current image counter).
+  *
+  * @param string $align  Horizontal alignment stored with the frame.
+  * @param bool   $margin When set, a 0.1cm margin is added below the image
+  *                       and on the side facing the text (left for
+  *                       right-aligned, right for left-aligned images).
+  * @return string The frame style name.
+  */
+ function get_image_frame ( $align , $margin = false ) {
+     $i = "fr" . $this->image_counter ;
+     # Create the record explicitly; assigning a property on an undefined
+     # variable is an error on PHP 8
+     $o = new stdClass ;
+     $o->name = $i ;
+     $o->align = $align ;
+     $o->left = ( $margin && $align == 'right' ) ? '0.1cm' : '0cm' ;
+     $o->right = ( $margin && $align == 'left' ) ? '0.1cm' : '0cm' ;
+     $o->top = '0cm' ;
+     $o->bottom = $margin ? '0.1cm' : '0cm' ;
+     $this->image_frames[$i] = $o ;
+     return $i ;
+ }
+
+ /**
+  * Start a new table: allocate the next name ("Table1", "Table2", ...),
+  * push it on the stack of open tables, register its style record and
+  * reset its row / column counters.
+  *
+  * @param mixed $tag Not used.
+  * @return string The new table name.
+  */
+ function get_table_style ( &$tag ) {
+     $this->table_counter++ ;
+     $ret = "Table" . $this->table_counter ;
+     $this->open_tables[] = $ret ;
+     # Create the record explicitly; assigning a property on an undefined
+     # variable is an error on PHP 8
+     $o = new stdClass ;
+     $o->name = $ret ;
+     $o->cols = 0 ;
+     $this->table_styles[$ret] = $o ;
+     $this->col_counter[$ret] = 0 ;
+     $this->row_counter[$ret] = 0 ;
+     return $ret ;
+ }
+
+ /**
+  * Return the name of the innermost open table without removing it.
+  * Returns NULL when no table is open; the stack is left untouched in
+  * that case (popping and re-pushing would leave a NULL entry on it).
+  */
+ function get_top_table_name () {
+     if ( count ( $this->open_tables ) == 0 ) return NULL ;
+     $stack = $this->open_tables ; # Work on a copy
+     return array_pop ( $stack ) ;
+ }
+
+ /**
+  * Advance to the next cell of the innermost open table and return the
+  * cell style name: table name, ".", column letter (A, B, ...) and the
+  * current row counter. A column seen for the first time gets a column
+  * style record and updates the table's column count.
+  */
+ function get_column_style () {
+     $t = $this->get_top_table_name () ;
+     $cn = $t . "." . chr ( 65 + $this->col_counter[$t] ) ;
+     $cc = $cn . $this->row_counter[$t] ;
+     $this->col_counter[$t]++ ;
+     if ( !isset ( $this->col_styles[$cn] ) ) {
+         $this->table_styles[$t]->cols = $this->col_counter[$t] ;
+         # Create the record explicitly; assigning a property on an
+         # undefined variable is an error on PHP 8
+         $o = new stdClass ;
+         $o->name = $cn ;
+         $this->col_styles[$cn] = $o ;
+     }
+     return $cc ;
+ }
+
+ /**
+  * Begin a new row in the innermost open table: bump its row counter and
+  * restart column counting at zero.
+  */
+ function reset_column () {
+     $table = $this->get_top_table_name () ;
+     $this->row_counter[$table]++ ;
+     $this->col_counter[$table] = 0 ;
+ }
+
+ /**
+  * Return the style definitions for all registered tables
+  * (style:family="table") followed by those for all registered columns
+  * (style:family="table-column").
+  */
+ function get_table_styles () {
+ $ret = "" ;
+
+ # Tables
+ foreach ( $this->table_styles AS $ts ) {
+ $ret .= '<style:style style:name="' . $ts->name . '"
style:family="table">' .
+ '<style:table-properties style:width="auto"
table:align="margins"/>' .
+ '</style:style>' ;
+ }
+
+ # Columns
+ foreach ( $this->col_styles AS $cs ) {
+ $ret .= '<style:style style:name="' . $cs->name . '"
style:family="table-column">' .
+ '<style:table-column-properties style:column-width="auto"
style:rel-column-width="1*"/>' .
+ '</style:style>' ;
+ }
+
+ return $ret ;
+ }
+
+ /**
+  * Open a <text:list> unless one is already open.
+  *
+  * The list is described by $this->listcode: a trailing "#" makes it
+  * numbered (otherwise bullet) and the code's length is its depth. The
+  * description is appended to $this->list_list, and the paragraph style
+  * name "PL<n>" is recorded for that depth in $this->list_item_name.
+  *
+  * @return string The opening tag, or "" when a list was already open.
+  */
+ function ensure_list_open () {
+     if ( $this->list_is_open ) return "" ;
+     $this->list_is_open = true ;
+     # Create the record explicitly; assigning a property on an undefined
+     # variable is an error on PHP 8
+     $o = new stdClass ;
+     $o->type = ( substr ( $this->listcode , -1 ) == '#' ) ? 'numbered' : 'bullet' ;
+     $o->depth = strlen ( $this->listcode ) ;
+     $o->number = count ( $this->list_list ) + 1 ;
+     $this->list_list[] = $o ;
+     # Pad the per-depth name list so index $o->depth exists
+     while ( count ( $this->list_item_name ) <= $o->depth ) $this->list_item_name[] = "" ;
+     $this->list_item_name[$o->depth] = 'PL' . $o->number ;
+     return '<text:list text:style-name="List_20_' . $o->number . '">' ;
+ }
+
+ /**
+  * Close the currently open list, if any.
+  *
+  * Tags are popped and closed until a text:list-item has been closed. If
+  * the stack runs out without finding one, the stack is restored and none
+  * of those closing tags are emitted. "</text:list>" is appended either way.
+  */
+ function ensure_list_closed () {
+     if ( !$this->list_is_open ) return "" ;
+     $this->list_is_open = false ;
+
+     $saved_tags = $this->tags ;
+     $closing = "" ;
+     $item_closed = false ;
+     do {
+         $x = array_pop ( $this->tags ) ;
+         $closing .= "</{$x}>" ;
+         $item_closed = ( $x == "text:list-item" ) ;
+     } while ( !$item_closed && count ( $this->tags ) > 0 ) ;
+
+     if ( !$item_closed ) {
+         $closing = "" ;
+         $this->tags = $saved_tags ;
+     }
+     return $closing . "</text:list>" ;
+ }
+
+ /**
+  * Look up the registered text style with the same bold / italics /
+  * underline flags as $find.
+  *
+  * A match has its use counter incremented and is returned. Otherwise
+  * $find itself is registered under the next free name ("T<n>") with a
+  * count of 1 and returned.
+  */
+ function get_text_style ( $find ) {
+     foreach ( $this->textstyles AS $k => $ts ) {
+         $same = $ts->bold == $find->bold
+             && $ts->italics == $find->italics
+             && $ts->underline == $find->underline ;
+         if ( !$same ) continue ;
+         $this->textstyles[$k]->count++ ;
+         return $ts ;
+     }
+
+     # Create new style
+     $new_name = "T" . count ( $this->textstyles ) ;
+     $find->name = $new_name ;
+     $find->count = 1 ;
+     $this->textstyles[$new_name] = $find ;
+     return $find ;
+ }
+
+ /**
+  * Return the complete <office:automatic-styles> section: the two fixed
+  * paragraph styles (PHR, PAGEBREAK), all collected text styles, a
+  * paragraph style plus a ten-level list style per collected list, and
+  * finally the image frame and table styles.
+  */
+ function get_styles_xml () {
+ $ret = '<office:automatic-styles>' ;
+
+ # Default styles
+ $ret .= '<style:style style:name="PHR"
style:family="paragraph" style:parent-style-name="Standard">'
.
+ '<style:paragraph-properties fo:padding="0.074cm"
fo:border-left="none" fo:border-right="none"
fo:border-top="none" fo:border-bottom="0.002cm solid #000000"
style:join-border="false"/>' .
+ '</style:style>' .
+ '<style:style style:name="PAGEBREAK"
style:family="paragraph" style:parent-style-name="Standard">'
.
+ '<style:paragraph-properties fo:break-before="page"/>' .
+ '</style:style>' ;
+
+ # Text styles
+ foreach ( $this->textstyles AS $ts ) {
+ # Styles whose count is still 0 are written as justified paragraph styles, all others as text styles
+ if ( $ts->count == 0 ) {
+ $ret .= '<style:style style:name="' . $ts->name . '"
style:family="paragraph">' ;
+ $ret .= '<style:paragraph-properties fo:text-align="justify"
style:justify-single-word="false"/>' ;
+ } else {
+ $ret .= '<style:style style:name="' . $ts->name . '"
style:family="text">' ;
+ $ret .= '<style:text-properties' ;
+ if ( $ts->italics ) $ret .= ' fo:font-style="italic"
style:font-style-asian="italic" style:font-style-complex="italic"'
;
+ if ( $ts->bold ) $ret .= ' fo:font-weight="bold"
style:font-weight-asian="bold" style:font-weight-complex="bold"'
;
+ if ( $ts->underline ) {
+ $ret .= ' style:text-underline-style="solid"
style:text-underline-width="auto"
style:text-underline-color="font-color"' ;
+ }
+ $ret .= '/>' ;
+ }
+ $ret .= '</style:style>' ;
+ }
+
+ # List styles
+ # $cm is the indentation step per list level, in centimetres
+ $cm = 0.3 ;
+ foreach ( $this->list_list AS $list ) {
+ $l = "List_20_" . $list->number ;
+ $p = "PL" . $list->number ;
+ $ret .= '<style:style style:name="'.$p.'"
style:family="paragraph" style:parent-style-name="Standard"
style:list-style-name="'.$l.'">' ;
+ if ( $list->depth > 1 ) {
+ $off = $cm * $list->depth ;
+ $ret .= '<style:paragraph-properties fo:margin-left="' .
+ $off .
+ 'cm" fo:margin-right="0cm" fo:text-indent="0cm"
style:auto-text-indent="false"/>' ;
+ }
+ $ret .= '</style:style>' ;
+ $ret .= '<text:list-style style:name="' . $l . '">' ;
+ $off = 0 ;
+ # Ten levels are always written, each indented one step further than the previous one
+ for ( $depth = 1 ; $depth <= 10 ; $depth++ ) {
+ $off += $cm ;
+ if ( $list->type == 'numbered' ) {
+ $ret .= '<text:list-level-style-number text:level="' .
+ $depth .
+ '" text:style-name="Numbering_20_Symbols"
style:num-suffix="." style:num-format="1">' .
+ '<style:list-level-properties text:space-before="' .
+ $off . 'cm" text:min-label-width="' . $cm .
'cm"/>' .
+ '</text:list-level-style-number>' ;
+ } else {
+ # NOTE(review): text:bullet-char is empty here - the bullet character may have been lost in transit; confirm against the upstream source
+ $ret .= '<text:list-level-style-bullet text:level="' .
+ $depth .
+ '" text:style-name="Bullet_20_Symbols"
style:num-suffix="." text:bullet-char="">' .
+ '<style:list-level-properties text:space-before="' .
+ $off . 'cm" text:min-label-width="' . $cm .
'cm"/>' .
+ '<style:text-properties style:font-name="StarSymbol"/>' .
+ '</text:list-level-style-bullet>' ;
+ }
+ }
+ $ret .= '</text:list-style>' ;
+ }
+
+ $ret .= $this->get_image_frames () ;
+ $ret .= $this->get_table_styles () ;
+
+ $ret .= '</office:automatic-styles>' ;
+
+ return $ret ;
+ }
+
+ /**
+  * Return the beginning of content.xml: the <office:document-content>
+  * start tag with all namespace declarations, the (empty) scripts
+  * element, the font face declarations and the automatic styles.
+  *
+  * @return string XML up to and including </office:automatic-styles>.
+  */
+ function get_odt_start () {
+     # Attributes of the root element, one per entry; joined with single spaces
+     $root_attrs = array (
+         'xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0"' ,
+         'xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0"' ,
+         'xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"' ,
+         'xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0"' ,
+         'xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"' ,
+         'xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0"' ,
+         'xmlns:xlink="http://www.w3.org/1999/xlink"' ,
+         'xmlns:dc="http://purl.org/dc/elements/1.1/"' ,
+         'xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0"' ,
+         'xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"' ,
+         'xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"' ,
+         'xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0"' ,
+         'xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"' ,
+         'xmlns:math="http://www.w3.org/1998/Math/MathML"' ,
+         'xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0"' ,
+         'xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0"' ,
+         'xmlns:ooo="http://openoffice.org/2004/office"' ,
+         'xmlns:ooow="http://openoffice.org/2004/writer"' ,
+         'xmlns:oooc="http://openoffice.org/2004/calc"' ,
+         'xmlns:dom="http://www.w3.org/2001/xml-events"' ,
+         'xmlns:xforms="http://www.w3.org/2002/xforms"' ,
+         'xmlns:xsd="http://www.w3.org/2001/XMLSchema"' ,
+         'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' ,
+         'office:version="1.0"' ,
+     ) ;
+     $ret = '<office:document-content ' . implode ( ' ' , $root_attrs ) . '>' ;
+
+     # Font declarations, one element per line. Family names containing
+     # spaces are quoted with &apos; so they neither end the PHP string
+     # nor the XML attribute.
+     $head_lines = array (
+         '<office:scripts/>' ,
+         '<office:font-face-decls>' ,
+         '<style:font-face style:name="Tahoma1" svg:font-family="Tahoma"/>' ,
+         '<style:font-face style:name="Lucida Sans Unicode" svg:font-family="&apos;Lucida Sans Unicode&apos;" style:font-pitch="variable"/>' ,
+         '<style:font-face style:name="Tahoma" svg:font-family="Tahoma" style:font-pitch="variable"/>' ,
+         '<style:font-face style:name="Times New Roman" svg:font-family="&apos;Times New Roman&apos;" style:font-family-generic="roman" style:font-pitch="variable"/>' ,
+         '<style:font-face style:name="Arial" svg:font-family="Arial" style:font-family-generic="swiss" style:font-pitch="variable"/>' ,
+         '</office:font-face-decls>' ,
+     ) ;
+     $ret .= implode ( "\n" , $head_lines ) ;
+
+     $ret .= $this->get_styles_xml () ;
+     return $ret ;
+ }
+
+}
+
+
+
+class element {
+ var $name = '';
+ var $attrs = array ();
+ var $children = array ();
+
+ # Temporary variables for link tags
+ var $link_target = "" ;
+ var $link_trail = "" ;
+ var $link_parts = array () ;
+
+
+ /**
+  * Render every child of this node and concatenate the results.
+  *
+  * String children are buffered and passed through add_temp_text() as soon
+  * as an element child (or the end of the list) is reached. Children named
+  * ATTRS are skipped. For LINK nodes the rendered TARGET, PART and TRAIL
+  * children are also stored in the link_* properties.
+  */
+ function sub_parse(& $tree) {
+     $out = '' ;
+     $pending = "" ;
+     foreach ($this->children as $child) {
+         if (is_string($child)) {
+             $pending .= $child ;
+             continue ;
+         }
+         if ($child->name == 'ATTRS') continue ;
+
+         $out .= $this->add_temp_text ( $pending ) ;
+         $rendered = $child->parse ( $tree , "" , $this ) ;
+         if ( $this->name == 'LINK' ) {
+             switch ( $child->name ) {
+                 case 'TARGET' : $this->link_target = $rendered ; break ;
+                 case 'PART' : $this->link_parts[] = $rendered ; break ;
+                 case 'TRAIL' : $this->link_trail = $rendered ; break ;
+             }
+         }
+         $out .= $rendered ;
+     }
+     return $out . $this->add_temp_text ( $pending ) ;
+ }
+
+ /**
+  * Prepare character data for inclusion in the ODT content XML by
+  * escaping the three XML-special characters.
+  *
+  * An earlier version, previously kept here commented out, additionally
+  * ran html_entity_decode() first and utf8_decode() last; this version
+  * does neither.
+  *
+  * @param string $s Raw text.
+  * @return string Escaped text.
+  */
+ function fix_text ( $s ) {
+     # Defined outside this file; presumably rewrites $s by reference - TODO confirm
+     filter_named_entities ( $s ) ;
+     # "&" goes first, otherwise the entities produced below would be escaped twice
+     $s = str_replace ( "&" , "&amp;" , $s ) ;
+     $s = str_replace ( "<" , "&lt;" , $s ) ;
+     $s = str_replace ( ">" , "&gt;" , $s ) ;
+     return $s ;
+ }
+
+ /**
+  * Flush the text buffer: return its content run through fix_text() and
+  * reset the caller's buffer to the empty string.
+  */
+ function add_temp_text ( &$temp ) {
+     $fixed = $this->fix_text ( $temp ) ;
+     $temp = "" ;
+     return $fixed ;
+ }
+
+ /**
+  * Record $tag on the global converter's tag stack and return its opening
+  * markup; $params, when not empty, is appended after a space as the
+  * attribute string.
+  */
+ function push_tag ( $tag , $params = "" ) {
+     global $xml2odt ;
+     $xml2odt->tags[] = $tag ;
+     $attributes = ( $params != "" ) ? " " . $params : "" ;
+     return "<" . $tag . $attributes . ">" ;
+ }
+
+ /**
+  * Remove the most recently pushed tag from the global converter's tag
+  * stack and return its closing markup, or "" when the stack is empty.
+  */
+ function pop_tag () {
+     global $xml2odt ;
+     if ( count ( $xml2odt->tags ) > 0 ) {
+         return "</" . array_pop ( $xml2odt->tags ) . ">" ;
+     }
+     return "" ;
+ }
+
+ /**
+  * Return the most recently pushed tag without removing it, or "" when
+  * the stack is empty.
+  */
+ function top_tag () {
+     global $xml2odt ;
+     if ( count ( $xml2odt->tags ) == 0 ) return "" ;
+     $stack = $xml2odt->tags ; # Work on a copy so the real stack is untouched
+     return array_pop ( $stack ) ;
+ }
+
+ /**
+  * Render a LINK node as ODT markup.
+  *
+  * External links become
+  *   <text:a xlink:type="simple" xlink:href="URL">label</text:a>
+  * Internal links are dispatched on the namespace id reported by
+  * $content_provider->get_namespace_id(): 6 = image, -9 = interlanguage,
+  * -8 = category, anything else = ordinary page link.
+  *
+  * The children are parsed first; sub_parse() fills $this->link_target,
+  * $this->link_parts and $this->link_trail as a side effect.
+  *
+  * Fixed here: external and interlanguage links used to get a "/" appended
+  * to their href, which produced wrong URLs for anything but a bare host.
+  *
+  * @param object $tree root of the tree, passed through to sub_parse()
+  * @return string ODT markup, or "" for suppressed category/interlanguage links
+  */
+ function handle_link ( &$tree ) {
+   global $content_provider , $xml2odt , $xmlg ;
+   $sub = $this->sub_parse ( $tree ) ;
+   $link = "" ;
+
+   if ( isset ( $this->attrs['TYPE'] ) AND strtolower ( $this->attrs['TYPE'] ) == 'external' ) { # External link
+     $href = htmlentities ( $this->attrs['HREF'] ) ;
+     if ( trim ( $sub ) == "" ) {
+       # No label given: use the host part of the URL
+       $sub = $href ;
+       $sub = explode ( '://' , $sub , 2 ) ;
+       $sub = explode ( '/' , array_pop ( $sub ) , 2 ) ;
+       $sub = array_shift ( $sub ) ;
+     }
+     $sub = $this->fix_text ( $sub ) ;
+     $link = '<text:a xlink:type="simple" xlink:href="' . $href . '">' . $sub . '</text:a>' ;
+     return $link ;
+   }
+
+   # Internal link
+   # NOTE(review): "LINK" looks like a debugging placeholder; because of it the
+   # "== ''" fallback to the link target below can never trigger when there
+   # are no parts. Left unchanged because the intended text is not clear.
+   $link = "LINK" ;
+   if ( count ( $this->link_parts ) > 0 ) {
+     $link = array_pop ( $this->link_parts ) ;
+     array_push ( $this->link_parts , $link ) ; # Compensating array_pop
+   }
+   $link_text = $link ;
+   if ( $link == "" ) $link = $this->link_target ;
+   $link .= $this->link_trail ;
+
+   $ns = $content_provider->get_namespace_id ( $this->link_target ) ;
+
+   if ( $ns == 6 ) { # Image
+     $nstext = explode ( ":" , $this->link_target , 2 ) ;
+     $target = array_pop ( $nstext ) ;
+
+     # The last part is taken as the caption (and removed from link_parts)
+     $text = array_pop ( $this->link_parts ) . $this->link_trail ;
+
+     $href = $content_provider->get_image_url ( $target ) ;
+     $xml2odt->image_counter++ ;
+     $image_file = $content_provider->copyimagefromwiki ( $target , $href ) ;
+     $image_file_full = $xmlg['image_destination'] . "/" . $image_file ;
+     $image_file = "Pictures/" . $image_file ;
+
+     # Dimensions; getimagesize() failures fall through to the defaults below
+     list ( $i_width , $i_height ) = @getimagesize ( $image_file_full ) ;
+     if ( $i_width <= 0 ) { # Paranoia
+       $i_width = 100 ;
+       $i_height = 100 ;
+     }
+
+     $is_thumb = false ;
+     $align = '' ;
+     $width = '' ;
+     foreach ( $this->link_parts AS $s ) {
+       $s = trim ( $s ) ;
+       if ( $s == 'thumb' ) {
+         $is_thumb = true ;
+         if ( $align == '' ) $align = 'right' ;
+         if ( $width == '' ) $width = '400' ;
+       } else if ( substr ( trim ( strtolower ( $s ) ) , -2 ) == 'px' ) {
+         $s = trim ( strtolower ( $s ) ) ;
+         $s = trim ( substr ( $s , 0 , strlen ( $s ) - 2 ) ) ;
+         $width = $s * 2 ;
+       }
+     }
+     if ( $width == '' ) $width = $i_width ;
+     if ( $align == '' ) $align = 'left' ;
+
+     $page_width = 1000 ; # Arbitrary: page width = 1000 px
+     if ( $width > $page_width ) $width = $page_width ;
+     $width = $width / 100 ; # 100 px are rendered as 1 cm
+     $height = ( $i_height * $width ) / $i_width ; # Keep the aspect ratio
+     $width .= "cm" ;
+     $height .= "cm" ;
+
+     $link = "" ;
+     $fr = $xml2odt->get_image_frame ( $align ) ;
+     $image_counter = $xml2odt->image_counter ;
+     $has_caption = ( $is_thumb && $text != "" ) ;
+     if ( $has_caption ) {
+       # Outer frame with a text box that holds the image and its caption
+       $ofr = $xml2odt->get_image_frame ( $align , true ) ;
+       $link .= '<draw:frame draw:style-name="' .
+         $ofr .
+         '" draw:name="Frame' .
+         $xml2odt->image_counter .
+         '" text:anchor-type="paragraph" svg:width="' .
+         $width .
+         '" draw:z-index="0">' ;
+       $link .= '<draw:text-box fo:min-height="' . $height . '">' ;
+       $link .= '<text:p text:style-name="Illustration">' ;
+     }
+     $link .= '<draw:frame draw:style-name="' . $fr . '" draw:name="Figure' .
+       $image_counter .
+       '" text:anchor-type="paragraph" svg:width="' . $width . '" svg:height="' . $height . '" draw:z-index="0">' .
+       '<draw:image xlink:href="' . $image_file .
+       '" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/>' .
+       '</draw:frame>' ;
+     if ( $has_caption ) {
+       $link .= $text ;
+       $link .= '</text:p></draw:text-box></draw:frame>' ;
+     }
+
+   } else if ( $ns == -9 ) { # Interlanguage link
+     $sub = $this->link_target ;
+     $nstext = explode ( ":" , $sub , 2 ) ;
+     $name = array_pop ( $nstext ) ;
+     $nstext = array_shift ( $nstext ) ;
+     $sub = utf8_encode ( $sub ) ;
+     $href = "http://{$nstext}.wikipedia.org/wiki/" . urlencode ( $name ) ;
+     $link = '<text:a xlink:type="simple" xlink:href="' . $href . '">' . $sub . '</text:a>' ;
+     if ( !$xmlg['keep_interlanguage'] ) $link = "" ; # No interlanguage links?
+   } else if ( $ns == -8 ) { # Category link
+     if ( $link_text == "!" || $link_text == '*' ) $link = "" ;
+     else if ( $link_text != $this->link_target ) $link = " ({$link_text})" ;
+     else $link = "" ;
+     $link = "" . $this->link_target . $link . "" ;
+     if ( !$xmlg['keep_categories'] ) $link = "" ; # No category links?
+   } else {
+     if ( $content_provider->is_an_article ( $this->link_target ) ) {
+       # NOTE(review): German placeholder ("page-internal link") is emitted
+       # instead of the link text; a real cross reference is not implemented.
+       $link = "SEITEN-INTERNER LINK" ;
+     } else {
+       $href = $xml2odt->get_url ( $this->link_target ) ;
+       if ( count ( $this->link_parts ) == 0 ) $text = $this->link_target ;
+       else $text = array_pop ( $this->link_parts ) ;
+       $text .= $this->link_trail ;
+       $link = '<text:a xlink:type="simple" xlink:href="' . $href . '">' . $text . '</text:a>' ;
+     }
+   }
+   return $link ;
+ }
+
+ /**
+  * Render an EXTENSION node.
+  *
+  * Only <ref> is understood: it becomes an ODT note whose class is taken
+  * from $xmlg["odt_footnote"] and whose id comes from
+  * $xml2odt->get_footnote_id(). Every other extension is reduced to the
+  * output of its children.
+  *
+  * @param object $tree root of the tree, passed through to sub_parse()
+  * @return string ODT markup
+  */
+ function handle_extensions ( &$tree ) {
+   global $content_provider , $xml2odt , $xmlg ;
+   $name = strtolower ( $this->attrs['EXTENSION_NAME'] ) ;
+   $sub = $this->sub_parse ( $tree ) ;
+
+   # Unhandled extension: pass the content through
+   if ( $name != "ref" ) return $sub ;
+
+   $fname = isset ( $this->attrs['NAME'] ) ? $this->attrs['NAME'] : "" ;
+   $note_class = strtolower ( trim ( $xmlg["odt_footnote"] ) ) ;
+   $note_style = ucfirst ( $note_class ) ;
+   $id = $xml2odt->get_footnote_id ( $fname , $sub ) ;
+
+   $pieces = array (
+     '<text:note text:id="ftn' , $id , '" text:note-class="' , $note_class , '">' ,
+     '<text:note-citation>' , $id , '</text:note-citation>' ,
+     '<text:note-body><text:p text:style-name="' , $note_style , '">' ,
+     $sub ,
+     '</text:p></text:note-body>' ,
+     '</text:note>'
+   ) ;
+   return implode ( '' , $pieces ) ;
+ }
+
+ /**
+  * Convert this node and, recursively, its children into ODT content XML.
+  *
+  * Works in three phases: emit the opening markup for the node (pushing every
+  * opened tag on $xml2odt->tags), parse the children, then close every tag
+  * that was opened since entry and restore the text style.
+  *
+  * Fixed here:
+  *  - table cells were written with the attribute "table:style_name"; the
+  *    attribute used everywhere else in this file is "table:style-name".
+  *  - a numbered list appended "#" AND "*" to $xml2odt->listcode because of a
+  *    missing "else", while only one character is removed again when the
+  *    list is closed.
+  *
+  * NOTE(review): $old_text_style holds an object; if get_text_style() does not
+  * return a fresh object, the flag set on textstyle_current also changes the
+  * saved style under PHP 5 object semantics. Verify against get_text_style().
+  *
+  * @param object $tree root of the tree, passed through to the children
+  * @return string ODT markup for this subtree
+  */
+ function parse ( &$tree ) {
+   global $xml2odt ;
+   $ret = '' ;
+   $tag = $this->name ; # Shortcut
+
+   $old_text_style = $xml2odt->textstyle_current ;
+   $tag_count = count ( $xml2odt->tags ) ;
+
+   # Open tag
+   if ( $tag == "SPACE" ) {
+     return '<text:s/>' ;
+   } else if ( $tag == "ARTICLE" ) {
+     # Every article but the first starts on a new page
+     if ( $xml2odt->article_counter > 0 ) {
+       $ret .= '<text:p text:style-name="PAGEBREAK"/>' ;
+     }
+     $xml2odt->article_counter++ ;
+     if ( isset ( $this->attrs['TITLE'] ) ) {
+       $title = $this->attrs['TITLE'] ;
+       $ret .= '<text:h text:style-name="Heading_20_1" text:outline-level="1">' ;
+       $ret .= urldecode ( $title ) ;
+       $ret .= '</text:h>' ;
+     }
+
+   } else if ( $tag == "TEMPLATE" ) {
+     return "" ;
+   } else if ( $tag == "TEMPLATEVAR" ) {
+     return "" ;
+   } else if ( $tag == "MAGIC_VARIABLE" ) {
+     return "" ;
+   } else if ( $tag == "HR" ) {
+     return '<text:p text:style-name="PHR"/>' ;
+   } else if ( $tag == "EXTENSION" ) {
+     return $this->handle_extensions ( $tree ) ;
+   } else if ( $tag == "HEADING" || substr ( $tag , 0 , 7 ) == "XHTML:H" ) {
+     # For XHTML:Hn the level is the character following "XHTML:H"
+     if ( $tag == "HEADING" ) $level = $this->attrs['LEVEL'] ;
+     else $level = substr ( $tag , 7 , 1 ) ;
+     $ret .= $this->push_tag ( "text:h" , 'text:style-name="Heading_20_' . $level . '" text:outline-level="' . $level . '"' ) ;
+   } else if ( $tag == "BOLD" || $tag == "XHTML:B" || $tag == "XHTML:STRONG" ) {
+     $xml2odt->textstyle_current->bold = true ;
+     $xml2odt->textstyle_current = $xml2odt->get_text_style ( $xml2odt->textstyle_current ) ;
+     $ret .= $this->push_tag ( "text:span" , "text:style-name=\"" . $xml2odt->textstyle_current->name . "\"" ) ;
+   } else if ( $tag == "XHTML:U" ) {
+     $xml2odt->textstyle_current->underline = true ;
+     $xml2odt->textstyle_current = $xml2odt->get_text_style ( $xml2odt->textstyle_current ) ;
+     $ret .= $this->push_tag ( "text:span" , "text:style-name=\"" . $xml2odt->textstyle_current->name . "\"" ) ;
+   } else if ( $tag == "ITALICS" || $tag == "XHTML:I" || $tag == "XHTML:EM" ) {
+     $xml2odt->textstyle_current->italics = true ;
+     $xml2odt->textstyle_current = $xml2odt->get_text_style ( $xml2odt->textstyle_current ) ;
+     $ret .= $this->push_tag ( "text:span" , "text:style-name=\"" . $xml2odt->textstyle_current->name . "\"" ) ;
+   } else if ( $tag == "PARAGRAPH" || $tag == "XHTML:P" ) {
+     # Do not nest paragraphs
+     if ( $this->top_tag() != "text:p" )
+       $ret .= $this->push_tag ( "text:p" , 'text:style-name="T0"' ) ;
+   } else if ( $tag == "LIST" || $tag == "XHTML:OL" || $tag == "XHTML:UL" ) {
+     $is_list = true ;
+     $ret .= $xml2odt->ensure_list_closed () ;
+     # A list interrupts an open paragraph; it is reopened after the list
+     if ( $this->top_tag() == "text:p" ) {
+       $reopen_p = true ;
+       $ret .= $this->pop_tag () ;
+     }
+     if ( $tag == "LIST" ) $type = strtolower ( $this->attrs['TYPE'] ) ;
+     else $type = "" ;
+     # Exactly one character per nesting level; it is removed again below
+     if ( $type == 'numbered' || $tag == 'XHTML:OL' ) $xml2odt->listcode .= "#" ;
+     else if ( $type == 'ident' ) $xml2odt->listcode .= " " ;
+     else $xml2odt->listcode .= "*" ;
+   } else if ( $tag == "LINK" ) {
+     return $this->handle_link ( $tree ) ;
+   } else if ( $tag == "LISTITEM" || $tag == "XHTML:LI" ) {
+     $ret .= $xml2odt->ensure_list_open () ;
+     # Tags opened by ensure_list_open() must survive the end of this item
+     $tag_count = count ( $xml2odt->tags ) ;
+     $p = $xml2odt->list_item_name[strlen($xml2odt->listcode)] ;
+     $ret .= $this->push_tag ( "text:list-item" ) ;
+     $ret .= $this->push_tag ( "text:p" , 'text:style-name="' . $p . '"' ) ;
+
+   } else if ( $tag == "TABLE" ) {
+     if ( $this->top_tag() == "text:p" ) {
+       $reopen_p = true ;
+       $ret .= $this->pop_tag () ;
+     }
+     $name = $xml2odt->get_table_style ( $this ) ;
+     $ret .= $this->push_tag ( "table:table" , 'table:style-name="' . $name . '"' ) ;
+     # The column declarations are only known after the rows were parsed, so
+     # the table head is kept aside and glued in front of the rows later
+     $other_ret = $ret ;
+     $ret = "" ;
+   } else if ( $tag == "TABLEROW" ) {
+     $xml2odt->reset_column () ;
+     $ret .= $this->push_tag ( "table:table-row" ) ;
+   } else if ( $tag == "TABLECELL" || $tag == "TABLEHEAD" ) {
+     $name = $xml2odt->get_column_style () ;
+     $ret .= $this->push_tag ( "table:table-cell" , 'table:style-name="' . $name . '" office:value-type="string"' ) ;
+     if ( $tag == "TABLEHEAD" ) $name = "Table_20_Heading" ;
+     else $name = "Table_20_Contents" ;
+     $ret .= $this->push_tag ( "text:p" , 'text:style-name="' . $name . '"' ) ;
+   } else if ( $tag == "TABLECAPTION" ) {
+     return "" ; # Skipping caption
+   }
+
+   # Children
+   $ret .= $this->sub_parse ( $tree ) ;
+
+   # Close tag
+   $xml2odt->textstyle_current = $old_text_style ;
+
+   while ( $tag_count < count ( $xml2odt->tags ) ) {
+     $x = array_pop ( $xml2odt->tags ) ;
+     $ret .= "</{$x}>" ;
+   }
+
+   if ( isset ( $is_list ) ) {
+     $ret .= $xml2odt->ensure_list_closed () ;
+     $xml2odt->listcode = substr ( $xml2odt->listcode , 0 , strlen ( $xml2odt->listcode ) - 1 ) ;
+   }
+
+   if ( $tag == "TABLE" ) {
+     # One column declaration per column, named <table>.A, <table>.B, ...
+     $t = $xml2odt->get_top_table_name () ;
+     for ( $a = 0 ; $a < $xml2odt->table_styles[$t]->cols ; $a++ ) {
+       $name = $t . "." . chr ( 65 + $a ) ;
+       $other_ret .= '<table:table-column table:style-name="' . $name . '" table:number-columns-repeated="1"/>' ;
+     }
+     $ret = $other_ret . $ret ;
+     array_pop ( $xml2odt->open_tables ) ;
+   }
+
+   if ( isset ( $reopen_p ) ) {
+     $ret .= $this->push_tag ( "text:p" , 'text:style-name="T0"' ) ;
+   }
+
+   return $ret ;
+ }
+}
+
+require_once ( "xml2tree.php" ) ; # Uses the "element" class defined
above
+
+?>
Added: projects/wiki/extensions/wiki2xml/xml2tree.php
===================================================================
--- projects/wiki/extensions/wiki2xml/xml2tree.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/xml2tree.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,106 @@
+<?php
+
+/**
+ * This class converts an XML string to a tree structure
+ * based on the "element" class that must be defined outside
+ * prior to including this file
+*/
+
+$ancStack = array (); // the stack with ancestral elements
+
+// START Three global functions needed for parsing, sorry guys
+/**
+ * Start-tag handler for the XML parser: creates an "element" for the tag
+ * and puts it on top of the ancestor stack.
+ *
+ * @param resource $parser the XML parser (unused)
+ * @param string $name tag name as reported by the parser
+ * @param array $attrs attributes of the tag
+ */
+function wgXMLstartElement($parser, $name, $attrs) {
+    global $ancStack;
+
+    $node = new element;
+    $node->attrs = $attrs;
+    $node->name = $name;
+
+    $ancStack[] = $node;
+}
+
+/**
+ * End-tag handler for the XML parser: takes the finished element off the
+ * ancestor stack and appends it to its parent's children. The element that
+ * leaves an otherwise empty stack becomes the global $rootElem.
+ *
+ * @param resource $parser the XML parser (unused)
+ * @param string $name tag name (unused)
+ */
+function wgXMLendElement($parser, $name) {
+    global $ancStack, $rootElem;
+
+    $finished = array_pop($ancStack);
+    $depth = count($ancStack);
+    if ($depth != 0) {
+        // still inside a parent: attach the finished element to it
+        $ancStack[$depth - 1]->children[] = $finished;
+        return;
+    }
+    $rootElem = $finished;
+}
+
+/**
+ * Character-data handler for the XML parser: appends the text to the
+ * children of the innermost open element. Text that is empty after trim()
+ * and text outside of any element is dropped.
+ *
+ * @param resource $parser the XML parser (unused)
+ * @param string $data the text chunk
+ */
+function wgXMLcharacterData($parser, $data) {
+    global $ancStack;
+
+    if (!$ancStack) return; // no parent to attach to
+    if (trim ( $data ) == "") return; // whitespace only
+
+    $ancStack[count($ancStack) - 1]->children[] = $data;
+}
+// END Three global functions needed for parsing, sorry guys
+
+/**
+ * Turns XML into a tree of "element" objects.
+ *
+ * The work is done by the three global handler functions wgXMLstartElement,
+ * wgXMLendElement and wgXMLcharacterData, which communicate through the
+ * globals $ancStack and $rootElem; the methods here only drive the parser.
+ *
+ * @package MediaWiki
+ * @subpackage Experimental
+ */
+class xml2php {
+
+  /**
+   * Parse an XML file.
+   *
+   * Terminates the script via die() if the file cannot be opened or the XML
+   * is not well-formed.
+   *
+   * @param string $filename path of the XML file
+   * @return object reference to the root element of the tree
+   */
+  function & scanFile($filename) {
+    global $ancStack, $rootElem;
+    $ancStack = array ();
+
+    $xml_parser = xml_parser_create();
+    xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
+    xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
+    if (!($fp = fopen($filename, 'r'))) {
+      die('could not open XML input');
+    }
+    // Compare strictly: a chunk consisting of "0" is valid data, not EOF
+    while (($data = fread($fp, 4096)) !== false && $data !== '') {
+      if (!xml_parse($xml_parser, $data, feof($fp))) {
+        die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
+      }
+    }
+    fclose($fp); // the handle used to be left open
+    xml_parser_free($xml_parser);
+
+    // the root element was stored by wgXMLendElement
+    return $rootElem;
+  }
+
+  /**
+   * Parse XML held in memory.
+   *
+   * The input is wrapped between xml_articles_header() and "</articles>".
+   * When $input is an array, its pieces are fetched with xml_shift() and fed
+   * to the parser one by one (both helpers are defined elsewhere in the
+   * project); parse errors are not reported in that case. For a string,
+   * a parse error of the string itself terminates the script via die().
+   *
+   * @param mixed $input XML string, or an array of XML strings
+   * @return object root element of the tree
+   */
+  function scanString($input) {
+    global $ancStack, $rootElem;
+    $ancStack = array ();
+
+    $xml_parser = xml_parser_create();
+    xml_set_element_handler($xml_parser, 'wgXMLstartElement', 'wgXMLendElement');
+    xml_set_character_data_handler($xml_parser, 'wgXMLcharacterData');
+
+    xml_parse($xml_parser, xml_articles_header() , false) ;
+    if ( is_array ( $input ) ) {
+      while ( $x = xml_shift ( $input ) ) {
+        xml_parse($xml_parser, $x, false) ;
+      }
+    } else {
+      if (!xml_parse($xml_parser, $input, false)) {
+        die(sprintf("XML error: %s at line %d", xml_error_string(xml_get_error_code($xml_parser)), xml_get_current_line_number($xml_parser)));
+      }
+    }
+    xml_parse($xml_parser, '</articles>', true) ;
+
+    xml_parser_free($xml_parser);
+
+    // the root element was stored by wgXMLendElement
+    return $rootElem;
+  }
+
+}
+
+?>
Added: projects/wiki/extensions/wiki2xml/xml2txt.php
===================================================================
--- projects/wiki/extensions/wiki2xml/xml2txt.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/xml2txt.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,179 @@
+<?php
+
+/**
+ * This file contains the /element/ class needed by xml2tree.php
+ * to create a tree which is then converted into plain text
+ */
+
+class element {
+  var $name = '';
+  var $attrs = array ();
+  var $children = array ();
+
+  # Scratch data collected while the children of a LINK tag are parsed
+  var $link_target = "" ;
+  var $link_trail = "" ;
+  var $link_parts = array () ;
+
+  # State that is only used on the root element ($tree)
+  var $list = array () ;
+  var $iter = 1 ;
+  var $bold = "" ;
+  var $italics = "" ;
+  var $underline = "" ;
+  var $pre_link = "" ;
+
+  /**
+   * Convert all children to text and concatenate the results.
+   * String children are taken verbatim, ATTRS children are skipped. For a
+   * LINK element the output of TARGET, PART and TRAIL children is also
+   * remembered in the link_* members.
+   */
+  function sub_parse(& $tree) {
+    $out = '' ;
+    foreach ($this->children as $child) {
+      if (is_string($child)) {
+        $out .= $child ;
+        continue ;
+      }
+      if ($child->name == 'ATTRS') continue ;
+
+      $piece = $child->parse ( $tree ) ;
+      if ( $this->name == 'LINK' ) {
+        switch ( $child->name ) {
+          case 'TARGET' : $this->link_target = $piece ; break ;
+          case 'PART' : $this->link_parts[] = $piece ; break ;
+          case 'TRAIL' : $this->link_trail = $piece ; break ;
+        }
+      }
+      $out .= $piece ;
+    }
+    return $out ;
+  }
+
+  /**
+   * Convert this element and its children to plain text.
+   *
+   * While the tree is being walked, "\m" (a literal backslash followed by
+   * "m") marks a line break that may be merged with a neighbouring one; the
+   * markers are resolved to "\n" once, by the call that entered with
+   * $tree->iter == 1.
+   */
+  function parse ( &$tree ) {
+    global $content_provider , $wiki2xml_authors , $xmlg ;
+    $tag = $this->name ;
+    $ret = '' ;
+    $is_root = ( $tree->iter == 1 ) ;
+    $tree->iter++ ;
+
+    # Leading whitespace / tags that produce no output of their own
+    switch ( $tag ) {
+      case 'SPACE' :
+        $ret .= ' ' ;
+        break ;
+      case 'HEADING' :
+        $ret .= "\m\n" ;
+        break ;
+      case 'PARAGRAPH' :
+      case 'TABLECELL' :
+      case 'TABLECAPTION' :
+        $ret .= "\n" ;
+        break ;
+      case 'TEMPLATE' : # Ignore unresolved template
+        return "" ;
+      case 'AUTHOR' : # Catch author for display later
+        $author = $this->sub_parse ( $tree ) ;
+        if ( !in_array ( $author , $wiki2xml_authors ) ) $wiki2xml_authors[] = $author ;
+        return "" ;
+    }
+
+    if ( $tag == "LINK" ) {
+      $sub = $this->sub_parse ( $tree ) ;
+      $link = "" ;
+      $is_external = isset ( $this->attrs['TYPE'] ) AND strtolower ( $this->attrs['TYPE'] ) == 'external' ;
+      if ( isset ( $this->attrs['TYPE'] ) AND strtolower ( $this->attrs['TYPE'] ) == 'external' ) {
+        if ( $sub != "" ) $link .= $sub . " " ;
+        $link .= '[' . $this->attrs['HREF'] . ']' ;
+      } else {
+        if ( count ( $this->link_parts ) > 0 ) $link = array_pop ( $this->link_parts ) ;
+        $link_text = $link ;
+        if ( $link == "" ) $link = $this->link_target ;
+        $link .= $this->link_trail ;
+
+        $ns = $content_provider->get_namespace_id ( $this->link_target ) ;
+
+        if ( $ns == 6 ) { # Surround image text with newlines
+          if ( $xmlg['text_hide_images'] ) {
+            $link = '' ;
+          } else {
+            $nstext = "" ;
+            $link = "\m(" . $nstext . ":" . $link . ")\n" ;
+          }
+        } else if ( $ns == -9 ) { # Adding newline to interlanguage link
+          $link = "\m" . $link ;
+        } else if ( $ns == -8 ) { # Adding newline to category link
+          if ( $link_text == "!" || $link_text == '*' ) $link = "" ;
+          else $link = " ({$link})" ;
+          $link = "\m" . $this->link_target . $link . "\n" ;
+        } else {
+          $link = $tree->pre_link . $link ;
+        }
+      }
+
+      $ret .= $link ;
+    } else if ( $tag == "LIST" ) {
+      # Push the marker of this list level; numbered lists start at "1"
+      $type = strtolower ( $this->attrs['TYPE'] ) ;
+      $marker = '*' ;
+      if ( $type == 'numbered' ) $marker = "1" ;
+      else if ( $type == 'ident' ) $marker = ">" ;
+      array_push ( $tree->list , $marker ) ;
+      $ret .= $this->sub_parse ( $tree ) ;
+      array_pop ( $tree->list ) ;
+    } else if ( $tag == "LISTITEM" ) {
+      # Prefix built from the markers of all open list levels
+      $prefix = "" ;
+      foreach ( $tree->list AS $l ) {
+        if ( $l == '*' ) $prefix .= '-' ;
+        else if ( $l == '>' ) $prefix .= '<dd/>' ;
+        else $prefix .= $l . "." ;
+      }
+      $ret .= "\m" . $prefix . " " ;
+      $ret .= $this->sub_parse ( $tree ) ;
+
+      $x = array_pop ( $tree->list ) ;
+      if ( $x == "*" || $x == ">" ) array_push ( $tree->list , $x ) ; # Keep bullet
+      else array_push ( $tree->list , $x + 1 ) ; # Increase last counter
+    } else {
+      if ( $tag == "ARTICLE" && isset ( $this->attrs["TITLE"] ) ) {
+        $ret .= strtoupper ( urldecode ( $this->attrs["TITLE"] ) ) . "\n" ;
+      }
+      $is_table_tag = substr ( $tag , 0 , 5 ) == 'TABLE' ||
+        $tag == 'XHTML:TABLE' ||
+        $tag == 'XHTML:TH' ||
+        $tag == 'XHTML:CAPTION' ||
+        $tag == 'XHTML:TD' ||
+        $tag == 'XHTML:TR' ;
+      if ( $xmlg['text_hide_tables'] && $is_table_tag ) {
+        $ret = '' ;
+      } else {
+        $ret .= $this->sub_parse ( $tree ) ;
+        if ( $tag == "TABLEHEAD" || $tag == "XHTML:B" || $tag == "XHTML:STRONG" || $tag == "BOLD" ) {
+          $ret = $tree->bold . $ret . $tree->bold ;
+        } else if ( $tag == "XHTML:I" || $tag == "XHTML:EM" || $tag == "ITALICS" ) {
+          $ret = $tree->italics . $ret . $tree->italics ;
+        } else if ( $tag == "XHTML:U" ) {
+          $ret = $tree->underline . $ret . $tree->underline ;
+        }
+        if ( $tag == "TABLEHEAD" ) $ret = "\n" . $ret ;
+      }
+    }
+
+    $tree->iter-- ; # Unnecessary, since not really used
+
+    if ( $is_root ) {
+      # Resolve the soft line break markers
+      $ret = str_replace ( "\m\m" , "\m" , $ret ) ;
+      $ret = str_replace ( "\n\m" , "\n" , $ret ) ;
+      $ret = str_replace ( "\m" , "\n" , $ret ) ;
+    }
+
+    return $ret;
+  }
+}
+
+require_once ( "xml2tree.php" ) ;
+
+
+
+//_______________________________________________________________
+/*
+$infile = "Biology.xml" ;
+$xml = @file_get_contents ( $infile ) ;
+
+print htmlentities ( $xml ) . "<hr>" ;
+
+$x2t = new xml2php ;
+$tree = $x2t->scanString ( $xml ) ;
+
+$odt = new xml2odt ;
+$odt->parse ( $tree ) ;
+*/
+?>
Added: projects/wiki/extensions/wiki2xml/xml2xhtml.php
===================================================================
--- projects/wiki/extensions/wiki2xml/xml2xhtml.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/xml2xhtml.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -0,0 +1,541 @@
+<?php
+
+# Setting allowed XHTML construct list
+# $xhtml_allowed maps a parent tag name to the tag names allowed directly
+# inside it; the key '' stands for "no open tag". The lists are written as
+# comma-separated strings first and turned into arrays by the loop at the end.
+# The "global" declaration presumably keeps $xhtml_allowed global even when
+# this file is included from within a function - TODO confirm.
+global $xhtml_allowed ;
+# Both base lists end with a comma so that further names can be appended
+# directly; where nothing is appended, explode() yields a trailing empty entry.
+$xhtml_inline =
"a,b,br,cite,code,em,font,i,img,small,strong,span,sub,sup,tt,var,";
+$xhtml_block = "blockquote,div,dl,h1,h2,h3,h4,h5,h6,hr,ol,p,pre,table,ul,";
+$xhtml_allowed = array ( # A => B means B allowed in A
+ '' => $xhtml_block,
+ 'p' => $xhtml_inline."table",
+ 'table' => 'caption,col,colgroup,thead,tfoot,tbody,tr',
+ 'tbody' => 'tr',
+ 'tr' => 'td,th',
+ 'td' => $xhtml_inline.$xhtml_block,
+ 'th' => $xhtml_inline.$xhtml_block,
+ 'caption' => $xhtml_inline,
+ 'ul' => 'li',
+ 'ol' => 'li',
+ 'dl' => 'dt,dd',
+ 'li' => $xhtml_inline.$xhtml_block,
+ 'dt' => $xhtml_inline,
+ 'dd' => $xhtml_inline.$xhtml_block,
+ 'h1' => $xhtml_inline,
+ 'h2' => $xhtml_inline,
+ 'h3' => $xhtml_inline,
+ 'h4' => $xhtml_inline,
+ 'h5' => $xhtml_inline,
+ 'h6' => $xhtml_inline,
+ 'font' => $xhtml_inline,
+ 'div' => $xhtml_inline.$xhtml_block,
+ 'blockquote' => $xhtml_block,
+) ;
+
+# Captions and list items additionally accept everything a paragraph accepts
+$xhtml_allowed['caption'] .= $xhtml_allowed['p'] ;
+$xhtml_allowed['li'] .= $xhtml_allowed['p'] ;
+
+# Convert every comma-separated list into an array of tag names
+foreach ( $xhtml_allowed As $k => $v ) {
+ $xhtml_allowed[$k] = explode ( ',' , $v ) ;
+}
+
+
+# The class
+class XML2XHTML {
+ var $s = "" ;
+ var $tags = array () ;
+ var $ignore_counter = 0 ;
+ var $links = array () ;
+
+ /**
+  * Escape a text fragment for XHTML output.
+  *
+  * filter_named_entities() is defined elsewhere in the project; its return
+  * value is not used here, so it presumably rewrites $s by reference
+  * (NOTE(review): confirm). "<" and ">" are always escaped; "&" only on
+  * request, and then first, so that the entities produced for "<" and ">"
+  * are not escaped again.
+  *
+  * @param string $s raw text
+  * @param bool $replace_amp also replace "&" by "&amp;"
+  * @return string escaped text
+  */
+ function fix_text ( $s , $replace_amp = false ) {
+   filter_named_entities ( $s ) ;
+   if ( $replace_amp ) $s = str_replace ( "&" , "&amp;" , $s ) ;
+   $s = str_replace ( "<" , "&lt;" , $s ) ;
+   $s = str_replace ( ">" , "&gt;" , $s ) ;
+   return $s ;
+ }
+
+ /**
+  * Append text to the output buffer $this->s.
+  * Can be altered, e.g. for direct output (echo).
+  */
+ function add ( $t ) {
+   $this->s = $this->s . $t ;
+ }
+
+ /**
+  * Check the global $xhtml_allowed table.
+  *
+  * @param string $tag tag that is about to be opened
+  * @param string $base parent tag; "" means the innermost open tag
+  * @return bool true if $tag is listed as allowed inside $base
+  */
+ function is_allowed ( $tag , $base = "" ) {
+   global $xhtml_allowed ;
+   if ( $tag == "" ) return false ;
+
+   $parent = $base ;
+   if ( $parent == "" ) {
+     $top = $this->top_tag () ;
+     $parent = $top->tag ;
+   }
+   return isset ( $xhtml_allowed[$parent] ) && in_array ( $tag , $xhtml_allowed[$parent] ) ;
+ }
+
+ /**
+  * Serialise an attribute array for use inside an opening tag.
+  *
+  * Attribute names are lower-cased. Values are escaped with
+  * htmlspecialchars(); the previous backslash escaping of quotes is not an
+  * XML escape, so a quote in a value ended the attribute prematurely.
+  *
+  * NOTE(review): despite its name this method does not drop any attribute
+  * (event handlers, style, ...); $tag is not used.
+  *
+  * @param string $tag tag the attributes belong to (unused)
+  * @param array $attrs attribute name => value
+  * @return string ' name="value"' pairs, or "" without attributes
+  */
+ function filter_evil_attributes ( $tag , &$attrs ) {
+   if ( count ( $attrs ) == 0 ) return "" ;
+   $ret = "" ;
+   foreach ( $attrs AS $k => $v ) {
+     $ret .= " " . strtolower ( $k ) . '="' . htmlspecialchars ( $v , ENT_QUOTES ) . '"' ;
+   }
+   return $ret ;
+ }
+
+ /**
+  * Open a tag: record it on the tag stack and, if it is allowed at this
+  * position, write it to the output.
+  *
+  * A tag that is not allowed (or is flagged as bogus) is still put on the
+  * stack, marked as not really open, so that the matching close_tag() call
+  * finds it and stays silent as well.
+  *
+  * The stack entry is now created explicitly; assigning properties to an
+  * undefined variable is an error on current PHP versions.
+  *
+  * @param string $tag tag name
+  * @param array $attrs attributes to write
+  * @param bool $bogus record the tag without ever writing it
+  */
+ function add_tag ( $tag , $attrs = array () , $bogus = false ) {
+   $o = new stdClass ;
+   $o->tag = $tag ;
+   $o->really_open = $this->is_allowed ( $tag ) ;
+   if ( $bogus ) $o->really_open = false ;
+   $o->close_with_previous = false ;
+   $this->tags[] = $o ;
+   if ( $o->really_open ) $this->add ( "<{$tag}" . $this->filter_evil_attributes ( $tag , $attrs ) . ">" ) ;
+ }
+
+ /**
+  * Close the innermost open tag, which must be $tag; the script is
+  * terminated via die() if the stack is empty or holds a different tag.
+  * Entries flagged close_with_previous that become the new top of the
+  * stack are closed as well.
+  *
+  * @param string $tag name of the tag expected on top of the stack
+  */
+ function close_tag ( $tag ) {
+   $want = $tag ;
+   do {
+     if ( count ( $this->tags ) == 0 ) die ( "CLOSING NON-OPEN TAG {$want}" ) ;
+     $closed = array_pop ( $this->tags ) ;
+     if ( $want != $closed->tag ) die ( "CLOSING {$want} instead of {$closed->tag}" ) ;
+     if ( $closed->really_open ) $this->add ( "</{$closed->tag}>" ) ;
+
+     # Auto-close previous?
+     $next = $this->top_tag() ;
+     $want = $next->tag ;
+   } while ( $next->close_with_previous ) ;
+ }
+
+ /**
+  * Make sure the next tag can be opened inside $tag: if the innermost open
+  * tag neither is $tag nor lists $tag as an allowed child, $tag is opened
+  * implicitly. The implicit entry is flagged close_with_previous so that
+  * close_tag() removes it again.
+  *
+  * The implicit entry is a new object. Reusing the object returned by
+  * top_tag() modified the entry that was already on the stack, because
+  * objects are passed by handle since PHP 5.
+  *
+  * @param string $tag required parent tag
+  */
+ function insist_on ( $tag ) {
+   global $xhtml_allowed ;
+   $top = $this->top_tag () ;
+   if ( $top->tag == $tag ) return ; # Everything OK
+   if ( isset ( $xhtml_allowed[$top->tag] ) && in_array ( $tag , $xhtml_allowed[$top->tag] ) ) return ; # Everything OK
+
+   $o = new stdClass ;
+   $o->tag = $tag ;
+   $o->really_open = true ;
+   $o->close_with_previous = true ;
+   $this->tags[] = $o ;
+   $this->add ( "<{$tag}>" ) ;
+ }
+
+ /**
+  * Return the innermost open tag without removing it from the stack.
+  *
+  * With an empty stack a placeholder entry (tag "", not open, not
+  * auto-closing) is returned so that callers can always read the three
+  * properties. The placeholder is now created explicitly; assigning
+  * properties to an undefined variable is an error on current PHP versions.
+  *
+  * @return object entry with ->tag, ->really_open and ->close_with_previous
+  */
+ function top_tag () {
+   if ( count ( $this->tags ) == 0 ) {
+     $o = new stdClass ;
+     $o->tag = "" ;
+     $o->really_open = false ;
+     $o->close_with_previous = false ;
+     return $o ;
+   }
+   return end ( $this->tags ) ;
+ }
+
+
+
+
+ /**
+  * Handle an EXTENSION tag. Does nothing unless MEDIAWIKI is defined.
+  *
+  * On open, the extension name and attributes are remembered and the output
+  * buffer is set aside, so that everything written until the closing tag is
+  * captured. On close, the captured text is wrapped in the extension's own
+  * tag and rendered through $wgParser, provided a tag hook is registered for
+  * that name; otherwise the captured text is dropped.
+  *
+  * @param bool $open true for the opening tag
+  * @param array $attrs tag attributes, including EXTENSION_NAME
+  */
+ function tag_extension ( $open , &$attrs ) {
+   if( !defined( 'MEDIAWIKI' ) ) return ; # Only as MediaWiki extension
+
+   if ( $open ) {
+     $this->extension_name = $attrs['EXTENSION_NAME'] ;
+     $this->extension_attrs = $attrs ;
+     unset ( $this->extension_attrs['EXTENSION_NAME'] ) ;
+     $this->extension_text_before = $this->s ;
+     $this->s = "" ;
+     return ;
+   }
+
+   # Closing tag: restore the buffer that was set aside
+   $captured = trim ( $this->s ) ;
+   $this->s = $this->extension_text_before ;
+   $this->extension_text_before = "" ;
+
+   global $wgParser , $wgTitle ;
+   if ( !isset ( $wgParser ) ) return ; # Paranoia
+   if ( !isset ( $wgParser->mTagHooks[$this->extension_name] ) ) return ; # Extension has no handler
+
+   $wikitext = ( $captured == "" )
+     ? "<{$this->extension_name}/>"
+     : "<{$this->extension_name}>{$captured}</{$this->extension_name}>" ;
+   $options = new ParserOptions ;
+
+   $rendered = $wgParser->parse ( $wikitext , $wgTitle , $options , false ) ;
+   $this->add ( $rendered->getText() ) ;
+ }
+
+
+ /**
+  * Handle a PARAGRAPH tag. When $xmlg['xhtml_justify'] is set and the
+  * paragraph has no 'align' attribute, align="justify" is added to $attrs.
+  *
+  * @param bool $open true for the opening tag
+  * @param array $attrs tag attributes (may be extended, see above)
+  */
+ function tag_paragraph ( $open , &$attrs ) {
+   global $xmlg ;
+   if ( !isset ( $attrs['align'] ) && $xmlg['xhtml_justify'] ) {
+     $attrs['align'] = 'justify' ;
+   }
+   if ( !$open ) {
+     $this->close_tag ( "p" ) ;
+     return ;
+   }
+   $this->add_tag ( "p" , $attrs ) ;
+ }
+
+ /**
+  * Handle a SPACE tag: writes one blank when the tag opens.
+  */
+ function tag_space ( $open , &$attrs ) {
+   if ( !$open ) return ;
+   $this->add ( " " ) ;
+ }
+
+ # SIMPLE TAGS
+
+ /**
+  * Open or close an attribute-less XHTML tag.
+  *
+  * @param bool $open true to open, false to close
+  * @param string $tag XHTML tag name
+  */
+ function simple_tag ( $open , $tag ) {
+   if ( !$open ) {
+     $this->close_tag ( $tag ) ;
+     return ;
+   }
+   $this->add_tag ( $tag ) ;
+ }
+
+ # Wiki bold: <strong> with $xmlg['xhtml_logical_markup'], <b> otherwise
+ function tag_bold ( $open , &$attrs ) {
+   global $xmlg ;
+   if ( $xmlg['xhtml_logical_markup'] ) $this->simple_tag ( $open , "strong" ) ;
+   else $this->simple_tag ( $open , "b" ) ;
+ }
+
+ function tag_xhtml_b ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "b" ) ;
+ }
+
+ function tag_xhtml_strong ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "strong" ) ;
+ }
+
+ # Wiki italics: <em> with $xmlg['xhtml_logical_markup'], <i> otherwise
+ function tag_italics ( $open , &$attrs ) {
+   global $xmlg ;
+   if ( $xmlg['xhtml_logical_markup'] ) $this->simple_tag ( $open , "em" ) ;
+   else $this->simple_tag ( $open , "i" ) ;
+ }
+
+ function tag_xhtml_i ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "i" ) ;
+ }
+
+ function tag_xhtml_em ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "em" ) ;
+ }
+
+ function tag_xhtml_ol ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "ol" ) ;
+ }
+
+ function tag_xhtml_ul ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "ul" ) ;
+ }
+
+ function tag_xhtml_dl ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "dl" ) ;
+ }
+
+ function tag_xhtml_li ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "li" ) ;
+ }
+
+ function tag_xhtml_dt ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "dt" ) ;
+ }
+
+ function tag_xhtml_dd ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "dd" ) ;
+ }
+
+ function tag_xhtml_code ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "code" ) ;
+ }
+
+ function tag_preblock ( $open , &$attrs ) {
+   $this->simple_tag ( $open , "pre" ) ;
+ }
+
+ # A preformatted line only contributes the newline at its end
+ function tag_preline ( $open , &$attrs ) {
+   if ( $open ) return ;
+   $this->add ( "\n" ) ;
+ }
+
+ # MISC
+ /**
+  * Handle <font>: opened with its attributes, closed as a plain tag.
+  */
+ function tag_xhtml_font ( $open , &$attrs ) {
+   if ( !$open ) {
+     $this->close_tag ( "font" ) ;
+     return ;
+   }
+   $this->add_tag ( "font" , $attrs ) ;
+ }
+
+ /**
+  * Handle a wiki LIST tag. The TYPE attribute selects the XHTML list:
+  * bullet = ul, numbered = ol, ident = dl; other types open nothing.
+  * On close, whatever tag is innermost is closed.
+  */
+ function tag_list ( $open , &$attrs ) {
+   if ( !$open ) {
+     $top = $this->top_tag () ;
+     $this->close_tag ( $top->tag ) ;
+     return ;
+   }
+   switch ( $attrs['TYPE'] ) {
+     case 'bullet' :
+       $this->tag_xhtml_ul ( $open , $attrs ) ;
+       break ;
+     case 'numbered' :
+       $this->tag_xhtml_ol ( $open , $attrs ) ;
+       break ;
+     case 'ident' :
+       $this->tag_xhtml_dl ( $open , $attrs ) ;
+       break ;
+   }
+ }
+
+ /**
+  * Handle a wiki LISTITEM tag: opens <dt> when the innermost tag belongs
+  * to a definition list (dl, dt or dd) and <li> otherwise. On close,
+  * whatever tag is innermost is closed.
+  */
+ function tag_listitem ( $open , &$attrs ) {
+   $top = $this->top_tag() ;
+   if ( !$open ) {
+     $this->close_tag ( $top->tag ) ;
+     return ;
+   }
+   $in_definition_list = ( $top->tag == 'dl' || $top->tag == 'dt' || $top->tag == 'dd' ) ;
+   if ( $in_definition_list ) {
+     $this->tag_xhtml_dt ( $open , $attrs ) ;
+   } else {
+     $this->tag_xhtml_li ( $open , $attrs ) ;
+   }
+ }
+
+ # HTML
+ /**
+  * Handle <div>: opened with its attributes, closed as a plain tag.
+  */
+ function tag_xhtml_div ( $open , &$attrs ) {
+   if ( !$open ) {
+     $this->close_tag ( "div" ) ;
+     return ;
+   }
+   $this->add_tag ( "div" , $attrs ) ;
+ }
+
+ /**
+  * Handle <span>: opened with its attributes, closed as a plain tag.
+  *
+  * This handler used to open and close "div" (copied from tag_xhtml_div),
+  * which turned inline spans into block elements; "span" is what the
+  * allowed-tag table lists as an inline element.
+  */
+ function tag_xhtml_span ( $open , &$attrs ) {
+   if ( $open ) $this->add_tag ( "span" , $attrs ) ;
+   else $this->close_tag ( "span" ) ;
+ }
+
+ # LINKS
+ /**
+  * Write the markup for an internal (wiki) link.
+  *
+  * Dispatches on the namespace id reported by
+  * $content_provider->get_namespace_id(): 6 = image, -8 = category,
+  * -9 = interlanguage, anything else = ordinary page link. Category and
+  * interlanguage links never produce output here.
+  *
+  * Fixed here:
+  *  - the box width was written as ";width=N", which is not CSS; it is now
+  *    "width:Npx"
+  *  - the URL of the magnify icon was cut off and lacked its closing quote
+  *  - a non-numeric "...px" part is ignored instead of being used in
+  *    arithmetic
+  *  - the computed height is rounded to a whole number of pixels
+  *
+  * NOTE(review): $text is inserted into title/alt attributes as is; if it can
+  * contain markup or quotes it needs escaping - confirm what the character
+  * data handler delivers.
+  *
+  * @param object $o link record built by tag_link() and the tag_target(),
+  *                  tag_part() and tag_trail() handlers
+  */
+ function make_internal_link ( &$o ) {
+   global $content_provider ;
+   $text = $o->text ;
+   if ( $text == "" ) $text = $o->target ;
+   $text .= $o->trail ;
+   $ns = $content_provider->get_namespace_id ( $o->target ) ;
+
+   if ( $ns == 6 ) { # Image
+     if ( !$content_provider->do_show_images () ) {
+       return ;
+     }
+     $nstext = explode ( ":" , $o->target , 2 ) ;
+     $target = array_pop ( $nstext ) ;
+     $href = $content_provider->get_image_url ( $target ) ;
+
+     # getimagesize() failures fall through to the defaults below
+     list ( $i_width , $i_height ) = @getimagesize ( $href ) ;
+     if ( $i_width <= 0 ) { # Paranoia
+       $i_width = 100 ;
+       $i_height = 100 ;
+     }
+
+     $width = "" ;
+     $align = "" ;
+     $is_thumb = false ;
+     foreach ( $o->parts AS $p ) {
+       $p = strtolower ( trim ( $p ) ) ;
+       if ( $p == 'thumb' ) {
+         $is_thumb = true ;
+         if ( $align == '' ) $align = 'right' ;
+         if ( $width == '' ) $width = '200' ;
+       } else if ( $p == 'right' || $p == 'center' || $p == 'left' ) {
+         $align = $p ;
+       } else if ( substr ( $p , -2 , 2 ) == 'px' ) {
+         $requested = trim ( substr ( $p , 0 , -2 ) ) ;
+         if ( is_numeric ( $requested ) ) $width = $requested ;
+       }
+     }
+
+     if ( $width == '' ) {
+       $size = "" ;
+       $divwidth = "" ;
+     } else {
+       # Scale the height to keep the aspect ratio
+       $height = round ( ( $i_height * $width ) / $i_width ) ;
+       $size = " width='{$width}' height='{$height}'" ;
+       $divwidth = "width:" . ( $width + 2 ) . "px" ;
+     }
+
+     $s = "" ;
+     $image_page = $content_provider->get_full_url ( $o->target ) ;
+     if ( $is_thumb ) {
+       $s .= '<div class="thumb tright"><div style="' . $divwidth . '">' ;
+     } else if ( $align != '' ) {
+       $style = "float:{$align}" ;
+       if ( $divwidth != "" ) $style .= ";" . $divwidth ;
+       $s .= "<div style='{$style}'>" ;
+     }
+     $s .= '<a href="' . $image_page . '" title="' . $text . '" class="internal">' ;
+     $s .= "<img src='{$href}'{$size} alt=\"{$text}\" longdesc=\"{$image_page}\"/>" ;
+     $s .= '</a>' ;
+     if ( $is_thumb ) {
+       $s .= '<div class="thumbcaption">' ;
+       $s .= '<div class="magnify" style="float:right">' ;
+       $s .= '<a href="' . $image_page . '" class="internal" title="enlarge">' ;
+       $s .= '<img src="http://en.wikipedia.org/skins-1.5/common/images/magnify-clip.png" width="15" height="11" alt="enlarge" />' ;
+       $s .= '</a>' ;
+       $s .= "</div>" ;
+       $s .= $text ;
+       $s .= "</div>" ;
+     }
+     if ( $is_thumb || $align != '' ) $s .= "</div>" ;
+     if ( $is_thumb ) $s .= "</div>" ;
+     $this->add ( $s ) ;
+
+   } else if ( $ns == -8 ) { # Category link
+     if ( !$content_provider->get_var ( 'keep_categories' ) ) return ;
+   } else if ( $ns == -9 ) { # Interlanguage link
+     if ( !$content_provider->get_var ( 'keep_interlanguage' ) ) return ;
+   } else { # Internal link
+     $this->add ( $content_provider->get_internal_link ( $o->target , $text ) ) ; # For now
+   }
+ }
+
+ /**
+  * Handle a LINK tag.
+  *
+  * On open, a link record is put on $this->links and the output buffer is
+  * set aside inside it, so that the link's children are captured
+  * separately. On close, the buffer is restored and the link is written:
+  * internal links through make_internal_link(), everything else as a plain
+  * <a href>.
+  *
+  * Fixed here: the record is created explicitly (assigning properties to an
+  * undefined variable is an error on current PHP versions), and the href of
+  * external links is escaped before it is written into the attribute.
+  *
+  * @param bool $open true for the opening tag
+  * @param array $attrs tag attributes; TYPE and HREF are used
+  */
+ function tag_link ( $open , &$attrs ) {
+   if ( $open ) {
+     $o = new stdClass ;
+     $o->trail = "" ;
+     $o->parts = array () ;
+     $o->target = "" ;
+     $o->type = 'internal' ;
+     $o->href = "" ;
+     $o->text = "" ;
+     if ( isset ( $attrs['TYPE'] ) ) $o->type = $attrs['TYPE'] ;
+     if ( isset ( $attrs['HREF'] ) ) $o->href = $attrs['HREF'] ;
+     $o->s = $this->s ; # Output so far, restored when the link closes
+     $this->s = "" ;
+     $this->links[] = $o ;
+   } else {
+     $o = array_pop ( $this->links ) ;
+     $text = $this->s ;
+     $this->s = $o->s ;
+     # The last part of an internal link is its display text
+     if ( count ( $o->parts ) > 0 ) $o->text = array_pop ( $o->parts ) ;
+     if ( $o->type == 'internal' ) {
+       $this->make_internal_link ( $o ) ;
+     } else {
+       $this->add ( '<a href="' . htmlspecialchars ( $o->href , ENT_QUOTES ) . '">' . $text . '</a>' ) ;
+     }
+   }
+ }
+
+ # The three handlers below act on the closing tag only: the text captured
+ # since the previous handler is stored in the innermost link record and the
+ # output buffer is emptied for the next piece.
+
+ # TARGET: page (or image) the link points to
+ function tag_target ( $open , &$attrs ) {
+   if ( !$open ) {
+     $link = array_pop ( $this->links ) ;
+     $link->target = $this->s ;
+     array_push ( $this->links , $link ) ;
+     $this->s = "" ;
+   }
+ }
+
+ # PART: one "|"-separated part of the link
+ function tag_part ( $open , &$attrs ) {
+   if ( !$open ) {
+     $link = array_pop ( $this->links ) ;
+     $link->parts[] = $this->s ;
+     array_push ( $this->links , $link ) ;
+     $this->s = "" ;
+   }
+ }
+
+ # TRAIL: text directly attached behind the link
+ function tag_trail ( $open , &$attrs ) {
+   if ( !$open ) {
+     $link = array_pop ( $this->links ) ;
+     $link->trail = $this->s ;
+     array_push ( $this->links , $link ) ;
+     $this->s = "" ;
+   }
+ }
+
+
+ # IGNORE TAGS
+ # Count how many ignored tags are currently open
+ function ignore ( $open ) {
+   $this->ignore_counter += $open ? 1 : -1 ;
+ }
+
+ # Unresolved templates, template variables and magic variables are ignored
+ function tag_template ( $open , &$attrs ) {
+   $this->ignore ( $open ) ;
+ }
+
+ function tag_templatevar ( $open , &$attrs ) {
+   $this->ignore ( $open ) ;
+ }
+
+ function tag_magic_variable ( $open , &$attrs ) {
+   $this->ignore ( $open ) ;
+ }
+
+ # HEADINGS
+ /**
+  * Handle a heading. The level comes from $level or, if that is "", from
+  * the LEVEL attribute; levels above 6 are reduced to 6. On close, whatever
+  * tag is innermost is closed.
+  */
+ function tag_heading ( $open , &$attrs , $level = "" ) {
+   if ( $level == "" ) $level = $attrs['LEVEL'] ;
+   if ( $level > 6 ) $level = 6 ; # Paranoia
+   if ( !$open ) {
+     $top = $this->top_tag() ;
+     $this->close_tag ( $top->tag ) ;
+     return ;
+   }
+   $this->add_tag ( "h" . $level ) ;
+ }
+
+ # <h1> ... <h6> are headings with a fixed level
+ function tag_xhtml_h1 ( $open , &$attrs ) {
+   $this->tag_heading ( $open , $attrs , '1' ) ;
+ }
+
+ function tag_xhtml_h2 ( $open , &$attrs ) {
+   $this->tag_heading ( $open , $attrs , '2' ) ;
+ }
+
+ function tag_xhtml_h3 ( $open , &$attrs ) {
+   $this->tag_heading ( $open , $attrs , '3' ) ;
+ }
+
+ function tag_xhtml_h4 ( $open , &$attrs ) {
+   $this->tag_heading ( $open , $attrs , '4' ) ;
+ }
+
+ function tag_xhtml_h5 ( $open , &$attrs ) {
+   $this->tag_heading ( $open , $attrs , '5' ) ;
+ }
+
+ function tag_xhtml_h6 ( $open , &$attrs ) {
+   $this->tag_heading ( $open , $attrs , '6' ) ;
+ }
+
+ # TABLES
+	# Handles <table>. For both the opening and the closing tag: if the tag
+	# on top of the tag stack is a really-open "p", that paragraph is closed
+	# and a new "p" is added with the third add_tag() argument set to true
+	# (NOTE(review): the meaning of that flag is defined in add_tag(), not
+	# visible here). After that the "table" tag itself is opened with the
+	# given attributes, or closed.
+	function tag_table ( $open , &$attrs ) {
+		$top = $this->top_tag() ;
+		$inside_open_paragraph = ( $top->tag == "p" && $top->really_open ) ;
+		if ( $inside_open_paragraph ) {
+			$this->close_tag ( 'p' ) ;
+			$this->add_tag ( "p" , array() , true ) ;
+		}
+		if ( !$open ) {
+			$this->close_tag ( "table" ) ;
+			return ;
+		}
+		$this->add_tag ( "table" , $attrs ) ;
+	}
+
+	# Handles a table caption: on open, calls insist_on("table")
+	# (presumably making sure an enclosing table is open - see insist_on)
+	# and adds a "caption" tag with the given attributes; on close, closes
+	# the "caption" tag.
+	function tag_tablecaption ( $open , &$attrs ) {
+		if ( !$open ) {
+			$this->close_tag ( "caption" ) ;
+			return ;
+		}
+		$this->insist_on ( "table" ) ;
+		$this->add_tag ( "caption" , $attrs ) ;
+	}
+
+	# Handles a table row: on open, calls insist_on("table") (presumably
+	# making sure an enclosing table is open - see insist_on) and adds a
+	# "tr" tag with the given attributes; on close, closes the "tr" tag.
+	function tag_tablerow ( $open , &$attrs ) {
+		if ( !$open ) {
+			$this->close_tag ( "tr" ) ;
+			return ;
+		}
+		$this->insist_on ( "table" ) ;
+		$this->add_tag ( "tr" , $attrs ) ;
+	}
+
+	# Handles a table cell: on open, calls insist_on("tr") (presumably
+	# making sure an enclosing row is open - see insist_on) and adds a "td"
+	# tag with the given attributes; on close, closes the "td" tag.
+	function tag_tablecell ( $open , &$attrs ) {
+		if ( !$open ) {
+			$this->close_tag ( "td" ) ;
+			return ;
+		}
+		$this->insist_on ( "tr" ) ;
+		$this->add_tag ( "td" , $attrs ) ;
+	}
+
+	# Handles a table header cell: on open, calls insist_on("tr")
+	# (presumably making sure an enclosing row is open - see insist_on) and
+	# adds a "th" tag with the given attributes; on close, closes the "th"
+	# tag.
+	function tag_tablehead ( $open , &$attrs ) {
+		if ( !$open ) {
+			$this->close_tag ( "th" ) ;
+			return ;
+		}
+		$this->insist_on ( "tr" ) ;
+		$this->add_tag ( "th" , $attrs ) ;
+	}
+
+	# XHTML table elements are forwarded to the handlers of the
+	# corresponding wiki table elements.
+	function tag_xhtml_table ( $open , &$attrs ) {
+		$this->tag_table ( $open , $attrs ) ;
+	}
+
+	function tag_xhtml_tr ( $open , &$attrs ) {
+		$this->tag_tablerow ( $open , $attrs ) ;
+	}
+
+	function tag_xhtml_td ( $open , &$attrs ) {
+		$this->tag_tablecell ( $open , $attrs ) ;
+	}
+
+	function tag_xhtml_th ( $open , &$attrs ) {
+		$this->tag_tablehead ( $open , $attrs ) ;
+	}
+
+	function tag_xhtml_caption ( $open , &$attrs ) {
+		$this->tag_tablecaption ( $open , $attrs ) ;
+	}
+
+	# Handles <article>: on open, if a TITLE attribute is present, emits the
+	# URL-decoded title as an <h1> heading. The closing tag and articles
+	# without a title produce nothing.
+	function tag_article ( $open , &$attrs ) {
+		if ( !$open || !isset ( $attrs['TITLE'] ) ) return ;
+		$this->add_tag ( "h1" ) ;
+		# add() is also used elsewhere in this class for raw markup, so the
+		# decoded title is escaped here; otherwise a title containing "&" or
+		# "<" would produce malformed XHTML.
+		$this->add ( htmlspecialchars ( urldecode ( $attrs['TITLE'] ) ) ) ;
+		$this->close_tag ( "h1" ) ;
+	}
+
+
+}
+
+
+# Global functions for parsing
+
+# Start-element handler for the XML parser. The element name is lowercased,
+# ':' is replaced by '_' and the result is prefixed with 'tag_'; if the
+# global $xml2xhtml object has a method of that name, it is called with
+# $open = true and the element attributes. Other elements are ignored.
+function XML2XHTML_START($parser, $name, $attrs) {
+	global $xml2xhtml ;
+	$method = 'tag_' . str_replace ( ':' , '_' , strtolower ( $name ) ) ;
+	if ( !method_exists ( $xml2xhtml , $method ) ) return ;
+	$xml2xhtml->$method ( true , $attrs ) ;
+}
+
+# End-element handler for the XML parser. Resolves the handler method the
+# same way as XML2XHTML_START ('tag_' + lowercased name with ':' replaced
+# by '_') and, if the global $xml2xhtml object has it, calls it with
+# $open = false. Other elements are ignored.
+function XML2XHTML_END($parser, $name) {
+	global $xml2xhtml ;
+	$name = strtolower ( $name ) ;
+	$function = 'tag_' . str_replace ( ':' , '_' , $name ) ;
+	if ( !method_exists ( $xml2xhtml , $function ) ) return ;
+	# End tags carry no attributes. The handlers take $attrs by reference,
+	# so pass an explicit empty array instead of an undefined variable.
+	$attrs = array () ;
+	$xml2xhtml->$function ( false , $attrs ) ;
+}
+
+# Character-data handler for the XML parser. Unless the converter is inside
+# an ignored element (ignore_counter > 0), the data is passed through
+# fix_text() and appended to the converter's text buffer.
+function XML2XHTML_DATA ( $parser, $data ) {
+	global $xml2xhtml ;
+	if ( $xml2xhtml->ignore_counter <= 0 ) {
+		$xml2xhtml->s .= $xml2xhtml->fix_text ( $data ) ;
+	}
+}
+
+# Converts wiki2xml XML to XHTML.
+#   $xml - either one XML string or an array of XML chunks that is consumed
+#          through xml_shift()
+# The input is wrapped between xml_articles_header() and '</articles>' and
+# fed to an XML parser whose handlers drive a fresh XML2XHTML object. That
+# object is left in the global $xml2xhtml; nothing is returned.
+# NOTE(review): the return values of xml_parse() are not checked, so parse
+# errors go unnoticed.
+function convert_xml_xhtml ( &$xml ) {
+	global $xml2xhtml ;
+	$xml2xhtml = new XML2XHTML ;
+	$xml_parser_handle = xml_parser_create();
+	xml_set_element_handler($xml_parser_handle, "XML2XHTML_START", "XML2XHTML_END");
+	xml_set_character_data_handler($xml_parser_handle, "XML2XHTML_DATA");
+
+	xml_parse($xml_parser_handle, xml_articles_header() , false) ;
+	if ( is_array ( $xml ) ) {
+		while ( $x = xml_shift ( $xml ) ) {
+			xml_parse($xml_parser_handle, $x, false) ;
+		}
+	} else {
+		xml_parse($xml_parser_handle, $xml, false) ;
+	}
+	# The last chunk must be flagged as final in both cases; otherwise the
+	# parser is never told that the document is complete.
+	xml_parse($xml_parser_handle, '</articles>', true) ;
+
+	xml_parser_free($xml_parser_handle);
+}
+
+?>
Added: projects/wiki/extensions/wiki2xml/xmldump2files.php
===================================================================
--- projects/wiki/extensions/wiki2xml/xmldump2files.php (rev 0)
+++ projects/wiki/extensions/wiki2xml/xmldump2files.php 2007-07-04 19:14:29 UTC (rev
3798)
@@ -0,0 +1,151 @@
+<?php
+
+# Change these to your local settings:
+# $dumpfile is the XML dump to read, $basedir the directory under which the
+# output directory (named after the dump file) is created.
+$dumpfile = "K:\\dewiki-20060327-pages-articles.xml" ;
+$basedir = "C:" ;
+
+#______________________________________________________________________________
+# GLOBAL VARIABLES
+# $dir          output directory, computed in the MAIN section below
+# $namespaces   namespace key => namespace name, filled by the XML handlers
+# $mem          scratch state of the XML handlers (current tag name, key,
+#               title, namespace, text)
+# $tags         stack of the names of the currently open XML tags
+# $page_counter number of pages stored so far
+$dir = "" ;
+$namespaces = array () ;
+$mem = array () ;
+$tags = array () ;
+$page_counter = 0 ;
+
+# FUNCTIONS
+
+require_once ( "global_functions.php" ) ;
+
+# Writes $text to a file derived from $loc.
+#   $loc  - object with 'fullname' (path without extension) and 'file'
+#           (name used in the error message)
+#   $text - the content to write
+#   $mode - "text" writes <fullname>.txt, "gzip" writes <fullname>.gz with
+#           maximum compression; any other mode does nothing
+# If the file cannot be opened, a message is printed and nothing is written.
+function store_file ( &$loc , &$text , $mode = "text" ) {
+	if ( $mode == "text" ) {
+		if ( !$handle = fopen($loc->fullname.".txt", 'wb') ) {
+			print "Failed to open {$loc->file}.txt!<br/>" ;
+			flush () ;
+			return ; # do not write to / close an invalid handle
+		}
+		fwrite($handle, $text) ;
+		fclose ( $handle ) ;
+	} else if ( $mode == "gzip" ) {
+		if ( !$gz = gzopen($loc->fullname.".gz",'w9') ) {
+			print "Failed to open {$loc->file}.gz!<br/>" ;
+			flush () ;
+			return ; # do not write to / close an invalid handle
+		}
+		gzwrite($gz, $text);
+		gzclose($gz);
+	}
+}
+
+# Returns the current Unix timestamp with microseconds as a float, built
+# from the "usec sec" string that microtime() returns.
+function microtime_float()
+{
+	$parts = explode(" ", microtime());
+	$fraction = (float)$parts[0];
+	$seconds = (float)$parts[1];
+	return ($fraction + $seconds);
+}
+
+# Global functions for parsing
+
+# Start-element handler for the dump parser. Remembers the element name as
+# the current tag and pushes it onto the tag stack. For NAMESPACE the KEY
+# attribute is remembered, for TEXT the text buffer is reset.
+function XML2TXT_START($parser, $name, $attrs) {
+	global $mem , $tags ;
+	$tags[] = $name ;
+	$mem["name"] = $name ;
+	switch ( $name ) {
+		case "NAMESPACE" :
+			$mem['key'] = $attrs["KEY"] ;
+			break ;
+		case "TEXT" :
+			$mem['text'] = "" ;
+			break ;
+	}
+}
+
+# End-element handler for the dump parser.
+# - When a NAMESPACE element ends, its text is recorded in $namespaces
+#   under the remembered key.
+# - When a PAGE element ends, the collected text is stored as a text file
+#   at the location computed by get_file_location_global(), the page
+#   counter is increased and a progress dot is printed every 1000 pages
+#   (plus a line break every 50000 pages).
+# Afterwards the element is removed from the tag stack and the current tag
+# name becomes the new top of the stack, or "" if the stack is empty.
+function XML2TXT_END($parser, $name) {
+	global $mem , $namespaces , $tags , $page_counter , $dir ;
+	switch ( $mem['name'] ) {
+		case 'NAMESPACE' :
+			$namespaces[$mem['key']] = $mem['text'] ;
+			break ;
+		case 'PAGE' :
+			$loc = get_file_location_global ( $dir , $mem['namespace'] , $mem['title'] , true ) ;
+			store_file ( $loc , $mem['text'] , 'text' ) ;
+
+			$page_counter++ ;
+			if ( $page_counter % 1000 == 0 ) {
+				print '.' ;
+				if ( $page_counter % 50000 == 0 ) print "<br/>" ;
+				flush () ;
+			}
+			break ;
+	}
+
+	array_pop ( $tags ) ;
+	$mem['name'] = ( count ( $tags ) > 0 ) ? end ( $tags ) : "" ;
+}
+
+# Character-data handler for the dump parser; acts on the current tag:
+# - NAMESPACE: remembers the data as the namespace name.
+# - TITLE: splits a leading "<namespace name>:" prefix off the title (only
+#   namespaces with a key > 0 are considered) and remembers the remaining
+#   title together with the namespace key; titles without a known prefix
+#   get namespace 0.
+# - TEXT: appends the data to the text buffer.
+# NOTE(review): a title delivered by the parser in several chunks would
+# only keep its last chunk here - confirm whether that can happen.
+function XML2TXT_DATA ( $parser, $data ) {
+	global $mem , $namespaces ;
+	if ( $mem['name'] == 'NAMESPACE' ) {
+		$mem['text'] = $data ;
+	} else if ( $mem['name'] == 'TITLE' ) {
+		$ns = 0 ;
+		foreach ( $namespaces AS $k => $v ) {
+			if ( $k <= 0 ) continue ;
+			$prefix = $v . ":" ;
+			# Compare the start of the title itself with the prefix.
+			if ( substr ( $data , 0 , strlen ( $prefix ) ) != $prefix ) continue ;
+			$ns = $k ;
+			$data = (string) substr ( $data , strlen ( $prefix ) ) ;
+			break ;
+		}
+		$mem['title'] = $data ;
+		$mem['namespace'] = $ns ;
+	} else if ( $mem['name'] == 'TEXT' ) {
+		$mem['text'] .= $data ;
+	}
+}
+
+# Parses the XML dump $xml_filename in 8192-byte chunks with the XML2TXT_*
+# handlers, prints the total time taken, and finally writes the collected
+# namespaces as "key:name" lines to namespaces.txt in the global $dir.
+# Terminates the script if the dump cannot be opened or is not well-formed.
+function scan_xml_file ( $xml_filename ) {
+	global $namespaces , $dir ;
+	$xml_parser_handle = xml_parser_create();
+	xml_set_element_handler($xml_parser_handle, "XML2TXT_START", "XML2TXT_END");
+	xml_set_character_data_handler($xml_parser_handle, "XML2TXT_DATA");
+
+	if (!($parse_handle = fopen($xml_filename, 'r'))) {
+		die("FEHLER: Datei $xml_filename nicht gefunden.");
+	}
+
+	$t1 = microtime_float() ;
+	while ($xml_data = fread($parse_handle, 8192)) {
+		if (!xml_parse($xml_parser_handle, $xml_data, feof($parse_handle))) {
+			die(sprintf('XML error: %s at line %d',
+				xml_error_string(xml_get_error_code($xml_parser_handle)),
+				xml_get_current_line_number($xml_parser_handle)));
+		}
+	}
+	fclose ( $parse_handle ) ; # the dump is no longer needed
+	$t2 = microtime_float() - $t1 ;
+	print "Took {$t2} seconds total.<br/>" ; flush () ;
+
+	xml_parser_free($xml_parser_handle);
+
+	if ( !$handle = fopen($dir."/namespaces.txt", 'wb') ) {
+		print "Failed to open {$dir}/namespaces.txt!<br/>" ;
+		flush () ;
+		return ;
+	}
+	foreach ( $namespaces AS $ns => $nst ) {
+		$t = "{$ns}:{$nst}\n" ;
+		fwrite($handle, $t) ;
+	}
+	fclose ( $handle ) ;
+
+}
+
+
+# MAIN
+
+# Derive the output directory from the dump file name: take the last path
+# component (both "\" and "/" count as separators), drop ".xml" and place
+# the result under $basedir. The parts are kept in a variable because
+# array_pop() takes its argument by reference.
+$dump_path_parts = explode ( "/" , str_replace ( "\\" , "/" , $dumpfile ) ) ;
+$dir = array_pop ( $dump_path_parts ) ;
+$dir = $basedir . "/" . str_replace ( ".xml" , "" , $dir ) ;
+
+@set_time_limit ( 0 ) ; # No time limit
+#ini_set('user_agent','MSIE 4\.0b2;'); # Fake user agent
+header ('Content-type: text/html; charset=utf-8');
+@mkdir ( $dir ) ; # errors suppressed, e.g. when the directory already exists
+scan_xml_file ( $dumpfile ) ;
+
+?>
Modified: projects/wiki/skins/Devmo.php
===================================================================
--- projects/wiki/skins/Devmo.php 2007-07-04 11:33:22 UTC (rev 3797)
+++ projects/wiki/skins/Devmo.php 2007-07-04 19:14:29 UTC (rev 3798)
@@ -47,6 +47,7 @@
* @access private
*/
function execute() {
+ global $wgRequest;
// Suppress warnings to prevent notices about missing indexes in $this->data
wfSuppressWarnings();
@@ -56,14 +57,22 @@
<meta http-equiv="Content-Type" content="<?php
$this->text('mimetype') ?>; charset=<?php
$this->text('charset') ?>" />
<meta http-equiv="Content-Language" content="<?php
$this->text('lang') ?>"/>
<?php $this->html('headlinks') ?>
- <title><?php $this->text('pagetitle') ?></title>
+ <title><?php $this->text('pagetitle')?></title>
+ <?php if ( $wgRequest->getText('printable') == "yes" ) {
?>
+ <link rel="stylesheet" type="text/css"
media="all" href="<?php $this->text('stylepath' )
?>/devmo/css/base.css" />
+ <link rel="stylesheet" type="text/css" media="all"
href="<?php $this->text('stylepath' ) ?>/devmo/css/wiki.css"
/>
+ <link rel="stylesheet" type="text/css" media="all"
href="<?php $this->text('stylepath' )
?>/devmo/css/wikiprint.css" />
+ <?php } else { ?>
+
<link rel="stylesheet" type="text/css" media="all"
href="<?php $this->text('stylepath' ) ?>/devmo/css/base.css"
/>
<link rel="stylesheet" type="text/css" media="all"
href="<?php $this->text('stylepath' ) ?>/devmo/css/wiki.css"
/>
<link rel="stylesheet" type="text/css" media="print"
href="<?php $this->text('stylepath' )
?>/devmo/css/wikiprint.css" />
<!--[if gte IE 6]><style type="text/css">@import "<?php
$this->text('stylepath') ?>/<?php $this->text('stylename')
?>/IE60Fixes.css";</style><![endif]-->
<!--[if IE]><script type="<?php $this->text('jsmimetype')
?>" src="<?php $this->text('stylepath')
?>/common/IEFixes.js"></script>
<meta http-equiv="imagetoolbar" content="no"
/><!{endif]-->
+ <?php } ?>
+
<?php if($this->data['jsvarurl' ]) { ?><script
type="<?php $this->text('jsmimetype') ?>" src="<?php
$this->text('jsvarurl' ) ?>"></script><?php } ?>
<script type="<?php $this->text('jsmimetype') ?>"
src="<?php $this->text('stylepath' )
?>/common/wikibits.js"></script>
<script type="<?php $this->text('jsmimetype') ?>"
src="<?php $this->text('stylepath' )
?>/devmo/devmo.js"></script>
@@ -238,7 +247,8 @@
<div id="content">
- <div class="article">
+ <div class="article">
+
<a name="top" id="contentTop"></a>
<?php if($this->data['sitenotice']) { ?><div
id="siteNotice"><?php $this->html('sitenotice')
?></div><?php } ?>
<h1 class="firstHeading"><?php $this->text('title')
?></h1>
Modified: projects/wiki/skins/devmo/css/wikiprint.css
===================================================================
--- projects/wiki/skins/devmo/css/wikiprint.css 2007-07-04 11:33:22 UTC (rev 3797)
+++ projects/wiki/skins/devmo/css/wikiprint.css 2007-07-04 19:14:29 UTC (rev 3798)
@@ -4,6 +4,9 @@
#sidebar {
display: none;
}
+#container {
+ width: 550px;
+}
#content {
background: #fff;
margin: 20px;
@@ -31,6 +34,9 @@
.breadcrumbs {
display: none;
}
+.editsection {
+ display: none;
+}
.article .urlexpansion {
display: inline;
}