MediaWiki:ExtractFirst.xsl

<?xml version="1.0" encoding="UTF-8"?>
 <!-- Extracts the first definition of a word from Wiktionary so that it can be embedded on other sites. Example request:
 http://en.wiktionary.org/w/api.php?action=parse&prop=text&page=word&format=xml&xslt=MediaWiki:extractFirst.xsl -->
 <xsl:stylesheet version="1.0"
 xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 <xsl:output method='html'/>
 <!-- for translation. Also see JS -->
 <!-- Text direction; emitted as the dir attribute of the generated html element. -->
 <xsl:variable name="dir">ltr</xsl:variable>
 <!-- Label of the "more" link (the a element with id more-link). -->
 <xsl:variable name="more">» Meer</xsl:variable>
 <!-- Bold prefix shown in front of the API error text by the api/error template. -->
 <xsl:variable name="error">Fout: </xsl:variable>
 <!-- Copyright notice. This is a result tree fragment containing links, so it is inserted with xsl:copy-of rather than xsl:value-of. -->
 <xsl:variable name="copyright"> © <a href="http://nl.wiktionary.org/wiki/">WikiWoordenboek</a>. Vrijgegeven onder <a href="http://creativecommons.org/licenses/by-sa/3.0/deed.nl" rel="license copyright">CC-BY-SA 3.0</a></xsl:variable>
 <!-- Content language code; used for lang / xml:lang on the html element and to build the site URL below. -->
 <xsl:variable name="contentLang" select="'nl'"/> <!-- select is an XPath expression: keep the inner single quotes so that 'nl' stays a string literal -->

<!-- other variables -->

 <!-- Disabled earlier attempt at deriving pageName (kept for reference): -->
 <!--<xsl:variable name="pageName" select="string(/api@requestid)"/> -->

 <!-- Disabled experiment (kept for reference): -->
 <!--<xsl:variable name="pageNamedf" select="escape-html-uri('../foo')"/>-->
 <!-- Despite its name this holds the site root URL (http://LANG.wiktionary.org), not a page name; it becomes the href of the base element. -->
 <xsl:variable name="pageName" select="concat('http://', $contentLang, '.wiktionary.org')"/>
<!-- end translation. see js as well (the setup function has its own strings to translate) -->
 <xsl:template match="/">

 <html dir="{$dir}" lang="{$contentLang}" xml:lang="{$contentLang}">
 <head>
 <meta name="generator" content="Wiktionary Extract XSLT 1.04-NL"/>
 <base target='_blank' href="{$pageName}" />
<title> Wiktionary extract</title>
 
 <style>
  /* Elements carrying either see-also class are laid out inline. */
  .disambig-see-also,
  .disambig-see-also-2 {
   display: inline;
  }

  /* Box around the whole extract (the div with id container). */
  #container {
   background-color: white;
   padding: 0.5em;
   border: solid black thin;
  }

  /* Links with class "new" are shown in red. */
  a.new {
   color: red;
  }

  /* API error message (the span with id error). */
  #error {
   color: red;
   font-size: larger;
  }
 </style>
 <script type='text/javascript'>
 /*<![CDATA[*/
 /**
  * Body onload handler.
  *
  * Reads the parser HTML that the stylesheet wrote (as text) into #src, picks
  * the first definition - the first <li> of the first <ol> - preferring the
  * language section named by the optional lang= query parameter, and writes it
  * into #word-list. If no definition can be extracted, a "could not retrieve"
  * message is appended instead and, for titles containing an uppercase letter,
  * the request is retried once with the first character lowercased.
  * Finally fills #see-also (when a see-also table is found) and sets the
  * #more-link href to /wiki/PAGE.
  *
  * Reads: location.search, location.href, #src.
  * Writes: #word-list, #see-also, #more-link, the base element, location.
  */
 function setup () {
  //Stuff to translate:
  var preferLang = {'nl': 'Nederlands', 'en': 'Engels', 'qqqAny': null}; //for now.
  var extractSeeAlso = /<table class=\"bandeau-voir\"[^>]*>[\s\S]*?([\s\S]*?[\s\S]*?)<\/td>[\s\S]*?<\/table>/; //Modified elsewhere!
  var see_also_process = function (sa) {
   //drop the first linked image from the captured cell
   return sa[1].replace(/<a[^>]*><img[^>]*\/><\/a>/, '');
  };
  var missing = 'Could not retrieve definition of $1.';
  //Stuff not to translate in general (setup).

  //Percent-decode a title; a malformed escape sequence falls back to the raw
  //text instead of throwing from inside the error handler.
  var safeDecode = function (s) {
   try { return decodeURIComponent(s); } catch (e) { return s; }
  };

  //Accept page= as the first parameter as well (?page=...), and survive it being absent.
  var pageMatch = location.search.match(/[?&]page=([^&]*)/);
  var loc = pageMatch ? pageMatch[1] : ''; //still percent-encoded
  var pageURL = '/wiki/' + loc;
  var src = document.getElementById('src');
  var display = document.getElementById('word-list');
  var langMatch = location.search.match(/[?&]lang=([^&]*)/);
  var preferLangCode = langMatch ? langMatch[1] : 'qqqAny';
  //Own properties only, so that e.g. lang=toString cannot pick up an Object.prototype member.
  var preferLangName = Object.prototype.hasOwnProperty.call(preferLang, preferLangCode) ? preferLang[preferLangCode] : null;
  src.normalize();
  //#src has no child when the API returned an error instead of parsed text.
  var html = (src.firstChild && src.firstChild.data) || '';
  var def = html; //may be redefined later.

  //stuff you might need to translate, but hopefully won't
  //Matches from the preferred language's h2 heading to the end of the page; only built when a language is preferred.
  var subSectRegex = preferLangName ? new RegExp('<h2><span class="mw-headline" id="' + preferLangName + '"[^>]*>[\\s\\S]*$') : null; //this is ugly
  var extractCurLangName = /<span class="mw-headline" id[^>]*>([\s\S]*?)<\/span>/; //first subexpression

  try {
   //this assumes attribute order doesn't change!!!
   html = html.replace(/<div id="toctitle">[\s\S]*?<\/div>/, '');
   if (subSectRegex) {
    //strip off all definitions before target lang.
    var subSectMatch = html.match(subSectRegex);
    //only switch to the section if it exists and has content
    if (subSectMatch && subSectMatch[0].match(/<ol>[\s\S]*?<li>/)) {
     def = subSectMatch[0];
    }
   }
   //Either match() returning null throws here on purpose: the catch below is the "no definition" path.
   var lang = def.match(extractCurLangName)[1];
   //First <ol>, with nested <dl>, <div> and <ul> blocks removed, then the content of its first <li>.
   display.innerHTML = '(' + lang + ') ' + def.match(/<ol>[\s\S]*?<\/ol>/)[0].replace(/<dl>[\s\S]*?<\/dl>/g, '').replace(/<div[^>]*>[\s\S]*?<\/div>/g, '').replace(/<ul>[\s\S]*?<\/ul>/g, '').match(/<li>([\s\S]*?)<\/li>/)[1];
  }
  catch (e) {
   //page does not exist, not well formed, these regexs suck, etc
   var title = safeDecode(loc);
   //Function replacer: a '$' in the title must not be interpreted as a replacement pattern.
   display.appendChild(document.createTextNode(missing.replace('$1', function () { return title; })));
   if (title.match(/[A-Z]/) && !location.search.match(/\&rd/)) {
    //Retry once with the first character lowercased. The rd marker prevents loops,
    //e.g. when the first character is % encoded and lowercasing changes nothing.
    //should probably decode it.
    var lowered = loc.charAt(0).toLowerCase() + loc.substring(1);
    location = location.href.replace(/(^[\s\S]*?[?&]page=)[^&]*([\s\S]*$)/, function (whole, before, after) {
     return before + lowered + after + '&rd';
    });
   }
  }
  var sa = html.match(extractSeeAlso);
  if (sa) {
   document.getElementById('see-also').innerHTML = ' (' + see_also_process(sa) + ')';
  }
  document.getElementsByTagName('base')[0].href += pageURL; //this doesn't do anything.
  document.getElementById('more-link').href = pageURL;

 }

 /*]]>*/
 </script>
</head>
 <body onload="setup()">
  <!-- Visible box: definition (or API error), "more" link, see-also slot and copyright notice. -->
  <div id="container">
   <!-- Filled by setup(); until then it holds the API error, if there is one. -->
   <div id="word-list">
    <xsl:apply-templates select="api/error"/>
   </div>
   <div>
    <a id="more-link">
     <xsl:value-of select="$more"/>
    </a>
    <span id="see-also"/>
    <small id="copyright-notice"> <xsl:copy-of select="$copyright"/></small>
   </div>
  </div>
  <!-- Hidden carrier: the parsed page HTML as text, read by setup() through the element with id src. -->
  <div id="src" style="display:None">
   <xsl:value-of select="api/parse/text"/>
  </div>
 </body>
 </html>

 </xsl:template>
 <!-- Renders an API error element as: bold translated prefix ($error) followed by the error's info attribute. -->
 <xsl:template match="api/error">
  <span id="error">
   <b>
    <xsl:value-of select="$error"/>
   </b>
   <xsl:value-of select="@info"/>
  </span>
 </xsl:template>

 </xsl:stylesheet>