Conversion from TEI P4 to TEI P5 (Sebastian Ratz stylesheet).
Metadata : from <teiHeader><fileDesc><titleStmt>, get
Manage XML-TEI features which wouldn't work with CQP :
Distribute <milestone> attributes' information on word tokens (when available).
Get page number when available, put it as an @n attibute on <pb> element so that TXM can use it to number pages in HTML Edition.
Render foreign words (tagged with <foreign> element) and titles (<title> elements content) as italics.
Make a directory (e.g. “cicero”).
This directory includes :
Then run the TXM command File>Import>XML-XTZ + CSV with the following settings :
1. Source directory is “cicero” (in our example).
2. Import parameters :
3. Click on “Start corpus import” (above - beginning of the page)
Another import can be done, adding a metadata.csv file in order to get more metadata than only the ones automatically extracted from teiHeader (title, first author, first editor).
Some features of XML-XTZ import have not been implemented yet, especially @rend attribute seems is not used to interpret <emph> and <hi> elements. So, through the front XSL (import step #2), we have changed some <hi> into <emph> for cases for which we wanted italics in HTML edition.
<note> content looses all its markup, this is really a drawback as tagged foreign words and italics are very often use in notes.
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xd="http://www.pnp-software.com/XSLTdoc"
                xmlns:edate="http://exslt.org/dates-and-times"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:tei="http://www.tei-c.org/ns/1.0"
                exclude-result-prefixes="tei edate xd"
                version="2.0">

  <xd:doc type="stylesheet">
    <xd:short> A stylesheet to prepare PERSEUS XML-TEI texts to TXM import. </xd:short>
    <xd:detail> This stylesheet is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This stylesheet is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of GNU Lesser Public License with this stylesheet. If not, see http://www.gnu.org/licenses/lgpl.html </xd:detail>
    <xd:author>Alexei Lavrentiev alexei.lavrentev@ens-lyon.fr</xd:author>
    <xd:copyright>2017, CNRS / IHRIM (Groupe CACTUS)</xd:copyright>
  </xd:doc>

  <xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no"/>

  <!-- Identity rule: every node and attribute without a more specific
       template below is copied unchanged, then its attributes and children
       are processed in turn. -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Top-level <text>: keep its own attributes and add author, title and
       editor attributes read from the first author, title and editor of
       teiHeader/fileDesc/titleStmt. The values are whitespace-normalised
       (like the abbr, sic and orig attributes below) so that line breaks and
       indentation in the header do not end up in the metadata. A missing
       header element yields an empty attribute.
       Comment this template out if a metadata file already supplies the
       same information. -->
  <xsl:template match="/tei:TEI/tei:text">
    <xsl:variable name="titleStmt"
                  select="/tei:TEI/tei:teiHeader/tei:fileDesc/tei:titleStmt"/>
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <!-- (…)[1] guarantees a single item, as normalize-space() requires -->
      <xsl:attribute name="author" select="normalize-space(($titleStmt/tei:author)[1])"/>
      <xsl:attribute name="title" select="normalize-space(($titleStmt/tei:title)[1])"/>
      <xsl:attribute name="editor" select="normalize-space(($titleStmt/tei:editor)[1])"/>
      <xsl:apply-templates/>
    </xsl:copy>
  </xsl:template>

  <!-- A <text> nested in a <group> is renamed to <subtext>.
       NOTE(review): the new element is created in no namespace, unlike the
       TEI elements created elsewhere in this stylesheet; confirm that this
       is what the TXM import expects. -->
  <xsl:template match="tei:group/tei:text">
    <xsl:element name="subtext">
      <xsl:apply-templates select="@*|node()"/>
    </xsl:element>
  </xsl:template>

  <!-- Page breaks: make sure every <pb> carries an @n attribute.
       Priority of sources for the value: the existing @n, otherwise an id
       attribute (in any namespace) with a leading "p." removed, otherwise
       the placeholder "[s.n.]".
       All other attributes of the original <pb> are preserved. -->
  <xsl:template match="tei:pb">
    <xsl:copy>
      <xsl:apply-templates select="@* except @n"/>
      <xsl:attribute name="n">
        <xsl:choose>
          <xsl:when test="@n">
            <xsl:value-of select="@n"/>
          </xsl:when>
          <xsl:when test="@*:id">
            <!-- (…)[1]: replace() accepts a single string only, so guard
                 against a <pb> that has both @id and @xml:id -->
            <xsl:value-of select="replace((@*:id)[1], '^p\.', '')"/>
          </xsl:when>
          <xsl:otherwise>
            <xsl:text>[s.n.]</xsl:text>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:attribute>
    </xsl:copy>
  </xsl:template>

  <!-- Numbered divisions (div1 to div7) are all renamed to a plain TEI <div>;
       attributes and content are processed as usual. -->
  <xsl:template match="tei:div1|tei:div2|tei:div3|tei:div4|tei:div5|tei:div6|tei:div7">
    <xsl:element name="div" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:apply-templates select="@*|node()"/>
    </xsl:element>
  </xsl:template>

  <!-- <choice>: only the expanded, corrected or regularised reading is kept;
       the alternative reading is recorded as an attribute by the three
       templates that follow. -->
  <xsl:template match="tei:choice">
    <xsl:apply-templates select="tei:expan|tei:corr|tei:reg"/>
  </xsl:template>

  <!-- choice/expan becomes a <w> whose @abbr holds the sibling <abbr> text. -->
  <xsl:template match="tei:choice/tei:expan">
    <w xmlns="http://www.tei-c.org/ns/1.0">
      <xsl:attribute name="abbr">
        <xsl:value-of select="normalize-space(parent::tei:choice/tei:abbr)"/>
      </xsl:attribute>
      <xsl:apply-templates select="@*|node()"/>
    </w>
  </xsl:template>

  <!-- choice/corr is kept, with the sibling <sic> text stored in @sic. -->
  <xsl:template match="tei:choice/tei:corr">
    <xsl:copy>
      <xsl:attribute name="sic">
        <xsl:value-of select="normalize-space(parent::tei:choice/tei:sic)"/>
      </xsl:attribute>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- choice/reg is kept, with the sibling <orig> text stored in @orig. -->
  <xsl:template match="tei:choice/tei:reg">
    <xsl:copy>
      <xsl:attribute name="orig">
        <xsl:value-of select="normalize-space(parent::tei:choice/tei:orig)"/>
      </xsl:attribute>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- Temporary patch for TXM indexing quote elements in notes: a <quote>
       anywhere inside a <note> is renamed to <quote-note> (created in no
       namespace). -->
  <xsl:template match="tei:note//tei:quote">
    <quote-note>
      <xsl:apply-templates select="@*|node()"/>
    </quote-note>
  </xsl:template>

  <!-- (i) adding an <emph> element in order to point out some elements'
       content (e.g. foreign, title) in TXM edition ;
       (ii) adding a <w> element to prevent tokenisation from analysing some
       content (e.g. foreign).
       Only <foreign> elements outside notes are handled. The attributes of
       <foreign> are processed inside the <w>, so they end up on the <w>
       element rather than on the copied <foreign>. -->
  <xsl:template match="tei:foreign[not(ancestor::tei:note)]">
    <emph rend="italic" xmlns="http://www.tei-c.org/ns/1.0">
      <xsl:copy>
        <w xmlns="http://www.tei-c.org/ns/1.0">
          <xsl:apply-templates select="@*|node()"/>
        </w>
      </xsl:copy>
    </emph>
  </xsl:template>

  <!-- Titles inside the text are wrapped in <emph rend="italic">.
       The match is limited to descendants of <text> so that the titles of
       the teiHeader (titleStmt, sourceDesc, ...) are left untouched. -->
  <xsl:template match="tei:text//tei:title">
    <emph rend="italic" xmlns="http://www.tei-c.org/ns/1.0">
      <xsl:copy>
        <xsl:apply-templates select="@*|node()"/>
      </xsl:copy>
    </emph>
  </xsl:template>

  <!-- Temporary patch to get the correct rendering for <hi @rend="italic">
       content in TXM editions : must use <emph> instead of <hi>.
       The original attributes (including @rend) are carried over to the
       new <emph>. -->
  <xsl:template match="tei:hi[matches(@rend,'italic')]" priority="1">
    <xsl:element name="emph" namespace="http://www.tei-c.org/ns/1.0">
      <xsl:apply-templates select="@*|node()"/>
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:edate="http://exslt.org/dates-and-times"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:tei="http://www.tei-c.org/ns/1.0"
                xmlns:txm="http://textometrie.org/ns/1.0"
                exclude-result-prefixes="tei edate"
                xpath-default-namespace="http://www.tei-c.org/ns/1.0"
                version="2.0">

  <!-- This software is dual-licensed: 1. Distributed under a Creative Commons Attribution-ShareAlike 3.0 Unported License http://creativecommons.org/licenses/by-sa/3.0/ 2. http://www.opensource.org/licenses/BSD-2-Clause All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. This software is provided by the copyright holders and contributors "as is" and any express or implied warranties, including, but not limited to, the implied warranties of merchantability and fitness for a particular purpose are disclaimed. In no event shall the copyright holder or contributors be liable for any direct, indirect, incidental, special, exemplary, or consequential damages (including, but not limited to, procurement of substitute goods or services; loss of use, data, or profits; or business interruption) however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence or otherwise) arising in any way out of the use of this software, even if advised of the possibility of such damage. This stylesheet adds a ref attribute to w elements that will be used for references in TXM concordances. Can be used with TXM XTZ import module.
       Written by Alexei Lavrentiev, UMR 5317 IHRIM, 2017 -->

  <xsl:output method="xml" encoding="utf-8" omit-xml-declaration="no"/>

  <!-- General patterns: all elements, attributes, comments and processing
       instructions are copied -->
  <xsl:template match="*">
    <xsl:copy>
      <xsl:apply-templates select="*|@*|processing-instruction()|comment()|text()"/>
    </xsl:copy>
  </xsl:template>

  <!-- In mode "position", an element yields the number of its preceding
       sibling elements. No template in this stylesheet invokes this mode. -->
  <xsl:template match="*" mode="position">
    <xsl:value-of select="count(preceding-sibling::*)"/>
  </xsl:template>

  <xsl:template match="@*|comment()|processing-instruction()">
    <xsl:copy/>
  </xsl:template>

  <!-- Name of the source file without its directory and ".xml" extension,
       taken from the document URI. Empty when the URI does not have the
       form ".../name.xml".
       string() turns a missing document URI into a zero-length string;
       an empty sequence in xsl:analyze-string/@select may be rejected by
       XSLT 2.0 processors. -->
  <xsl:variable name="filename">
    <xsl:analyze-string select="string(document-uri(.))" regex="^(.*)/([^/]+)\.xml$">
      <xsl:matching-substring>
        <xsl:value-of select="regex-group(2)"/>
      </xsl:matching-substring>
    </xsl:analyze-string>
  </xsl:variable>

  <!-- Each <w> is copied with an additional @ref attribute built from:
         1. the id of the nearest enclosing <text> that has one, or the
            source file name when no enclosing <text> has an id;
         2. ", c. N" and ", s. N" for the closest preceding chapter and
            section <milestone>, when that milestone has an @n;
         3. ", p. N" for the closest preceding <pb> with its @n;
         4. ", § N" for the nearest enclosing <p> that has an @n. -->
  <xsl:template match="tei:w">
    <!-- ancestor:: is a reverse axis, so [1] selects the nearest <text>
         among those that actually carry an id attribute -->
    <xsl:variable name="textId" select="ancestor::tei:text[@*:id][1]/@*:id[1]"/>
    <xsl:variable name="ref">
      <xsl:choose>
        <xsl:when test="$textId">
          <xsl:value-of select="$textId"/>
        </xsl:when>
        <xsl:otherwise>
          <xsl:value-of select="$filename"/>
        </xsl:otherwise>
      </xsl:choose>
      <!-- Perseus addition -->
      <xsl:if test="preceding::tei:milestone[@unit='chapter'][1][@n]">
        <xsl:text>, c. </xsl:text>
        <xsl:value-of select="preceding::tei:milestone[@unit='chapter'][1]/@n"/>
      </xsl:if>
      <xsl:if test="preceding::tei:milestone[@unit='section'][1][@n]">
        <xsl:text>, s. </xsl:text>
        <xsl:value-of select="preceding::tei:milestone[@unit='section'][1]/@n"/>
      </xsl:if>
      <!-- end of Perseus addition -->
      <xsl:if test="preceding::tei:pb[1]/@n">
        <xsl:text>, p. </xsl:text>
        <xsl:value-of select="preceding::tei:pb[1]/@n"/>
      </xsl:if>
      <xsl:if test="ancestor::tei:p[@n]">
        <xsl:text>, § </xsl:text>
        <!-- nearest numbered paragraph only, so nested <p> elements do not
             produce several numbers -->
        <xsl:value-of select="ancestor::tei:p[@n][1]/@n"/>
      </xsl:if>
      <!--<xsl:if test="preceding::tei:lb[1]/@n"> <xsl:text>, l. </xsl:text> <xsl:value-of select="preceding::tei:lb[1]/@n"/> </xsl:if>-->
    </xsl:variable>
    <xsl:copy>
      <!-- existing attributes first, then @ref, then the content -->
      <xsl:apply-templates select="@*"/>
      <xsl:attribute name="ref">
        <xsl:value-of select="$ref"/>
      </xsl:attribute>
      <xsl:apply-templates select="*|processing-instruction()|comment()|text()"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>