[tei-council] EEBO and ECCO to P5

Sebastian Rahtz sebastian.rahtz at oucs.ox.ac.uk
Tue Jun 5 17:18:01 EDT 2012


I now have all 42496 texts from EEBO and ECCO in a state where they are valid
against a variant of P5. About 2000 of these are NOT valid against tei_all, for
one or more of the reasons listed below. Sorry, I don't have the data
to say which of these customizations triggers how many failed validations.

The transform is at http://tei.svn.sourceforge.net/viewvc/tei/trunk/Stylesheets/tcp/tcp2tei.xsl?revision=10451&view=markup
for those who like that sort of thing.

I thought it would be worth pasting in below my whole current ODD,
so you can see the relaxations to content models which 
I have had to put in. Some are also SF tickets.

Some of these things will come out of ongoing work around divliminal,
but most of what you see here is fairly mundane: allowing
combinations you may well think are weird, but which do seem to occur
in the TCP wild.

Obviously, I'd be really really keen to see if we can resolve
these remaining things in time for the TCP conference in September,
but that may be a big ask. The rather hard task is to review the changes
I am doing in tcp2tei.xsl and see if any of them are Wrong.

Paul can probably supply examples faster than me as needed.

	<!-- Loosen the datatype of the global n attribute to plain text for
	     the time being, so that values such as n="‡" validate. This is
	     also filed on SourceForge as a bug report. -->
	<classSpec xmlns="http://www.tei-c.org/ns/1.0"
	           xmlns:rng="http://relaxng.org/ns/structure/1.0"
	           ident="att.global"
	           mode="change">
	  <attList>
	    <attDef ident="n" mode="change">
	      <datatype>
	        <rng:text/>
	      </datatype>
	    </attDef>
	  </attList>
	</classSpec>

	<!-- signed gets a looser content model (paraContent, not phraseSeq)
	     and is allowed to appear at the top of a div as well as at the
	     bottom. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0" mode="change" ident="signed">
	  <!-- The attribute on classes is "mode" (it was mistyped as "model").
	       With mode="replace" the memberships listed here form the
	       complete class list for signed; model.divTopPart is what
	       permits it at the top of a div. -->
	  <classes mode="replace">
	    <memberOf key="att.global"/>
	    <memberOf key="model.divBottomPart"/>
	    <memberOf key="model.divTopPart"/>
	  </classes>
	  <content>
	    <ref xmlns="http://relaxng.org/ns/structure/1.0" name="macro.paraContent"/>
	  </content>
	</elementSpec>

	<!-- stage: add the placement attributes (att.placement) and make the
	     element a member of model.phrase. The class list below replaces
	     the existing memberships wholesale. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0"
	             ident="stage"
	             mode="change">
	  <classes mode="replace">
	    <memberOf key="att.global"/>
	    <memberOf key="att.placement"/>
	    <memberOf key="model.stageLike"/>
	    <memberOf key="model.phrase"/>
	  </classes>
	</elementSpec>

	<!-- salute: content model loosened from phraseSeq to paraContent so
	     that it can contain <list>, as found in EEBO A00583 and A15145. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0"
	             xmlns:rng="http://relaxng.org/ns/structure/1.0"
	             ident="salute"
	             mode="change">
	  <content>
	    <rng:ref name="macro.paraContent"/>
	  </content>
	</elementSpec>

	<!-- trailer: content model loosened so that it can contain <l>.
	     Any mixture of text and the alternatives listed below is allowed,
	     in any order and any number of times. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0"
	             xmlns:rng="http://relaxng.org/ns/structure/1.0"
	             ident="trailer"
	             mode="change">
	  <content>
	    <rng:zeroOrMore>
	      <rng:choice>
	        <rng:text/>
	        <rng:ref name="lg"/>
	        <rng:ref name="model.gLike"/>
	        <rng:ref name="model.phrase"/>
	        <rng:ref name="model.inter"/>
	        <rng:ref name="model.lLike"/>
	        <rng:ref name="model.global"/>
	      </rng:choice>
	    </rng:zeroOrMore>
	  </content>
	</elementSpec>

	<!-- cell: content model loosened to macro.specialPara. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0"
	             xmlns:rng="http://relaxng.org/ns/structure/1.0"
	             ident="cell"
	             mode="change">
	  <content>
	    <rng:ref name="macro.specialPara"/>
	  </content>
	</elementSpec>

	<!-- closer: allow postscript inside it. The model is any mixture of
	     text and the alternatives listed below, repeated freely. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0"
	             xmlns:rng="http://relaxng.org/ns/structure/1.0"
	             ident="closer"
	             mode="change">
	  <content>
	    <rng:zeroOrMore>
	      <rng:choice>
	        <rng:text/>
	        <rng:ref name="model.gLike"/>
	        <rng:ref name="signed"/>
	        <rng:ref name="postscript"/>
	        <rng:ref name="dateline"/>
	        <rng:ref name="salute"/>
	        <rng:ref name="model.phrase"/>
	        <rng:ref name="model.global"/>
	      </rng:choice>
	    </rng:zeroOrMore>
	  </content>
	</elementSpec>

	<!-- label: content model loosened to macro.specialPara. -->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0"
	             xmlns:rng="http://relaxng.org/ns/structure/1.0"
	             ident="label"
	             mode="change">
	  <content>
	    <rng:ref name="macro.specialPara"/>
	  </content>
	</elementSpec>

	<!-- 
	     table needs to allow model.divBottomPart at the end, to cater for
	     trailer. See
	     https://sourceforge.net/tracker/?func=detail&aid=3531957&group_id=106328&atid=644065

	     The content model is a sequence of three parts:
	       1. any number of headings (model.headLike) and global elements;
	       2. either one or more rows, or one or more graphic-like
	          elements, each optionally followed by global elements;
	       3. any number of model.divBottomPart members, which is the
	          relaxation this customization is for.
	-->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0" mode="change" ident="table">
	  <content>
	    <group xmlns="http://relaxng.org/ns/structure/1.0">
	      <!-- part 1: leading headings and global elements -->
	      <zeroOrMore>
		<choice>
		  <ref name="model.headLike"/>
		  <ref name="model.global"/>
		</choice>
	      </zeroOrMore>
	      <!-- part 2: the table body, as rows or as graphics (not a mixture) -->
	      <choice>
		<oneOrMore>
		  <ref name="row"/>
		  <zeroOrMore>
		    <ref name="model.global"/>
		  </zeroOrMore>
		</oneOrMore>
		<oneOrMore>
		  <group>
		    <ref name="model.graphicLike"/>
		  </group>
		  <zeroOrMore>
		    <ref name="model.global"/>
		  </zeroOrMore>
		</oneOrMore>
	      </choice>
	      <!-- part 3: trailing model.divBottomPart members (e.g. trailer) -->
	      <zeroOrMore>
		<ref name="model.divBottomPart"/>
	      </zeroOrMore>
	    </group>
	  </content>
	</elementSpec>

	<!-- a div with just figure and closer. see
	     https://sourceforge.net/tracker/?func=detail&aid=3531963&group_id=106328&atid=644062

             This is much harder than it looks, the solution below produces non-deterministic
	     DTDs

	     The content model is a choice between two alternatives:

	       A. Leading model.divTop / model.global items, optionally followed
	          by a body (either nested divisions, or model.common content
	          followed by optional nested divisions) and then any
	          model.divBottom items. Within this alternative, model.divBottom
	          can only occur after a non-empty body.

	       B. A body-less div: global elements, then an optional run
	          starting with model.divTop, then an optional run starting with
	          model.divBottom. This is the alternative that admits a div
	          holding only a figure and a closer (assuming figure is
	          reachable via model.global and closer via model.divBottom;
	          TODO confirm against the class definitions).

	     Both alternatives can begin with model.global or model.divTop,
	     which is presumably the source of the non-determinism noted above.
	-->
	<elementSpec xmlns="http://www.tei-c.org/ns/1.0" mode="change" ident="div">
	  <content>
	    <choice xmlns="http://relaxng.org/ns/structure/1.0">	      
	      <!-- Alternative A: div with an optional body -->
	      <group>
		<zeroOrMore>
		  <choice>
		    <ref name="model.divTop"/>
		    <ref name="model.global"/>
		  </choice>
		</zeroOrMore>
		<optional>
		  <choice>
		    <!-- body made up solely of nested divisions -->
		    <group>
		      <oneOrMore>
			<choice>
			  <ref name="model.divLike"/>
			  <ref name="model.divGenLike"/>
			</choice>
			<zeroOrMore>
			  <ref name="model.global"/>
			</zeroOrMore>
		      </oneOrMore>
		    </group>
		    <!-- body of model.common content, then optional nested divisions -->
		    <group>
		      <oneOrMore>
			<group>
			  <ref name="model.common"/>
			</group>
			<zeroOrMore>
			  <ref name="model.global"/>
			</zeroOrMore>
		      </oneOrMore>
		      <zeroOrMore>
			<choice>
			  <ref name="model.divLike"/>
			  <ref name="model.divGenLike"/>
			</choice>
			<zeroOrMore>
			  <ref name="model.global"/>
			</zeroOrMore>
		      </zeroOrMore>
		    </group>
		  </choice>
		  <!-- closing material, only reachable here after a body -->
		  <zeroOrMore>
		    <group>
		      <ref name="model.divBottom"/>
		    </group>
		    <zeroOrMore>
		      <ref name="model.global"/>
		    </zeroOrMore>
		  </zeroOrMore>
		</optional>
	      </group>

	      <!-- Alternative B: div with no body, just top and/or bottom material -->
	      <group>
		<zeroOrMore>
		  <ref name="model.global"/>
		</zeroOrMore>
		<!-- optional run that must start with a model.divTop member -->
		<optional>
		  <group>
		    <ref name="model.divTop"/>		  
		  </group>
		  <zeroOrMore>
		    <choice>
		      <ref name="model.global"/>
		      <ref name="model.divTop"/>		 
		    </choice>
		  </zeroOrMore>
		</optional>
		<!-- optional run that must start with a model.divBottom member -->
		<optional>
		  <group>
		    <ref name="model.divBottom"/>		  
		  </group>
		  <zeroOrMore>
		    <choice>
		      <ref name="model.global"/>
		      <ref name="model.divBottom"/>		 
		    </choice>
		  </zeroOrMore>
		</optional>
	      </group>	      
	    </choice>
	  </content>
	</elementSpec>	
--
Sebastian Rahtz      
Head of Information and Support Group
Oxford University Computing Services
13 Banbury Road, Oxford OX2 6NN. Phone +44 1865 283431








More information about the tei-council mailing list