Attachment 'modify-tei-segmentation.pl'
Download 1 #!/usr/bin/perl
2
3
4 # Converts a "temporary TEI" segmentation (output of Anotatornia) to target TEI. Minor changes, mostly xlink:href->corresp.
5
6 # Usage:
7 # perl modify-tei-segmentation.pl <input file> <output file>
8 #
9
use strict;
use warnings;

# Main script body: streams the "temporary TEI" segmentation file given as the
# first argument, rewrites it line by line, and writes the result to the file
# given as the second argument.
#
# Per-line behaviour:
#   * four in-place substitutions normalise the <teiCorpus> start tag, the
#     attribute order of <text>, <p xlink:href=...> (-> corresp) and
#     <nkjp:paren nkjp:rejected="true">;
#   * an opening <seg ... xml:id="segm_..."> line is not copied; its attributes
#     and id are remembered;
#   * an <xi:include .../> line is not copied; it is combined with the
#     remembered <seg> data into a single self-closing <seg corresp=.../>;
#   * a line containing an XML comment is copied, followed by the pending
#     <seg/> element (if any);
#   * a line containing </s> is copied and clears the pending <seg/>;
#   * lines containing </seg> and lines starting with "<?o" are dropped;
#   * every other line is copied unchanged.
#
# Dies with a message if arguments are missing or a file cannot be opened or
# closed.

# Fail early instead of silently producing nothing when arguments are missing.
@ARGV >= 2
    or die "Usage: perl modify-tei-segmentation.pl <input file> <output file>\n";
my ($in_path, $out_path) = @ARGV;

# Three-argument open with lexical handles: file names are taken literally
# (no mode injection through the name) and failures are reported.
open my $in,  '<', $in_path
    or die "Cannot open '$in_path' for reading: $!\n";
open my $out, '>', $out_path
    or die "Cannot open '$out_path' for writing: $!\n";

# State carried between lines. Initialised to '' so that the first use is
# well-defined under warnings and yields the same output as before.
my $seg_attrs = '';    # attributes that precede xml:id in the last <seg ...>
my $seg_id    = '';    # xml:id value of the last <seg ...>
my $seg_desc  = '';    # pending rewritten <seg .../> element, '' when none

while (my $line = <$in>) {

    $line =~ s#<teiCorpus.*>#<teiCorpus xmlns:xi="http://www.w3.org/2001/XInclude" xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0">#;
    $line =~ s/<text xml:lang="pl" xml:id="segm_text">/<text xml:id="segm_text" xml:lang="pl">/;
    $line =~ s/<p xlink:href=(".*?")/<p corresp=$1/;
    $line =~ s/<nkjp:paren nkjp:rejected="true">/<nkjp:paren>/;

    if ($line =~ /<seg (.*?)xml:id="(segm_.*?)"/) {
        # Opening <seg>: remember its attributes, emit nothing yet.
        ($seg_attrs, $seg_id) = ($1, $2);
    }
    elsif ($line =~ /<xi:include href="(.*?)\.xml" xpointer="(.*?)"\/>/) {
        # Fold the XInclude target into a corresp attribute on the <seg>.
        my ($target, $xpointer) = ($1, $2);
        $seg_desc = qq{<seg corresp="$target.xml#$xpointer" ${seg_attrs}xml:id="$seg_id"/>};
    }
    elsif ($line =~ /(\s*)<!--.*?-->/) {
        # Copy the comment line, then the pending <seg/>, indented with the
        # whitespace that immediately precedes the comment.
        # NOTE(review): $seg_desc is only cleared on </s>, so a second comment
        # line before </s> re-emits the same <seg/> - confirm this is intended.
        my $indent = $1;
        print {$out} $line;
        print {$out} "$indent$seg_desc\n" if $seg_desc ne '';
    }
    elsif ($line =~ /<\/s>/) {
        # End of sentence: forget the pending <seg/> and copy the line.
        $seg_desc = '';
        print {$out} $line;
    }
    elsif ($line =~ /<\/seg>/ or $line =~ /^<\?o/) {
        # Closing </seg> tags (the rewritten <seg/> is self-closing) and
        # lines starting with "<?o" are intentionally not copied.
    }
    else {
        print {$out} $line;
    }

}

close $in
    or die "Cannot close '$in_path': $!\n";
# Buffered write errors surface at close time, so this check matters.
close $out
    or die "Cannot close '$out_path': $!\n";
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.