#!/usr/bin/perl


# Converts a "temporary TEI" segmentation (output of Anotatornia) to target TEI. Minor changes, mostly xlink:href->corresp.

# Usage:
#	perl modify-tei-segmentation.pl <input file> <output file>
#

use strict;
use warnings;

# Main script body: read the input file line by line, rewrite a few
# attributes/elements in place, and replace each <seg>/<xi:include> pair with
# a single self-closing <seg corresp="..."/> element emitted right after the
# comment line that follows it.
#
# Arguments (from @ARGV):
#   $ARGV[0]  path of the input file to read
#   $ARGV[1]  path of the output file to (over)write
#
# Dies with a message if the arguments are missing or a file cannot be
# opened/closed.

@ARGV >= 2
    or die "Usage: perl modify-tei-segmentation.pl <input file> <output file>\n";

my ($in_path, $out_path) = @ARGV;

# Three-argument open with lexical handles: the file name can never be
# misread as an open mode, and failures are reported instead of ignored.
open my $in,  '<', $in_path  or die "Cannot open '$in_path' for reading: $!\n";
open my $out, '>', $out_path or die "Cannot open '$out_path' for writing: $!\n";

# State carried across lines. All start empty so that a comment line seen
# before any <seg> has been read prints nothing extra.
my $seg_attrs = '';    # attribute text preceding xml:id in the last <seg ...>
my $seg_id    = '';    # xml:id value of the last <seg ...>
my $seg_desc  = '';    # pending replacement <seg corresp=.../> element

while (my $line = <$in>) {

    # Unconditional per-line rewrites.
    $line =~ s#<teiCorpus.*>#<teiCorpus xmlns:xi="http://www.w3.org/2001/XInclude" xmlns="http://www.tei-c.org/ns/1.0" xmlns:nkjp="http://www.nkjp.pl/ns/1.0">#;
    $line =~ s/<text xml:lang="pl" xml:id="segm_text">/<text xml:id="segm_text" xml:lang="pl">/;
    $line =~ s/<p xlink:href=(".*?")/<p corresp=$1/;
    $line =~ s/<nkjp:paren nkjp:rejected="true">/<nkjp:paren>/;

    if ($line =~ /<seg (.*?)xml:id="(segm_.*?)"/) {
        # Opening <seg>: remember its attributes and id; the line itself is
        # not copied to the output.
        ($seg_attrs, $seg_id) = ($1, $2);
    }
    elsif ($line =~ /<xi:include href="(.*?)\.xml" xpointer="(.*?)"\/>/) {
        # <xi:include>: build the replacement element from the include target
        # and the remembered <seg> data; the line itself is not copied.
        $seg_desc = sprintf '<seg corresp="%s.xml#%s" %sxml:id="%s"/>',
            $1, $2, $seg_attrs, $seg_id;
    }
    elsif ($line =~ /(\s*)<!--.*?-->/) {
        # Comment line: copy it, then emit the pending replacement element
        # (if any) using the same leading whitespace as the comment.
        my $indent = $1;
        print {$out} $line;
        print {$out} "$indent$seg_desc\n" unless $seg_desc eq '';
    }
    elsif ($line =~ /<\/s>/) {
        # Closing </s>: forget the pending element and copy the line.
        $seg_desc = '';
        print {$out} $line;
    }
    elsif ($line =~ /<\/seg>/) {
        # Closing </seg> lines are dropped.
    }
    elsif ($line =~ /^<\?o/) {
        # Lines starting with "<?o" are dropped.
    }
    else {
        print {$out} $line;
    }
}

close $in  or die "Error closing '$in_path': $!\n";
# Buffered write errors only surface at close, so check it.
close $out or die "Error closing '$out_path': $!\n";

