#-*- coding: utf-8 -*-

import sys
from lxml import etree, isoschematron
import tagset

# Path of the LMF lexicon to validate: the first command-line argument.
# Fail with a usage message instead of a bare IndexError traceback when the
# argument is missing.
if len(sys.argv) < 2:
    sys.exit('usage: %s LMF_FILE' % sys.argv[0])
lmfFile = sys.argv[1]

# The document is parsed once at module level; the validation functions below
# read this global (validateLexeme queries it to resolve SenseRelation targets).
tree = etree.parse(lmfFile)

# Tag validator built from 'morphosyntax.cfg' (path relative to the current
# working directory); used by validateWordForm.
validator = tagset.TagValidator('morphosyntax.cfg')

def xpath(node, query, **kwargs):
    """Evaluate the XPath expression `query` on `node` and return the result.

    Any keyword arguments are forwarded unchanged to `node.xpath`; callers in
    this file use them to supply values for XPath variables such as `$att`.
    """
    evaluate = node.xpath
    return evaluate(query, **kwargs)

def validateWithRNG(tree):
    """Validate `tree` against the RELAX NG schema in 'schema.rng'.

    The schema file is parsed on every call, from a path relative to the
    current working directory. Nothing is returned; `assertValid` is relied
    upon to raise when `tree` does not conform to the schema.
    """
    schema = etree.RelaxNG(etree.parse('schema.rng'))
    schema.assertValid(tree)

#def validateWithSchematron(tree):
#    schTree = etree.parse('lmf.sch')
#    schematron = isoschematron.Schematron(schTree)
#    schematron.assertValid(tree)

def getFeatValue(node, att, defaultVal=None):
    """Look up the value of a named feature on `node`.

    Selects the `val` attribute of each child `feat` element of `node` whose
    `att` attribute equals `att`, and returns the first one found. When the
    query matches nothing, `defaultVal` is returned instead.
    """
    matches = xpath(node, 'feat[@att=$att]/@val', att=att)
    return matches[0] if matches else defaultVal

def validateWordForm(formNode):
    """Check the morphosyntactic features of a single WordForm element.

    The lemma is read from the `writtenForm` feature of a preceding `Lemma`
    sibling, and the part of speech from the `partOfSpeech` feature of the
    parent of `formNode`. The `number`, `case` and `gender` features of the
    form must be either all present or all absent. When all are present they
    are joined with ':' (followed by `degree`, if set) and handed to
    `validator.validateTag` together with the lemma and part of speech.

    Raises:
        AssertionError: if the lemma has no `writtenForm`, or if only some of
            number/case/gender are given. These are raised explicitly rather
            than with `assert` statements, so the checks stay active when
            Python runs with -O.
        IndexError: if `formNode` has no preceding `Lemma` sibling.
    """
    lemmaNode = xpath(formNode, 'preceding-sibling::Lemma')[0]
    lemma = getFeatValue(lemmaNode, 'writtenForm')
    if lemma is None:
        raise AssertionError(
            'line %s: Lemma has no writtenForm feature' % formNode.sourceline)

    pos = getFeatValue(formNode.getparent(), 'partOfSpeech')
    number = getFeatValue(formNode, 'number')
    case = getFeatValue(formNode, 'case')
    gender = getFeatValue(formNode, 'gender')
    degree = getFeatValue(formNode, 'degree')

    attrs = [number, case, gender]
    missing = [value is None for value in attrs]
    if any(missing):
        # A form without any of the three features is accepted and skipped;
        # a partially specified one is an error.
        if not all(missing):
            raise AssertionError(
                'line %s: number, case and gender must be all present or all '
                'absent, got %r' % (formNode.sourceline, (number, case, gender)))
        return

    if degree:
        attrs.append(degree)
    validator.validateTag(lemma, pos, ':'.join(attrs))

def validateLexeme(lexNode):
    """Validate one LexicalEntry element.

    Every descendant `WordForm` is checked with `validateWordForm`. Then, for
    every descendant `SenseRelation`, the value of its `targets` attribute
    must be the `id` of some `LexicalEntry/Sense` element in the module-level
    `tree`.

    Raises:
        AssertionError: if a SenseRelation points at an id that no Sense in
            the document carries. This is raised explicitly rather than with
            an `assert` statement, so the check stays active when Python runs
            with -O and the message identifies the offending relation.
        KeyError: if a SenseRelation has no `targets` attribute.
    """
    #~ print xpath(lexNode, 'Lemma/feat[@att="writtenForm"]/@val')[0]
    for formNode in xpath(lexNode, './/WordForm'):
        validateWordForm(formNode)
    for relNode in xpath(lexNode, './/SenseRelation'):
        target = relNode.attrib['targets']
        if not xpath(tree, '//LexicalEntry/Sense[@id=$id]', id=target):
            raise AssertionError(
                'line %s: SenseRelation targets unknown Sense id %r'
                % (relNode.sourceline, target))

#~ validateWithRNG(tree)
#~ validateWithSchematron(tree)

# Script body: run the per-entry checks on every LexicalEntry in the document.
# The first failing check raises and stops the run.
lexicalEntries = xpath(tree, '//LexicalEntry')
for lexNode in lexicalEntries:
    validateLexeme(lexNode)


