/*
 * Copyright (C) 2007 by Instytut Podstaw Informatyki Polskiej
 * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 * Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 *
 * This file is part of Spejd.
 *
 * Spejd is free software: it may be distributed and/or modified under 
 * the terms of the GNU General Public License version 3 as published 
 * by the Free Software Foundation and appearing in the file doc/gpl.txt
 * included in the packaging of this file.
 *
 * A commercial license is available from IPI PAN (contact
 * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
 * information).  Licensees holding a valid commercial license from IPI
 * PAN may use this file in accordance with that license.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */ 
 
 
package ipipan.spejd.rules;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.TreeMap;

import ipipan.spejd.entities.Entity;
import ipipan.spejd.tagset.Attribute;
import ipipan.spejd.util.Config;
import ipipan.spejd.util.TimeWatch;
import ipipan.spejd.util.Util;


/**
 * Not official yet.
 *
 * A {@link Match} implementation that slides a window of {@code order}
 * entities over a chunk, builds a string key from each window with
 * {@link #concatAttr} and, when the key is present in the gazetteer
 * table, applies the actions of the entry registered under that key.
 *
 * The instance keeps the chunk being processed and the current window
 * position in fields, so the accessor methods ({@link #getSingle},
 * {@link #getSpec}, {@link #get}, {@link #getMatch}) are only meaningful
 * while {@link #apply} is running (i.e. when called from an entry's
 * actions).
 */
public class Gazetteer implements Match {

    /** Lookup table: concatenated key -> entry registered for that key. */
    TreeMap<String,GazetteerEntry> gaz;
    /** All entries in the order they were added (used for log/report). */
    ArrayList<GazetteerEntry> entryList;

    /** Number of entities in the lookup window. */
    int order;
    /** Attribute id handed to {@link #concatAttr} when building keys. */
    int type;

    /** Chunk currently being processed by {@link #apply}. */
    Entity[] chunk;
    /** Start index of the current window inside {@link #chunk}. */
    int pos;
    /** Accumulated time spent in {@link #apply} (reported as ms). */
    int gtime;
    TimeWatch timer;
    Config conf;

    /**
     * Creates an empty gazetteer.
     *
     * @param order number of entities in the lookup window
     * @param type  attribute id used to build lookup keys
     * @param conf  configuration passed on to {@link Util} when keys
     *              are loaded
     */
    public Gazetteer(int order, int type, Config conf) {
        this.conf  = conf;
        this.order = order;
        this.type  = type;
        gaz = new TreeMap<String, GazetteerEntry>();
        entryList = new ArrayList<GazetteerEntry>();
        timer = new TimeWatch();
        gtime = 0;
    }

    /**
     * Registers an entry under each of its keys.
     *
     * If {@code entry.fromFile} is set, the keys are obtained from
     * {@code Util.file(entry.key)} (presumably the contents of the file
     * named by the key - confirm against Util); otherwise
     * {@code entry.key} is split on {@code '|'}.  A key that is already
     * present is silently re-bound to the new entry.
     *
     * @param entry the entry to register
     */
    public void addEntry(GazetteerEntry entry) {
        entryList.add(entry);
        Util u = new Util(conf);
        String[] keys = entry.fromFile ? u.file(entry.key)
                                       : entry.key.split("\\|");
        for (String key : keys) {
            gaz.put(key, entry);
        }
    }


    /**
     * This is a modified copy of Entity's method - to get rid of "static".
     * Concatenate orth or base of a sequence of entities, adding
     * space or not with respect to ns special entities.
     *
     * At most {@code length} counted entities are consumed; a
     * non-positive {@code length} yields an empty string.  The sequence
     * also ends at the end of {@code s}.
     *
     * @param id attribute to concatenate (Attribute.ORTH or Attribute.BASE)
     * @param s  an array of Entities
     * @param start start of the sequence to concatenate
     * @param length length of the sequence to concatenate
     * @param countNospace whether NOSPACE should be counted in the length
     * @return the concatenated attribute values
     */
    public String concatAttr(int id, Entity[] s,
                             int start, int length, boolean countNospace) {
        StringBuilder res = new StringBuilder();
        // Starts true so that no separator precedes the first value.
        boolean nospace = true;
        int len = 0;
        // The length bound is part of the loop condition so that it is
        // also honoured when a counted NOSPACE entity is the one that
        // completes the sequence; checking it only after appending a
        // regular entity would pull in one entity past the window.
        for (int i = start; i < s.length && len < length; i++) {
            if (s[i].type == Entity.NOSPACE) {
                nospace = true;
                if (countNospace) {
                    len++;
                }
                continue;
            }
            if (!nospace) {
                res.append(' ');
            }
            res.append(s[i].getAttr(id));
            nospace = false;
            len++;
        }
        return res.toString();
    }

    /**
     * Looks up every window of {@code order} entities of the chunk in
     * the gazetteer and applies the actions of each entry found.
     * The time measured by the timer around the scan is added to
     * {@code gtime}.
     *
     * @param chunk the entities to scan
     * @return true if any applied entry reported a modification
     */
    public boolean apply(Entity[] chunk) {

        boolean modified = false;
        // Result discarded: presumably marks the start of the measured
        // interval (confirm against TimeWatch).
        timer.getInterval();
        this.chunk = chunk;

        // pos is a field on purpose: the accessors below read it while
        // the entry's actions run.
        for (pos = 0; pos <= chunk.length - order; pos++) {
            String key = concatAttr(type, chunk, pos, order, true);
            GazetteerEntry r = gaz.get(key);

            if (r != null) {
                modified |= r.applyActions(this);
            }
        }
        gtime += timer.getInterval();
        return modified;
    }

    /**
     * Returns the entity at offset {@code s} from the current window start.
     *
     * @param s offset relative to the current position
     */
    public Entity getSingle(int s) {
        return chunk[pos + s];
    }

    /**
     * Returns a one-element array holding the entity at offset
     * {@code s} from the current window start.
     *
     * @param s offset relative to the current position
     */
    public Entity[] getSpec(int s) {
        Entity[] res = new Entity[1];
        res[0] = chunk[pos + s];
        return res;
    }

    /**
     * Returns the entities at the given offsets from the current
     * window start, in the order of the offsets.
     *
     * @param s offsets relative to the current position
     */
    public Entity[] get(int[] s) {
        Entity[] res = new Entity[s.length];
        for (int i = 0; i < s.length; i++) {
            res[i] = chunk[pos + s[i]];
        }
        return res;
    }

    /**
     * Returns a copy of the current window: {@code order} consecutive
     * entities starting at the current position.
     */
    public Entity[] getMatch() {
        Entity[] res = new Entity[order];
        System.arraycopy(chunk, pos, res, 0, order);
        return res;
    }

    /**
     * Writes a description of this gazetteer (order, number of entries
     * and keys) followed by the log of every entry.
     *
     * @param log destination stream
     */
    public void log(PrintStream log) {
        log.println("Gazetteer: " + order);
        log.println("Entries: " + entryList.size() + ", keys: " + gaz.size());
        log.println();
        for (GazetteerEntry entry : entryList) {
            entry.log(log);
        }
    }

    /**
     * Writes the report of every entry, then one comma-separated
     * summary row for the whole gazetteer: label, completed, matched,
     * total time minus action time, action time, total time.  The
     * total time is additionally printed to standard output.
     *
     * @param log destination stream
     */
    public void report(PrintStream log) {
        int completed = 0, matched = 0;
        long taction = 0;

        for (GazetteerEntry rule : entryList) {
            rule.report(log);
            completed += rule.completed;
            matched += rule.matched;
            taction += rule.taction.get();
        }

        // Label spelling is kept unchanged: it is part of the emitted
        // report format.
        log.print("\"Gazetter " + order + "\",");
        log.print(completed);
        log.print(',');
        log.print(matched);
        log.print(',');
        log.print(gtime - taction);
        log.print(',');
        log.print(taction);
        log.print(',');
        log.println(gtime);

        System.out.println("Gazetteer " + order + " time: " + gtime + " ms");
    }
}
