/*************************************************************************/
/* Copyright (c) 2004                                                    */
/* Daniel Sleator, David Temperley, and John Lafferty                    */
/* Copyright (c) 2009, 2013 Linas Vepstas                                */
/* All rights reserved                                                   */
/*                                                                       */
/* Use of the link grammar parsing system is subject to the terms of the */
/* license set forth in the LICENSE file included with this software.    */
/* This license allows free redistribution and use in source and binary  */
/* forms, with or without modification, subject to certain conditions.   */
/*                                                                       */
/*************************************************************************/

#include "api-structures.h"
#include "count.h"
#include "dict-api.h"  /* for print_expression when debugging */
#include "disjunct-utils.h"
#include "externs.h"
#include "post-process.h"
#include "print.h"
#include "prune.h"
#include "resources.h"
#include "word-utils.h"

#define CONTABSZ 8192
typedef Connector * connector_table;

typedef struct disjunct_dup_table_s disjunct_dup_table;
struct disjunct_dup_table_s
{
	unsigned int dup_table_size;
	Disjunct ** dup_table;
};

/* Indicator that this connector cannot be used -- that its "obsolete".  */
#define BAD_WORD (MAX_SENTENCE+1)

typedef struct c_list_s C_list;
struct c_list_s
{
	Connector * c;
	Boolean shallow;
	C_list * next;
};

typedef struct power_table_s power_table;
struct power_table_s
{
	unsigned int power_table_size;
	unsigned int l_table_size[MAX_SENTENCE];  /* the sizes of the hash tables */
	unsigned int r_table_size[MAX_SENTENCE];
	C_list ** l_table[MAX_SENTENCE];
	C_list ** r_table[MAX_SENTENCE];
};

typedef struct cms_struct Cms;
struct cms_struct
{
	Cms * next;
	const char * name;
	int count;	  /* the number of times this is in the multiset */
};

#define CMS_SIZE (2<<10)
typedef struct multiset_table_s multiset_table;
struct multiset_table_s
{
	Cms * cms_table[CMS_SIZE];
};

typedef struct prune_context_s prune_context;
struct prune_context_s
{
	Boolean null_links;
#ifdef USE_FAT_LINKAGES
	char ** deletable;
	char ** effective_dist;
	int power_prune_mode;  /* either GENTLE or RUTHLESS */
#endif /* USE_FAT_LINKAGES */
	int power_cost;
	int N_changed;   /* counts the number of changes
						   of c->word fields in a pass */

	power_table *pt;
	Sentence sent;
};

/*

  The algorithms in this file prune disjuncts from the disjunct list
  of the sentence that can be elimininated by a simple checks.  The first
  check works as follows:

  A series of passes are made through the sentence, alternating
  left-to-right and right-to-left.  Consier the left-to-right pass (the
  other is symmetric).  A set S of connectors is maintained (initialized
  to be empty).  Now the disjuncts of the current word are processed.
  If a given disjunct's left pointing connectors have the property that
  at least one of them has no connector in S to which it can be matched,
  then that disjunct is deleted. Now the set S is augmented by the right
  connectors of the remaining disjuncts of that word.  This completes
  one word.  The process continues through the words from left to right.
  Alternate passes are made until no disjunct is deleted.

  It worries me a little that if there are some really huge disjuncts lists,
  then this process will probably do nothing.  (This fear turns out to be
  unfounded.)

  Notes:  Power pruning will not work if applied before generating the
  "and" disjuncts.  This is because certain of it's tricks don't work.
  Think about this, and finish this note later....
  Also, currently I use the standard connector match procedure instead
  of the pruning one, since I know power pruning will not be used before
  and generation.  Replace this to allow power pruning to work before
  generating and disjuncts.

  Currently it seems that normal pruning, power pruning, and generation,
  pruning, and power pruning (after "and" generation) and parsing take
  about the same amount of time.  This is why doing power pruning before
  "and" generation might be a very good idea.

  New idea:  Suppose all the disjuncts of a word have a connector of type
  c pointing to the right.  And further, suppose that there is exactly one
  word to its right containing that type of connector pointing to the left.
  Then all the other disjuncts on the latter word can be deleted.
  (This situation is created by the processing of "either...or", and by
  the extra disjuncts added to a "," neighboring a conjunction.)

*/

/**
 * This hash function only looks at the leading upper case letters of
 * the connector string, and the label fields.  This ensures that if two
 * strings match (formally), then they must hash to the same place.
 */
static inline unsigned int hash_S(Connector * c)
{
	unsigned int h = connector_hash(c);
	return (h & (CONTABSZ-1));
}

/**
 * This is almost identical to do_match().  Its reason for existance
 * is the rather subtle fact that with "and" can transform a "Ss"
 * connector into "Sp".  This means that in order for pruning to
 * work, we must allow a "Ss" connector on word match an "Sp" connector
 * on a word to its right.  This is what this version of match allows.
 * We assume that a is on a word to the left of b.
 */
#ifdef USE_FAT_LINKAGES
int prune_match(int dist, Connector *a, Connector *b)
{
	const char *s, *t;
	int x, y;

	if (a->label != b->label) return FALSE;

	s = a->string;
	t = b->string;

	while(isupper((int)*s) || isupper((int)*t))
	{
		if (*s != *t) return FALSE;
		s++;
		t++;
	}

	/*	printf("PM: a=%4s b=%4s  ap=%d bp=%d  a->ll=%d b->ll=%d  dist=%d\n",
	   s, t, x, y, a->length_limit, b->length_limit, dist); */
	if (dist > a->length_limit || dist > b->length_limit) return FALSE;

	x = a->priority;
	y = b->priority;

	if ((x == THIN_priority) && (y == THIN_priority))
	{
#if defined(PLURALIZATION)
/*
		if ((*(a->string)=='S') && ((*s=='s') || (*s=='p')) &&  (*t=='p')) {
			return TRUE;
		}
*/
/*
   The above is a kludge to stop pruning from killing off disjuncts
   which (because of pluralization in and) might become valid later.
   Recall that "and" converts a singular subject into a plural one.
   The (*s=='p') part is so that "he and I are good" doesn't get killed off.
   The above hack is subsumed by the following one:
*/
		if ((*(a->string)=='S') && ((*s=='s') || (*s=='p')) &&
			((*t=='p') || (*t=='s')) &&
			((s-1 == a->string) || ((s-2 == a->string) && (*(s-1) == 'I')))){
			return TRUE;
		}
/*
   This change is to accommodate "nor".  In particular we need to
   prevent "neither John nor I likes dogs" from being killed off.
   We want to allow this to apply to "are neither a dog nor a cat here"
   and "is neither a dog nor a cat here".  This uses the "SI" connector.
   The third line above ensures that the connector is either "S" or "SI".
*/
#endif
		while ((*s != '\0') && (*t != '\0'))
		{
			if ((*s == '*') || (*t == '*') ||
				((*s == *t) && (*s != '^')))
			  /* this last case here is rather obscure.  It prevents
				 '^' from matching '^'.....Is this necessary?
					 ......yes, I think it is.   */
			{
				s++;
				t++;
			}
			else
				return FALSE;
		}
		return TRUE;
	}
	else if ((x == UP_priority) && (y == DOWN_priority))
	{
		while ((*s!='\0') && (*t!='\0'))
		{
			if ((*s == *t) || (*s == '*') || (*t == '^'))
			{
				/* that '^' should match on the DOWN_priority
				   node is subtle, but correct */
				s++;
				t++;
			}
			else
				return FALSE;
		}
		return TRUE;
	}
	else if ((y == UP_priority) && (x == DOWN_priority))
	{
		while ((*s!='\0') && (*t!='\0'))
		{
			if ((*s == *t) || (*t == '*') || (*s == '^'))
			{
				s++;
				t++;
			}
			else
				return FALSE;
		}
		return TRUE;
	}
	else
		return FALSE;
}

#else /* not USE_FAT_LINKAGES */

int prune_match(int dist, Connector *a, Connector *b)
{
	const char *s, *t;

	s = a->string;
	t = b->string;

	while(isupper((int)*s) || isupper((int)*t))
	{
		if (*s != *t) return FALSE;
		s++;
		t++;
	}

	if (dist > a->length_limit || dist > b->length_limit) return FALSE;

	while ((*s != '\0') && (*t != '\0'))
	{
		if ((*s == '*') || (*t == '*') || (*s == *t))
		{
			s++;
			t++;
		}
		else
			return FALSE;
	}
	return TRUE;
}
#endif /* USE_FAT_LINKAGES */

static void zero_connector_table(connector_table *ct)
{
	memset(ct, 0, sizeof(Connector *) * CONTABSZ);
}

/**
 * This function puts connector c into the connector table
 * if one like it isn't already there.
 */
static void insert_connector(connector_table *ct, Connector * c)
{
	unsigned int h;
	Connector * e;

	h = hash_S(c);

	for (e = ct[h]; e != NULL; e = e->tableNext)
	{
#ifdef USE_FAT_LINKAGES
		if ((strcmp(c->string, e->string) == 0) &&
		    (c->priority == e->priority) &&
		    (c->label == e->label))
			return;
#else /* USE_FAT_LINKAGES */
		if (strcmp(c->string, e->string) == 0)
			return;
#endif /* USE_FAT_LINKAGES */
	}
	c->tableNext = ct[h];
	ct[h] = c;
}

#if 0 /* DEAD_CODE_NOT_USED_ANYWHERE */
void prune(Sentence sent)
{
	Connector *e, *f;
	int w;
	int N_deleted;
	Connector *ct[CONTABSZ];
	Disjunct fake_head, *d, *d1;

#ifdef USE_FAT_LINKAGES
	/* XXX why is this here ?? */
	count_set_effective_distance(sent);
#endif /* USE_FAT_LINKAGES */

	N_deleted = 1;  /* a lie to make it always do at least 2 passes */
	while (1)
	{
		/* Left-to-right pass */
		zero_connector_table(ct);

		/* For every word */
		for (w = 0; w < sent->length; w++)
		{
			d = &fake_head;
			d->next = sent->word[w].d;

			/* For every disjunct of word */
			while ((d1 = d->next))
			{
				e = d1->left;

				/* For every left clause of this disjunct */
				while (e)
				{
					unsigned int h = hash_S(e);
					for (f = ct[h]; f != NULL; f = f->tableNext)
					{
						if (prune_match(0, f, e)) break;
					}
					if (!f) break; /* If f null, not a single match was found */
					e = e->next;
				}

				/* We know this disjunct is dead since no match
				 * can be found on a required clause. */
				if (e)
				{
					N_deleted ++;
					free_connectors(d1->left);
					free_connectors(d1->right);
					d->next = d1->next;
					xfree(d1, sizeof(Disjunct));
				}
				else
				{
			 		/* Store surviving disjunct in hash table */
					for (e = d1->right; e != NULL; e = e->next)
					{
						insert_connector(ct, e);
					}
					d = d1; /* move on to next disjunct*/
				}
			}
			sent->word[w].d = fake_head.next;
		}

		if (2 < verbosity)
		{
			printf("l->r pass removed %d\n", N_deleted);
			print_disjunct_counts(sent);
		}

		/* We did nothing (and this is not the 1st pass) */
		if (N_deleted == 0) break;

		/* Right-to-left pass */
		zero_connector_table(ct);
		N_deleted = 0;

		/* For every word */
		for (w = sent->length-1; w >= 0; w--)
		{
			d = &fake_head;
			d->next = sent->word[w].d;

			while ((d1 = d->next))
			{
				e = d1->right;

				while (e)
				{
					unsigned int h = hash_S(e);
					for (f = ct[h]; f != NULL; f = f->tableNext)
					{
						if (prune_match(0, e, f)) break;
					}
					if (!f) break; /* If f null, not a single match was found */
					e = e->next;
				}

				/* We know this disjunct is dead since it can't match
				 * to the right*/
				if(e)
				{
					N_deleted ++;
					free_connectors(d1->left);
					free_connectors(d1->right);
					d->next = d1->next;
					xfree(d1, sizeof(Disjunct));
				}
				else
				{
					/* Store surviving disjunct in hash table */
					for (e = d1->left; e != NULL; e = e->next)
					{
						insert_connector(ct, e);
					}
					d = d1; /* move on to next disjunct*/
				}
				sent->word[w].d = fake_head.next;
			}
		}

		if (verbosity > 2)
		{
			printf("r->l pass removed %d\n", N_deleted);
			print_disjunct_counts(sent);
		}

		/* We made no change on this pass */
		if (N_deleted == 0) break;
		N_deleted = 0;
	}
}
#endif /* DEAD_CODE_NOT_USED_ANYWHERE */

/*
   The second algorithm eliminates disjuncts that are dominated by
   another.  It works by hashing them all, and checking for domination.
*/

#if 0
/* ============================================================x */

/*
  Consider the idea of deleting a disjunct if it is dominated (in terms of
  what it can match) by some other disjunct on the same word.  This has
  been implemented below.  There are three problems with it:

  (1) It is almost never the case that any disjuncts are eliminated.
      (The code below has works correctly with fat links, but because
      all of the fat connectors on a fat disjunct have the same matching
      string, the only time a disjuct will die is if it is the same
      as another one.  This is captured by the simplistic version below.

  (2) connector_matches_alam may not be exactly correct.  I don't
      think it does the fat link matches properly.   (See the comment
      in and.c for more information about matching fat links.)  This is
      irrelevant because of (1).

  (3) The linkage that is eliminated by this, might just be the one that
      passes post-processing, as the following example shows.
      This is pretty silly, and should probably be changed.

> telling John how our program works would be stupid
Accepted (2 linkages, 1 with no P.P. violations)
  Linkage 1, cost vector = (0, 0, 7)

   +------------------G-----------------+
   +-----R-----+----CL----+             |
   +---O---+   |   +---D--+---S---+     +--I-+-AI-+
   |       |   |   |      |       |     |    |    |
telling.g John how our program.n works would be stupid

               /////         CLg    <---CLg--->  CL      telling.g
 (g)           telling.g     G      <---G----->  G       would
 (g) (d)       telling.g     R      <---R----->  R       how
 (g) (d)       telling.g     O      <---O----->  O       John
 (g) (d)       how           CLe    <---CLe--->  CL      program.n
 (g) (d) (e)   our           D      <---Ds---->  Ds      program.n
 (g) (d) (e)   program.n     Ss     <---Ss---->  Ss      works
 (g)           would         I      <---Ix---->  Ix      be
 (g)           be            AI     <---AIi--->  AIi     stupid

(press return for another)
>
  Linkage 2 (bad), cost vector = (0, 0, 7)

   +------------------G-----------------+
   +-----R-----+----CL----+             |
   +---O---+   |   +---D--+---S---+     +--I-+-AI-+
   |       |   |   |      |       |     |    |    |
telling.g John how our program.n works would be stupid

               /////         CLg    <---CLg--->  CL      telling.g
 (g)           telling.g     G      <---G----->  G       would
 (g) (d)       telling.g     R      <---R----->  R       how
 (g) (d)       telling.g     O      <---O----->  O       John
 (g) (d)       how           CLe    <---CLe--->  CL      program.n
 (g) (d) (e)   our           D      <---Ds---->  Ds      program.n
 (g) (d) (e)   program.n     Ss     <---Ss---->  Ss      works
 (g)           would         I      <---Ix---->  Ix      be
 (g)           be            AI     <---AI---->  AI      stupid

P.P. violations:
      Special subject rule violated
*/

/**
 * hash function that takes a string and a seed value i
 */
static int string_hash(disjunct_dup_table *dt, const char * s, int i)
{
	for(;*s != '\0';s++) i = i + (i<<1) + randtable[(*s + i) & (RTSIZE-1)];
	return (i & (dt->dup_table_size-1));
}

/**
 * This returns true if the connector a matches everything that b
 *  matches, and possibly more.  (alam=at least as much)
 *
 * TRUE for equal connectors.
 * remains TRUE if multi-match added to the first.
 * remains TRUE if subsrcripts deleted from the first.
 */
int connector_matches_alam(Connector * a, Connector * b)
{
	char * s, * t, *u;
	if (((!a->multi) && b->multi) ||
		(a->label != b->label) ||
		(a->priority != b->priority))  return FALSE;
	s = a->string;
	t = b->string;

	/* isupper -- connectors cannot be UTF8 at this time */
	while(isupper(*s) || isupper(*t))
	{
		if (*s == *t) {
			s++;
			t++;
		} else return FALSE;
	}
	if (a->priority == DOWN_priority) {
		u = s;
		s = t;
		t = u;
	}
	while((*s != '\0') && (*t != '\0')) {
		if ((*s == *t) || (*s == '*') || (*t == '^')) {
			s++;
			t++;
		} else return FALSE;
	}
	while ((*s != '\0') && (*s == '*')) s++;
	return (*s == '\0');
}


/**
 * This hash function that takes a connector and a seed value i.
 * It only looks at the leading upper case letters of
 * the string, and the label.  This ensures that if two connectors
 * match, then they must hash to the same place.
 */
static int conn_hash(Connector * c, int i)
{
	int nb;
	const char * s;
	s = c->string;

	i = i + (i<<1) + randtable[(c->label + i) & (RTSIZE-1)];
	nb = is_utf8_upper(s);
	while (nb)
	{
		i = i + (i<<1) + randtable[(*s + i) & (RTSIZE-1)];
		s += nb;
		nb = is_utf8_upper(s);
	}
	return i;
}

static inline int pconnector_hash(disjunct_dup_table *dt, Connector * c, int i)
{
	i = conn_hash(c, i);
	return (i & (ct->dup_table_size-1));
}

/**
 * This is a hash function for disjuncts
 */
static int hash_disjunct(disjunct_dup_table *dt, Disjunct * d)
{
	int i;
	Connector *e;
	i = 0;
	for (e = d->left ; e != NULL; e = e->next)
	{
		i = pconnector_hash(dt, e, i);
	}
	for (e = d->right ; e != NULL; e = e->next)
	{
		i = pconnector_hash(dt, e, i);
	}
	return string_hash(dt, d->string, i);
}

/**
 * Returns TRUE if disjunct d1 can match anything that d2 can
 * if this happens, it constitutes a proof that there is absolutely
 * no use for d2.
 */
static int disjunct_matches_alam(Disjunct * d1, Disjunct * d2)
{
	Connector *e1, *e2;
	if (d1->cost > d2->cost) return FALSE;
	e1 = d1->left;
	e2 = d2->left;
	while((e1!=NULL) && (e2!=NULL)) {
		if (!connector_matches_alam(e1,e2)) break;
		e1 = e1->next;
		e2 = e2->next;
	}
	if ((e1!=NULL) || (e2!=NULL)) return FALSE;
	e1 = d1->right;
	e2 = d2->right;
	while((e1!=NULL) && (e2!=NULL)) {
		if (!connector_matches_alam(e1,e2)) break;
		e1 = e1->next;
		e2 = e2->next;
	}
	if ((e1!=NULL) || (e2!=NULL)) return FALSE;
	return (strcmp(d1->string, d2->string) == 0);
}

/**
 * Takes the list of disjuncts pointed to by d, eliminates all
 * duplicates, and returns a pointer to a new list.
 * It frees the disjuncts that are eliminated.
 */
Disjunct * eliminate_duplicate_disjuncts(Disjunct * d)
{
	int i, h, count;
	Disjunct *dn, *dx, *dxn, *front;
	count = 0;
	disjunct_dup_table *dt;

	dt = disjunct_dup_table_new(next_power_of_two_up(2 * count_disjuncts(d)));

	for (;d!=NULL; d = dn)
	{
		dn = d->next;
		h = hash_disjunct(d);

		front = NULL;
		for (dx = dt->dup_table[h]; dx != NULL; dx = dxn)
		{
			dxn = dx->next;
			if (disjunct_matches_alam(dx,d))
			{
				/* we know that d should be killed */
				d->next = NULL;
				free_disjuncts(d);
				count++;
				front = catenate_disjuncts(front, dx);
				break;
			} else if (disjunct_matches_alam(d,dx)) {
				/* we know that dx should be killed off */
				dx->next = NULL;
				free_disjuncts(dx);
				count++;
			} else {
				/* neither should be killed off */
				dx->next = front;
				front = dx;
			}
		}
		if (dx == NULL) {
			/* we put d in the table */
			d->next = front;
			front = d;
		}
		dt->dup_table[h] = front;
	}

	/* d is now NULL */
	for (i = 0; i < dt->dup_table_size; i++)
	{
		for (dx = dt->dup_table[i]; dx != NULL; dx = dxn)
		{
			dxn = dx->next;
			dx->next = d;
			d = dx;
		}
	}

	if ((verbosity > 2) && (count != 0)) printf("killed %d duplicates\n", count);

	disjunct_dup_table_delete(dt);
	return d;
}

/* ============================================================x */
#endif

/* ================================================================= */
/**
 * Here is expression pruning.  This is done even before the expressions
 * are turned into lists of disjuncts.
 *
 * This uses many of the same data structures and functions that are used
 * by prune.
 *
 * The purge operations remove all irrelevant stuff from the expression,
 * and free the purged stuff.  A connector is deemed irrelevant if its
 * string pointer has been set to NULL.  The passes through the sentence
 * have the job of doing this.
 *
 * If an OR or AND type expression node has one child, we can replace it
 * by its child.  This, of course, is not really necessary, except for
 * performance(?)
 */

static Exp* purge_Exp(Exp *);

/**
 * Get rid of the elements with null expressions
 */
static E_list * or_purge_E_list(E_list * l)
{
	E_list * el;
	if (l == NULL) return NULL;
	if ((l->e = purge_Exp(l->e)) == NULL)
	{
		el = or_purge_E_list(l->next);
		xfree((char *)l, sizeof(E_list));
		return el;
	}
	l->next = or_purge_E_list(l->next);
	return l;
}

/**
 * Returns 0 iff the length of the disjunct list is 0.
 * If this is the case, it frees the structure rooted at l.
 */
static int and_purge_E_list(E_list * l)
{
	if (l == NULL) return 1;
	if ((l->e = purge_Exp(l->e)) == NULL)
	{
		free_E_list(l->next);
		xfree((char *)l, sizeof(E_list));
		return 0;
	}
	if (and_purge_E_list(l->next) == 0)
	{
		free_Exp(l->e);
		xfree((char *)l, sizeof(E_list));
		return 0;
	}
	return 1;
}

/**
 * Must be called with a non-null expression.
 * Return NULL iff the expression has no disjuncts.
 */
static Exp* purge_Exp(Exp *e)
{
	if (e->type == CONNECTOR_type)
	{
		if (e->u.string == NULL)
		{
			xfree((char *)e, sizeof(Exp));
			return NULL;
		}
		else
		{
			return e;
		}
	}
	if (e->type == AND_type)
	{
		if (and_purge_E_list(e->u.l) == 0)
		{
			xfree((char *)e, sizeof(Exp));
			return NULL;
		}
	}
	else
	{
		e->u.l = or_purge_E_list(e->u.l);
		if (e->u.l == NULL)
		{
			xfree((char *)e, sizeof(Exp));
			return NULL;
		}
	}

/* This code makes it kill off nodes that have just one child
   (1) It's going to give an insignificant speed-up
   (2) Costs have not been handled correctly here.
   The code is excised for these reasons.
*/
/*
	if ((e->u.l != NULL) && (e->u.l->next == NULL))
	{
		ne = e->u.l->e;
		xfree((char *) e->u.l, sizeof(E_list));
		xfree((char *) e, sizeof(Exp));
		return ne;
	}
*/
	return e;
}

/**
 * Returns TRUE if c can match anything in the set S.
 */
static inline int matches_S(connector_table *ct, Connector * c, int dir)
{
	Connector * e;
	unsigned int h = hash_S(c);

	if (dir == '-')
	{
		for (e = ct[h]; e != NULL; e = e->tableNext)
		{
			if (prune_match(0, e, c)) return TRUE;
		}
		return FALSE;
	}
	else
	{
		for (e = ct[h]; e != NULL; e = e->tableNext)
		{
			if (prune_match(0, c, e)) return TRUE;
		}
		return FALSE;
	}
}

/**
 * Mark as dead all of the dir-pointing connectors
 * in e that are not matched by anything in the current set.
 * Returns the number of connectors so marked.
 */
static int mark_dead_connectors(connector_table *ct, Exp * e, int dir)
{
	int count;
	count = 0;
	if (e->type == CONNECTOR_type)
	{
		if (e->dir == dir)
		{
			Connector dummy;
			init_connector(&dummy);
#ifdef USE_FAT_LINKAGES
			dummy.label = NORMAL_LABEL;
			dummy.priority = THIN_priority;
#endif /* USE_FAT_LINKAGES */
			dummy.string = e->u.string;
			if (!matches_S(ct, &dummy, dir))
			{
				e->u.string = NULL;
				count++;
			}
		}
	}
	else
	{
		E_list *l;
		for (l = e->u.l; l != NULL; l = l->next)
		{
			count += mark_dead_connectors(ct, l->e, dir);
		}
	}
	return count;
}

/**
 * Put into the set S all of the dir-pointing connectors still in e.
 * Return a list of allocated dummy connectors; these will need to be
 * freed.
 */
static Connector * insert_connectors(connector_table *ct, Exp * e,
                                     Connector *alloc_list, int dir)
{
	if (e->type == CONNECTOR_type)
	{
		if (e->dir == dir)
		{
			Connector *dummy = connector_new();
			dummy->string = e->u.string;
			insert_connector(ct, dummy);
			dummy->next = alloc_list;
			alloc_list = dummy;
		}
	}
	else
	{
		E_list *l;
		for (l=e->u.l; l!=NULL; l=l->next)
		{
			alloc_list = insert_connectors(ct, l->e, alloc_list, dir);
		}
	}
	return alloc_list;
}

/**
 * This removes the expressions that are empty from the list corresponding
 * to word w of the sentence.
 */
static void clean_up_expressions(Sentence sent, int w)
{
	X_node head_node, *d, *d1;
	d = &head_node;
	d->next = sent->word[w].x;
	while (d->next != NULL)
	{
		if (d->next->exp == NULL)
		{
			d1 = d->next;
			d->next = d1->next;
			xfree((char *)d1, sizeof(X_node));
		}
		else
		{
			d = d->next;
		}
	}
	sent->word[w].x = head_node.next;
}

/* #define DBG(X) X */
#define DBG(X)

void expression_prune(Sentence sent)
{
	int N_deleted;
	X_node * x;
	int w;
	Connector *ct[CONTABSZ];
	Connector *dummy_list = NULL;

	zero_connector_table(ct);

	N_deleted = 1;  /* a lie to make it always do at least 2 passes */

	while (1)
	{
		/* Left-to-right pass */
		/* For every word */
		for (w = 0; w < sent->length; w++)
		{
			/* For every expression in word */
			for (x = sent->word[w].x; x != NULL; x = x->next)
			{
DBG(printf("before marking: "); print_expression(x->exp); printf("\n"););
				N_deleted += mark_dead_connectors(ct, x->exp, '-');
DBG(printf(" after marking: "); print_expression(x->exp); printf("\n"););
			}
			for (x = sent->word[w].x; x != NULL; x = x->next)
			{
DBG(printf("before purging: "); print_expression(x->exp); printf("\n"););
				x->exp = purge_Exp(x->exp);
DBG(printf("after purging: "); print_expression(x->exp); printf("\n"););
			}

			/* gets rid of X_nodes with NULL exp */
			clean_up_expressions(sent, w);
			for (x = sent->word[w].x; x != NULL; x = x->next)
			{
				dummy_list = insert_connectors(ct, x->exp, dummy_list, '+');
			}
		}

		if (verbosity > 2)
		{
			printf("l->r pass removed %d\n", N_deleted);
			print_expression_sizes(sent);
		}

		/* Free the allocated dummy connectors */
		free_connectors(dummy_list);
		dummy_list = NULL;
		zero_connector_table(ct);

		if (N_deleted == 0) break;

		/* Right-to-left pass */
		N_deleted = 0;
		for (w = sent->length-1; w >= 0; w--)
		{
			for (x = sent->word[w].x; x != NULL; x = x->next)
			{
/*	 printf("before marking: "); print_expression(x->exp); printf("\n"); */
				N_deleted += mark_dead_connectors(ct, x->exp, '+');
/*	 printf("after marking: "); print_expression(x->exp); printf("\n"); */
			}
			for (x = sent->word[w].x; x != NULL; x = x->next)
			{
/*	 printf("before perging: "); print_expression(x->exp); printf("\n"); */
				x->exp = purge_Exp(x->exp);
/*	 printf("after perging: "); print_expression(x->exp); printf("\n"); */
			}
			clean_up_expressions(sent, w);  /* gets rid of X_nodes with NULL exp */
			for (x = sent->word[w].x; x != NULL; x = x->next)
			{
				dummy_list = insert_connectors(ct, x->exp, dummy_list, '-');
			}
		}

		if (verbosity > 2)
		{
			printf("r->l pass removed %d\n", N_deleted);
			print_expression_sizes(sent);
		}

		/* Free the allocated dummy connectors */
		free_connectors(dummy_list);
		dummy_list = NULL;
		zero_connector_table(ct);
		if (N_deleted == 0) break;
		N_deleted = 0;
	}
}


/*
  Here is what you've been waiting for: POWER-PRUNE

  The kinds of constraints it checks for are the following:

	1) successive connectors on the same disjunct have to go to
	   nearer and nearer words.

	2) two deep connectors cannot attach to each other
	   (A connectors is deep if it is not the first in its list; it
	   is shallow if it is the first in its list; it is deepest if it
	   is the last on its list.)

	3) on two adjacent words, a pair of connectors can be used
	   only if they're the deepest ones on their disjuncts

	4) on two non-adjacent words, a pair of connectors can be used only
	   if not [both of them are the deepest].

   The data structure consists of a pair of hash tables on every word.
   Each bucket of a hash table has a list of pointers to connectors.
   These nodes also store if the chosen connector is shallow.
*/
/*
   As with normal pruning, we make alternate left->right and right->left
   passes.  In the R->L pass, when we're on a word w, we make use of
   all the left-pointing hash tables on the words to the right of w.
   After the pruning on this word, we build the left-pointing hash table
   this word.  This guarantees idempotence of the pass -- after doing an
   L->R, doing another would change nothing.

   Each connector has an integer c_word field.  This refers to the closest
   word that it could be connected to.  These are initially determined by
   how deep the connector is.  For example, a deepest connector can connect
   to the neighboring word, so its c_word field is w+1 (w-1 if this is a left
   pointing connector).  It's neighboring shallow connector has a c_word
   value of w+2, etc.

   The pruning process adjusts these c_word values as it goes along,
   accumulating information about any way of linking this sentence.
   The pruning process stops only after no disjunct is deleted and no
   c_word values change.

   The difference between RUTHLESS and GENTLE power pruning is simply
   that GENTLE uses the deletable region array, and RUTHLESS does not.
   So we can get the effect of these two different methods simply by
   always unsuring that deletable[][] has been defined.  With nothing
   deletable, this is equivalent to RUTHLESS.   --DS, 7/97
*/

/**
 * returns the number of connectors in the left lists of the disjuncts.
 */
static int left_connector_count(Disjunct * d)
{
	Connector *c;
	int i=0;
	for (;d!=NULL; d=d->next) {
		for (c = d->left; c!=NULL; c = c->next) i++;
	}
	return i;
}

static int right_connector_count(Disjunct * d)
{
	Connector *c;
	int i=0;
	for (;d!=NULL; d=d->next) {
	  for (c = d->right; c!=NULL; c = c->next) i++;
	}
	return i;
}

static void free_C_list(C_list * t)
{
	C_list *xt;
	for (; t!=NULL; t=xt) {
		xt = t->next;
		xfree((char *)t, sizeof(C_list));
	}
}

/**
 * free all of the hash tables and C_lists
 */
static void power_table_delete(power_table *pt)
{
	unsigned int w;
	unsigned int i;

	for (w = 0; w < pt->power_table_size; w++)
	{
		for (i = 0; i < pt->l_table_size[w]; i++)
		{
			free_C_list(pt->l_table[w][i]);
		}
		xfree((char *)pt->l_table[w], pt->l_table_size[w] * sizeof (C_list *));

		for (i = 0; i < pt->r_table_size[w]; i++)
		{
			free_C_list(pt->r_table[w][i]);
		}
		xfree((char *)pt->r_table[w], pt->r_table_size[w] * sizeof (C_list *));
	}
	xfree(pt, sizeof(power_table));
}

/**
 * The disjunct d (whose left or right pointer points to c) is put
 * into the appropriate hash table
 */
static void put_into_power_table(unsigned int size, C_list ** t, Connector * c, Boolean shal)
{
	unsigned int h;
	C_list * m;
	h = connector_hash(c) & (size-1);
	m = (C_list *) xalloc (sizeof(C_list));
	m->next = t[h];
	t[h] = m;
	m->c = c;
	m->shallow = shal;
}

static int set_dist_fields(Connector * c, int w, int delta)
{
	int i;
	if (c==NULL) return w;
	i = set_dist_fields(c->next, w, delta) + delta;
	c->word = i;
	return i;
}

/**
 * Allocates and builds the initial power hash tables
 */
static power_table * power_table_new(Sentence sent)
{
	power_table *pt;
	int w, len;
	unsigned int i, size;
	C_list ** t;
	Disjunct * d, * xd, * head;
	Connector * c;

	pt = (power_table *) xalloc (sizeof(power_table));
	pt->power_table_size = sent->length;

   /* First, we initialize the word fields of the connectors, and
	  eliminate those disjuncts with illegal connectors */
	for (w=0; w<sent->length; w++)
	{
	  head = NULL;
	  for (d=sent->word[w].d; d!=NULL; d=xd) {
		  xd = d->next;
		  if ((set_dist_fields(d->left, w, -1) < 0) ||
			  (set_dist_fields(d->right, w, 1) >= sent->length)) {
			  d->next = NULL;
			  free_disjuncts(d);
		  } else {
			  d->next = head;
			  head = d;
		  }
	  }
	  sent->word[w].d = head;
	}

	for (w=0; w<sent->length; w++)
	{
		/* The below uses variable-sized hash tables. This seems to
		 * provide performance that is equal or better than the best
		 * fixed-size prformance.
		 * The best fixed-size performance seems to come at about
		 * a 1K table size, for both English and Russian. (Both have
		 * about 100 fixed link-types, and many thousands of auto-genned
		 * link types (IDxxx idioms for both, LLxxx suffix links for
		 * Russian).  Plusses and minuses:
		 * + small fixed tables are faster to initialize.
		 * - small fixed tables have more collisions
		 * - variable-size tables require counting connectors.
		 *   (and the more complex code to go with)
		 * CPU cache-size effects ...
		 * Strong depenence on the hashing algo!
		 */
		len = left_connector_count(sent->word[w].d);
		size = next_power_of_two_up(len);
#define TOPSZ 32768
		if (TOPSZ < size) size = TOPSZ;
		pt->l_table_size[w] = size;
		t = pt->l_table[w] = (C_list **) xalloc(size * sizeof(C_list *));
		for (i=0; i<size; i++) t[i] = NULL;

		for (d=sent->word[w].d; d!=NULL; d=d->next) {
			c = d->left;
			if (c != NULL) {
				put_into_power_table(size, t, c, TRUE);
				for (c=c->next; c!=NULL; c=c->next){
					put_into_power_table(size, t, c, FALSE);
				}
			}
		}

		len = right_connector_count(sent->word[w].d);
		size = next_power_of_two_up(len);
		if (TOPSZ < size) size = TOPSZ;
		pt->r_table_size[w] = size;
		t = pt->r_table[w] = (C_list **) xalloc(size * sizeof(C_list *));
		for (i=0; i<size; i++) t[i] = NULL;

		for (d=sent->word[w].d; d!=NULL; d=d->next) {
			c = d->right;
			if (c != NULL) {
				put_into_power_table(size, t, c, TRUE);
				for (c=c->next; c!=NULL; c=c->next){
					put_into_power_table(size, t, c, FALSE);
				}
			}
		}
	}

	return pt;
}

/**
 * This runs through all the connectors in this table, and eliminates those
 * who are obsolete.  The word fields of an obsolete one has been set to
 * BAD_WORD.
 */
static void clean_table(unsigned int size, C_list ** t)
{
	unsigned int i;
	C_list * m, * xm, * head;
	for (i = 0; i < size; i++) {
		head = NULL;
		for (m = t[i]; m != NULL; m = xm) {
			xm = m->next;
			if (m->c->word != BAD_WORD) {
				m->next = head;
				head = m;
			} else {
				xfree((char *) m, sizeof(C_list));
			}
		}
		t[i] = head;
	}
}

/**
 * This takes two connectors (and whether these are shallow or not)
 * (and the two words that these came from) and returns TRUE if it is
 * possible for these two to match based on local considerations.
 */
#ifdef USE_FAT_LINKAGES
static Boolean possible_connection(prune_context *pc,
                               Connector *lc, Connector *rc,
                               Boolean lshallow, Boolean rshallow,
                               int lword, int rword)
{
	if ((!lshallow) && (!rshallow)) return FALSE;
	  /* two deep connectors can't work */
	if ((lc->word > rword) || (rc->word < lword)) return FALSE;
	  /* word range constraints */

	assert(lword < rword, "Bad word order in possible connection.");

	/* Now, notice that the only differences between the following two
	   cases is that (1) ruthless uses do_match and gentle uses prune_match.
	   and (2) ruthless doesn't use deletable[][].  This latter fact is
	   irrelevant, since deletable[][] is now guaranteed to have been
	   created. */

	if (pc->power_prune_mode == RUTHLESS) {
		if (lword == rword-1) {
			if (!((lc->next == NULL) && (rc->next == NULL))) return FALSE;
		} else {
			if ((!pc->null_links) &&
				(lc->next == NULL) && (rc->next == NULL) && (!lc->multi) && (!rc->multi)) {
				return FALSE;
			}
		}
		return do_match(pc->sent, lc, rc, lword, rword);
	} else {
		if (lword == rword-1) {
			if (!((lc->next == NULL) && (rc->next == NULL))) return FALSE;
		} else {
			if ((!pc->null_links) &&
				(lc->next == NULL) && (rc->next == NULL) && (!lc->multi) && (!rc->multi)
				&& !pc->deletable[lword][rword]
				)
			{
				return FALSE;
			}
		}
		return prune_match(pc->effective_dist[lword][rword], lc, rc);
	}
}
#else /* not USE_FAT_LINKAGES */

static Boolean possible_connection(prune_context *pc,
                               Connector *lc, Connector *rc,
                               Boolean lshallow, Boolean rshallow,
                               int lword, int rword)
{
	if ((!lshallow) && (!rshallow)) return FALSE;
	  /* two deep connectors can't work */
	if ((lc->word > rword) || (rc->word < lword)) return FALSE;
	  /* word range constraints */

	assert(lword < rword, "Bad word order in possible connection.");

	if (lword == rword-1) {
		if (!((lc->next == NULL) && (rc->next == NULL))) return FALSE;
	}
	else
	if ((!pc->null_links) &&
	    (lc->next == NULL) &&
	    (rc->next == NULL) &&
	    (!lc->multi) && (!rc->multi))
	{
		return FALSE;
	}
	return prune_match(rword - lword, lc, rc);
}
#endif /* USE_FAT_LINKAGES */


/**
 * This returns TRUE if the right table of word w contains
 * a connector that can match to c.  shallow tells if c is shallow.
 */
static Boolean
right_table_search(prune_context *pc, int w, Connector *c,
                   Boolean shallow, int word_c)
{
	unsigned int size, h;
	C_list *cl;
	power_table *pt;

	pt = pc->pt;
	size = pt->r_table_size[w];
	h = connector_hash(c) & (size-1);
	for (cl = pt->r_table[w][h]; cl != NULL; cl = cl->next)
	{
		if (possible_connection(pc, cl->c, c, cl->shallow, shallow, w, word_c))
		{
			return TRUE;
		}
	}
	return FALSE;
}

/**
 * This returns TRUE if the right table of word w contains
 * a connector that can match to c.  shallows tells if c is shallow
 */
static Boolean
left_table_search(prune_context *pc, int w, Connector *c,
                  Boolean shallow, int word_c)
{
	unsigned int size, h;
	C_list *cl;
	power_table *pt;

	pt = pc->pt;
	size = pt->l_table_size[w];
	h = connector_hash(c) & (size-1);
	for (cl = pt->l_table[w][h]; cl != NULL; cl = cl->next)
	{
		if (possible_connection(pc, c, cl->c, shallow, cl->shallow, word_c, w))
			return TRUE;
	}
	return FALSE;
}

/**
 * Take this connector list, and try to match it with the words
 * w-1, w-2, w-3...  Returns the word to which the first connector of
 * the list could possibly be matched.  If c is NULL, returns w.  If
 * there is no way to match this list, it returns a negative number.
 * If it does find a way to match it, it updates the c->word fields
 * correctly.
 */
static int
left_connector_list_update(prune_context *pc, Connector *c,
                           int word_c, int w, Boolean shallow)
{
	int n;
	int foundmatch;

	if (c == NULL) return w;
	n = left_connector_list_update(pc, c->next, word_c, w, FALSE) - 1;
	if (((int) c->word) < n) n = c->word;

	/* n is now the rightmost word we need to check */
	foundmatch = FALSE;
	for (; n >= 0 ; n--) {
		pc->power_cost++;
		if (right_table_search(pc, n, c, shallow, word_c)) {
			foundmatch = TRUE;
			break;
		}
	}
	if (n < ((int) c->word)) {
		c->word = n;
		pc->N_changed++;
	}
	return (foundmatch ? n : -1);
}

/**
 * Take this connector list, and try to match it with the words
 * w+1, w+2, w+3...  Returns the word to which the first connector of
 * the list could possibly be matched.  If c is NULL, returns w.  If
 * there is no way to match this list, it returns a number greater than
 * N_words - 1.   If it does find a way to match it, it updates the
 * c->word fields correctly.
 */
static int
right_connector_list_update(prune_context *pc, Sentence sent, Connector *c,
                            int word_c, int w, Boolean shallow)
{
	int n;
	int foundmatch;

	if (c == NULL) return w;
	n = right_connector_list_update(pc, sent, c->next, word_c, w, FALSE) + 1;
	if (c->word > n) n = c->word;

	/* n is now the leftmost word we need to check */
	foundmatch = FALSE;
	for (; n < sent->length ; n++) {
		pc->power_cost++;
		if (left_table_search(pc, n, c, shallow, word_c)) {
			foundmatch = TRUE;
			break;
		}
	}
	if (n > c->word) {
		c->word = n;
		pc->N_changed++;
	}
	return (foundmatch ? n : sent->length);
}

/** The return value is the number of disjuncts deleted */
int power_prune(Sentence sent, int mode, Parse_Options opts)
{
	power_table *pt;
	prune_context *pc;
	Disjunct *d, *free_later, *dx, *nd;
	Connector *c;
	int w, N_deleted, total_deleted;

	pc = (prune_context *) xalloc (sizeof(prune_context));
	pc->power_cost = 0;
	pc->null_links = (opts->min_null_count > 0);
	pc->N_changed = 1;  /* forces it always to make at least two passes */

	pc->sent = sent;
#ifdef USE_FAT_LINKAGES
	pc->power_prune_mode = mode;
	pc->deletable = sent->deletable;
	pc->effective_dist = sent->effective_dist;
	count_set_effective_distance(sent);
#endif /* USE_FAT_LINKAGES */

	pt = power_table_new(sent);
	pc->pt = pt;

	free_later = NULL;
	N_deleted = 0;

	total_deleted = 0;

	while (1)
	{
		/* left-to-right pass */
		for (w = 0; w < sent->length; w++) {
			if ((2 == w%7) && parse_options_resources_exhausted(opts)) break;
			for (d = sent->word[w].d; d != NULL; d = d->next) {
				if (d->left == NULL) continue;
				if (left_connector_list_update(pc, d->left, w, w, TRUE) < 0) {
					for (c=d->left;  c != NULL; c = c->next) c->word = BAD_WORD;
					for (c=d->right; c != NULL; c = c->next) c->word = BAD_WORD;
					N_deleted++;
					total_deleted++;
				}
			}

			clean_table(pt->r_table_size[w], pt->r_table[w]);
			nd = NULL;
			for (d = sent->word[w].d; d != NULL; d = dx) {
				dx = d->next;
				if ((d->left != NULL) && (d->left->word == BAD_WORD)) {
					d->next = free_later;
					free_later = d;
				} else {
					d->next = nd;
					nd = d;
				}
			}
			sent->word[w].d = nd;
		}
		if (verbosity > 2) {
			printf("l->r pass changed %d and deleted %d\n",pc->N_changed,N_deleted);
		}

		if (pc->N_changed == 0) break;

		pc->N_changed = N_deleted = 0;
		/* right-to-left pass */

		for (w = sent->length-1; w >= 0; w--) {
			if ((2 == w%7) && parse_options_resources_exhausted(opts)) break;
			for (d = sent->word[w].d; d != NULL; d = d->next) {
				if (d->right == NULL) continue;
				if (right_connector_list_update(pc, sent, d->right, w, w, TRUE) >= sent->length) {
					for (c=d->right; c != NULL; c = c->next) c->word = BAD_WORD;
					for (c=d->left;  c != NULL; c = c->next) c->word = BAD_WORD;
					N_deleted++;
					total_deleted++;
				}
			}
			clean_table(pt->l_table_size[w], pt->l_table[w]);
			nd = NULL;
			for (d = sent->word[w].d; d != NULL; d = dx) {
				dx = d->next;
				if ((d->right != NULL) && (d->right->word == BAD_WORD)) {
					d->next = free_later;
					free_later = d;
				} else {
					d->next = nd;
					nd = d;
				}
			}
			sent->word[w].d = nd;
		}

		if (verbosity > 2) {
			printf("r->l pass changed %d and deleted %d\n", pc->N_changed,N_deleted);
		}

		if (pc->N_changed == 0) break;
		pc->N_changed = N_deleted = 0;
	}
	free_disjuncts(free_later);
	power_table_delete(pt);
	pt = NULL;
	pc->pt = NULL;
	
	if (verbosity > 2) printf("%d power prune cost:\n", pc->power_cost);

	if (mode == RUTHLESS) {
		print_time(opts, "power pruned (ruthless)");
	} else {
		print_time(opts, "power pruned (gentle)");
	}

	if (verbosity > 2) {
		if (mode == RUTHLESS) {
			printf("\nAfter power_pruning (ruthless):\n");
		} else {
			printf("\nAfter power_pruning (gentle):\n");
		}
		print_disjunct_counts(sent);
	}

	xfree(pc, sizeof(prune_context));
	return total_deleted;
}

/* ===================================================================
   PP Pruning

   The "contains one" post-processing constraints give us a new way to
   prune.  Suppose there's a rule that says "a group that contains foo
   must contain a bar or a baz."  Here foo, bar, and baz are connector
   types.  foo is the trigger link, bar and baz are called the criterion
   links.  If, after considering the disjuncts we find that there is is
   a foo, but neither a bar, nor a baz, then we can eliminte the disjuct
   containing bar.

   Things are actually a bit more complex, because of the matching rules
   and subscripts.  The problem is that post-processing deals with link
   names, while at this point all we have to work with is connector
   names.  Consider the foo part.  Consider a connector C.  When does
   foo match C for our purposes?  It matches it if every possible link
   name L (that can result from C being at one end of that link) results
   in post_process_match(foo,L) being true.  Suppose foo contains a "*".
   Then there is no C that has this property.  This is because the *s in
   C may be replaced by some other subscripts in the construction of L.
   And the non-* chars in L will not post_process_match the * in foo.

   So let's assume that foo has no *.  Now the result we want is simply
   given by post_process_match(foo, C).  Proof: L is the same as C but
   with some *s replaced by some other letters.  Since foo contains no *
   the replacement in C of some * by some other letter could change
   post_process_match from FALSE to TRUE, but not vice versa.  Therefore
   it's conservative to use this test.

   For the criterion parts, we need to determine if there is a
   collection of connectors C1, C2,... such that by combining them you
   can get a link name L that post_process_matches bar or baz.  Here's a
   way to do this.  Say bar="Xabc".  Then we see if there are connector
   names that post_process_match "Xa##", "X#b#", and "X##c".  They must
   all be there in order for it to be possible to create a link name
   "Xabc".  A "*" in the criterion part is a little different.  In this
   case we can simply skip the * (treat it like an upper case letter)
   for this purpose.  So if bar="X*ab" then we look for "X*#b" and
   "X*a#".  (The * in this case could be treated the same as another
   subscript without breaking it.)  Note also that it's only necessary
   to find a way to match one of the many criterion links that may be in
   the rule.  If none can match, then we can delete the disjunct
   containing C.

   Here's how we're going to implement this.  We'll maintain a multiset
   of connector names.  We'll represent them in a hash table, where the
   hash function uses only the upper case letters of the connector name.
   We'll insert all the connectors into the multiset.  The multiset will
   support the operation of deletion (probably simplest to just
   decrement the count).  Here's the algorithm.

   Insert all the connectors into M.

   While the previous pass caused a count to go to 0 do:
	  For each connector C do
		 For each rule R do
			if C is a trigger for R and the criterion links
			of the rule cannot be satisfied by the connectors in
			M, Then:
			   We delete C's disjunct.  But before we do,
			   we remove all the connectors of this disjunct
			   from the multiset.  Keep tabs on whether or not
			   any of the counts went to 0.


	Efficiency hacks to be added later:
		Note for a given rule can become less and less satisfiable.
		That is, rule_satisfiable(r) for a given rule r can change from
		TRUE to FALSE, but not vice versa.  So once it's FALSE, we can just
		remember that.

		Consider the effect of a pass p on the set of rules that are
		satisfiable.  Suppose this set does not change.  Then pass p+1
		will do nothing.  This is true even if pass p caused some
		disjuncts to be deleted.  (This observation will only obviate
		the need for the last pass.)

  */

static multiset_table * cms_table_new(void)
{
	multiset_table *mt;
	int i;

	mt = (multiset_table *) xalloc(sizeof(multiset_table));

	for (i=0; i<CMS_SIZE; i++) {
		mt->cms_table[i] = NULL;
	}
	return mt;
}

static void cms_table_delete(multiset_table *mt)
{
	Cms * cms, *xcms;
	int i;
	for (i=0; i<CMS_SIZE; i++)
	{
		for (cms = mt->cms_table[i]; cms != NULL; cms = xcms)
		{
			xcms = cms->next;
			xfree(cms, sizeof(Cms));
		}
	}
	xfree(mt, sizeof(multiset_table));
}

static unsigned int cms_hash(const char * s)
{
	unsigned int i = 5381;
	while (isupper((int) *s)) /* connector names are not yet UTF8-capable */
	{
		i = ((i << 5) + i) + *s;
		s++;
	}
	return (i & (CMS_SIZE-1));
}

/**
 * This returns TRUE if there is a connector name C in the table
 * such that post_process_match(pp_match_name, C) is TRUE
 */
static int match_in_cms_table(multiset_table *cmt, const char * pp_match_name)
{
	Cms * cms;
	for (cms = cmt->cms_table[cms_hash(pp_match_name)]; cms != NULL; cms = cms->next)
	{
		if(post_process_match(pp_match_name, cms->name)) return TRUE;
	}
	return FALSE;
}

static Cms * lookup_in_cms_table(multiset_table *cmt, const char * str)
{
	Cms * cms;
	for (cms = cmt->cms_table[cms_hash(str)]; cms != NULL; cms = cms->next)
	{
		if(strcmp(str, cms->name) == 0) return cms;
	}
	return NULL;
}

static void insert_in_cms_table(multiset_table *cmt, const char * str)
{
	Cms * cms;
	unsigned int h;
	cms = lookup_in_cms_table(cmt, str);
	if (cms != NULL) {
		cms->count++;
	} else {
		cms = (Cms *) xalloc(sizeof(Cms));
		cms->name = str;  /* don't copy the string...just keep a pointer to it.
							 we won't free these later */
		cms->count = 1;
		h = cms_hash(str);
		cms->next = cmt->cms_table[h];
		cmt->cms_table[h] = cms;
	}
}

/**
 * Delete the given string from the table.  Return TRUE if
 * this caused a count to go to 0, return FALSE otherwise.
 */
static int delete_from_cms_table(multiset_table *cmt, const char * str)
{
	Cms * cms;
	cms = lookup_in_cms_table(cmt, str);
	if (cms != NULL && cms->count > 0) {
		cms->count--;
		return (cms->count == 0);
	}
	return FALSE;
}

static Boolean rule_satisfiable(multiset_table *cmt, pp_linkset *ls)
{
	unsigned int hashval;
	const char * t;
	char name[20], *s;
	pp_linkset_node *p;
	int bad, n_subscripts;

	for (hashval = 0; hashval < ls->hash_table_size; hashval++)
	{
		for (p = ls->hash_table[hashval]; p!=NULL; p=p->next)
		{
			/* ok, we've got our hands on one of the criterion links */
			strncpy(name, p->str, sizeof(name)-1);
			/* could actually use the string in place because we change it back */
			name[sizeof(name)-1] = '\0';
			/* now we want to see if we can satisfy this criterion link */
			/* with a collection of the links in the cms table */

			for (s = name; isupper((int)*s); s++) {}
			for (;*s != '\0'; s++) if (*s != '*') *s = '#';
			for (s = name, t = p->str; isupper((int) *s); s++, t++) {}

			/* s and t remain in lockstep */
			bad = 0;
			n_subscripts = 0;
			for (;*s != '\0' && bad==0; s++, t++) {
				if (*s == '*') continue;
				n_subscripts++;
				/* after the upper case part, and is not a * so must be a regular subscript */
				*s = *t;
				if (!match_in_cms_table(cmt, name)) bad++;
				*s = '#';
			}

			if (n_subscripts == 0) {
				/* now we handle the special case which occurs if there
				   were 0 subscripts */
				if (!match_in_cms_table(cmt, name)) bad++;
			}

			/* now if bad==0 this criterion link does the job
			   to satisfy the needs of the trigger link */

			if (bad == 0) return TRUE;
		}
	}
	return FALSE;
}

static int pp_prune(Sentence sent, Parse_Options opts)
{
	pp_knowledge * knowledge;
	pp_rule rule;
	const char * selector;
	pp_linkset * link_set;
	int i, w, dir;
	Disjunct *d;
	Connector *c;
	int change, total_deleted, N_deleted, deleteme;
	multiset_table *cmt;

	if (sent->dict->postprocessor == NULL) return 0;

	knowledge = sent->dict->postprocessor->knowledge;

	cmt = cms_table_new();

	for (w = 0; w < sent->length; w++) {
		for (d = sent->word[w].d; d != NULL; d = d->next) {
			d->marked = TRUE;
			for (dir=0; dir < 2; dir++) {
				for (c = ( (dir)?(d->left):(d->right) ); c!=NULL; c=c->next) {
					insert_in_cms_table(cmt, c->string);
				}
			}
		}
	}

	total_deleted = 0;
	change = 1;
	while (change > 0) {
		change = 0;
		N_deleted = 0;
		for (w = 0; w < sent->length; w++) {
			for (d = sent->word[w].d; d != NULL; d = d->next) {
				if (!d->marked) continue;
				deleteme = FALSE;
				for (dir=0; dir < 2; dir++) {
					for (c = ( (dir)?(d->left):(d->right) ); c!=NULL; c=c->next) {
						for (i=0; i<knowledge->n_contains_one_rules; i++) {

							rule = knowledge->contains_one_rules[i]; /* the ith rule */
							selector = rule.selector;				/* selector string for this rule */
							link_set = rule.link_set;				/* the set of criterion links */

							if (strchr(selector, '*') != NULL) continue;  /* If it has a * forget it */

							if (!post_process_match(selector, c->string)) continue;

							/*
							printf("pp_prune: trigger ok.  selector = %s  c->string = %s\n", selector, c->string);
							*/

							/* We know c matches the trigger link of the rule. */
							/* Now check the criterion links */

							if (!rule_satisfiable(cmt, link_set)) {
								deleteme = TRUE;
							}
							if (deleteme) break;
						}
						if (deleteme) break;
					}
					if (deleteme) break;
				}

				if (deleteme) {					/* now we delete this disjunct */
					N_deleted++;
					total_deleted++;
					d->marked = FALSE; /* mark for deletion later */
					for (dir=0; dir < 2; dir++) {
						for (c = ( (dir)?(d->left):(d->right) ); c!=NULL; c=c->next) {
							change += delete_from_cms_table(cmt, c->string);
						}
					}
				}
			}
		}

		if (verbosity > 2) {
			printf("pp_prune pass deleted %d\n", N_deleted);
		}

	}
	delete_unmarked_disjuncts(sent);
	cms_table_delete(cmt);

	if (verbosity > 2) {
		printf("\nAfter pp_pruning:\n");
		print_disjunct_counts(sent);
	}

	print_time(opts, "pp pruning");

	return total_deleted;
}


/**
 * Do the following pruning steps until nothing happens:
 * power pp power pp power pp....
 * Make sure you do them both at least once.
 */
void pp_and_power_prune(Sentence sent, int mode, Parse_Options opts)
{
	power_prune(sent, mode, opts);

	for (;;) {
		if (parse_options_resources_exhausted(opts)) break;
		if (pp_prune(sent, opts) == 0) break;
		if (parse_options_resources_exhausted(opts)) break;
		if (power_prune(sent, mode, opts) == 0) break;
	}
}