/* * $Id$ */ /* * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All modifications to the source code must be clearly marked as * such. Binary redistributions based on modified source code * must be clearly marked as modified versions in the documentation * and/or other materials provided with the distribution. * 4. All advertising materials mentioning features or use of this software * must display the following acknowledgment: * This product includes software developed by Geoff Kuenning and * other unpaid contributors. * 5. The name of Geoff Kuenning may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * $Log$ * Revision 1.10 1999/12/21 18:46:29 sterwill * ispell patch for non-English dictionaries by Henrik Berg * * Revision 1.9 1999/10/20 03:19:35 paul * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. * * Revision 1.8 1999/09/29 23:33:32 justin * Updates to the underlying ispell-based code to support suggested corrections. * * Revision 1.7 1999/04/13 17:12:51 jeff * Applied "Darren O. Benham" spell check changes. * Fixed crash on Win32 with the new code. * * Revision 1.6 1999/01/07 05:14:22 sterwill * So it builds on Unix... it might break win32 in ispell, since ut_types * is no longer included. This is a temporary solution to a larger problem * of including C++ headers in C source files. * * Revision 1.6 1999/01/07 05:14:22 sterwill * So it builds on Unix... it might break win32 in ispell, since ut_types * is no longer included. This is a temporary solution to a larger problem * of including C++ headers in C source files. * * Revision 1.5 1999/01/07 05:02:25 sterwill * Checking in half-broken to avoid tree lossage * * Revision 1.4 1999/01/07 01:07:48 paul * Fixed spell leaks. * * Revision 1.3 1998/12/29 15:03:54 eric * * minor fix to ispell.h to get things to compile on Linux again. * * Revision 1.2 1998/12/29 14:55:33 eric * * I've doctored the ispell code pretty extensively here. It is now * warning-free on Win32. It also *works* on Win32 now, since I * replaced all the I/O calls with ANSI standard ones. * * Revision 1.1 1998/12/28 18:04:43 davet * Spell checker code stripped from ispell. At this point, there are * two external routines... the Init routine, and a check-a-word routine * which returns a boolean value, and takes a 16 bit char string. * The code resembles the ispell code as much as possible still. * * Revision 1.68 1995/03/06 02:42:41 geoff * Be vastly more paranoid about parenthesizing macro arguments. This * fixes a bug in defmt.c where a complex argument was passed to * isstringch. * * Revision 1.67 1995/01/03 19:24:12 geoff * Get rid of a non-global declaration. * * Revision 1.66 1994/12/27 23:08:49 geoff * Fix a lot of subtly bad assumptions about the widths of ints and longs * which only show up on 64-bit machines like the Cray and the DEC Alpha. * * Revision 1.65 1994/11/02 06:56:10 geoff * Remove the anyword feature, which I've decided is a bad idea. * * Revision 1.64 1994/10/25 05:46:18 geoff * Add the FF_ANYWORD flag for defining an affix that will apply to any * word, even if not explicitly specified. (Good for French.) * * Revision 1.63 1994/09/16 04:48:28 geoff * Make stringdups and laststringch unsigned ints, and dupnos a plain * int, so that we can handle more than 128 stringchars and stringchar * types. * * Revision 1.62 1994/09/01 06:06:39 geoff * Change erasechar/killchar to uerasechar/ukillchar to avoid * shared-library problems on HP systems. * * Revision 1.61 1994/08/31 05:58:35 geoff * Add contextoffset, used in -a mode to handle extremely long lines. * * Revision 1.60 1994/05/17 06:44:15 geoff * Add support for controlled compound formation and the COMPOUNDONLY * option to affix flags. * * Revision 1.59 1994/03/15 06:25:16 geoff * Change deftflag's initialization so we can tell if -t/-n appeared. * * Revision 1.58 1994/02/07 05:53:28 geoff * Add typecasts to the the 7-bit versions of ichar* routines * * Revision 1.57 1994/01/25 07:11:48 geoff * Get rid of all old RCS log lines in preparation for the 3.1 release. * */ #include /* #include "ut_types.h" */ /* largest word accepted from a file by any input routine, plus one */ #ifndef INPUTWORDLEN #define INPUTWORDLEN 100 #endif /* largest amount that a word might be extended by adding affixes */ #ifndef MAXAFFIXLEN #define MAXAFFIXLEN 20 #endif /* ** Number of mask bits (affix flags) supported. Must be 32, 64, 128, or ** 256. If MASKBITS is 32 or 64, there are really only 26 or 58 flags ** available, respectively. If it is 32, the flags are named with the ** 26 English uppercase letters; lowercase will be converted to uppercase. ** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII ** order, including the 6 special characters from 'Z' to 'a': "[\]^_`". ** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters, ** respectively, are theoretically available, though a few (newline, slash, ** null byte) are pretty hard to actually use successfully. ** ** Note that a number of non-English affix files depend on having a ** larger value for MASKBITS. See the affix files for more ** information. #ifndef MASKBITS #define MASKBITS 64 #endif */ extern int gnMaskBits; /* ** C type to use for masks. This should be a type that the processor ** accesses efficiently. ** ** MASKTYPE_WIDTH must correctly reflect the number of bits in a ** MASKTYPE. Unfortunately, it is also required to be a constant at ** preprocessor time, which means you can't use the sizeof operator to ** define it. ** ** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get ** division-by-zero errors! */ #ifndef MASKTYPE #define MASKTYPE long #endif #ifndef MASKTYPE_WIDTH #define MASKTYPE_WIDTH 32 #endif /* program: this should be coded now in init #if MASKBITS < MASKTYPE_WIDTH #undef MASKBITS #define MASKBITS MASKTYPE_WIDTH #endif / * MASKBITS < MASKTYPE_WIDTH */ /* ** Maximum hash table fullness percentage. Larger numbers trade space ** for time. **/ #ifndef MAXPCT #define MAXPCT 70 /* Expand table when 70% full */ #endif /* ** Maximum number of "string" characters that can be defined in a ** language (affix) file. Don't forget that an upper/lower string ** character counts as two! */ #ifndef MAXSTRINGCHARS #define MAXSTRINGCHARS 100 #endif /* MAXSTRINGCHARS */ /* ** Maximum length of a "string" character. The default is appropriate for ** nroff-style characters starting with a backslash. */ #ifndef MAXSTRINGCHARLEN #define MAXSTRINGCHARLEN 10 #endif /* MAXSTRINGCHARLEN */ /* ** Maximum number of "hits" expected on a word. This is basically the ** number of different ways different affixes can produce the same word. ** For example, with "english.aff", "brothers" can be produced 3 ways: ** "brothers," "brother+s", or "broth+ers". If this is too low, no major ** harm will be done, but ispell may occasionally forget a capitalization. */ #ifndef MAX_HITS #define MAX_HITS 10 #endif /* ** Maximum number of capitalization variations expected in any word. ** Besides the obvious all-lower, all-upper, and capitalized versions, ** this includes followcase variants. If this is too low, no real ** harm will be done, but ispell may occasionally fail to suggest a ** correct capitalization. */ #ifndef MAX_CAPS #define MAX_CAPS 10 #endif /* MAX_CAPS */ /* buffer size to use for file names if not in sys/param.h */ #ifndef MAXPATHLEN #define MAXPATHLEN 512 #endif /* ** Maximum language-table search size. Smaller numbers make ispell ** run faster, at the expense of more memory (the lowest reasonable value ** is 2). If a given character appears in a significant position in ** more than MAXSEARCH suffixes, it will be given its own index table. ** If you change this, define INDEXDUMP in lookup.c to be sure your ** index table looks reasonable. */ #ifndef MAXSEARCH #define MAXSEARCH 4 #endif #ifdef __STDC__ #define P(x) x #define VOID void #else /* __STDC__ */ #define P(x) () #define VOID char #define const #endif /* __STDC__ */ #ifdef NO8BIT #define SET_SIZE 128 #else #define SET_SIZE 256 #endif /*#define MASKSIZE (gnMaskBits / MASKTYPE_WIDTH)*/ #ifdef lint extern int TSTMASKBIT P ((MASKTYPE * mask, int bit)); #else /* lint */ /* The following is really testing for MASKSIZE <= 1, but cpp can't do that */ #define TSTMASKBIT(mask, bit) \ ((mask)[(bit) / MASKTYPE_WIDTH] & \ ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1)))) #endif /* lint */ /* #if MASKBITS > 64 #define FULLMASKSET #endif */ #define FLAGBASE ((MASKTYPE_WIDTH) - 6) /* #if MASKBITS <= 32 #define FLAGBASE ((MASKTYPE_WIDTH) - 6) #else # if MASKBITS <= 64 #define FLAGBASE ((MASKTYPE_WIDTH) - 6) # else #define FLAGBASE 0 # endif #endif */ /* ** Data type for internal word storage. If necessary, we use shorts rather ** than chars so that string characters can be encoded as a single unit. */ #if (SET_SIZE + MAXSTRINGCHARS) <= 256 #ifndef lint #define ICHAR_IS_CHAR #endif /* lint */ #endif #ifdef ICHAR_IS_CHAR typedef unsigned char ichar_t; /* Internal character */ #define icharlen(s) strlen ((char *) (s)) #define icharcpy(a, b) strcpy ((char *) (a), (char *) (b)) #define icharcmp(a, b) strcmp ((char *) (a), (char *) (b)) #define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n)) #define chartoichar(x) ((ichar_t) (x)) #else typedef unsigned short ichar_t; /* Internal character */ #define chartoichar(x) ((ichar_t) (unsigned char) (x)) /* * Structure used to record data about successful lookups; these values * are used in the ins_root_cap routine to produce correct capitalizations. */ struct success { struct dent * dictent; /* Header of dict entry chain for wd */ struct flagent * prefix; /* Prefix flag used, or NULL */ struct flagent * suffix; /* Suffix flag used, or NULL */ }; ichar_t* icharcpy (ichar_t* out, ichar_t* in); int icharlen (ichar_t* in); int icharcmp (ichar_t* s1, ichar_t* s2); int icharncmp (ichar_t* s1, ichar_t* s2, int n); int ichartostr (char* out, ichar_t* in, int outlen, int canonical); char * ichartosstr (ichar_t* in, int canonical); int strtoichar (ichar_t* out, char* in, int outlen, int canonical); struct dent * ispell_lookup (ichar_t* s, int dotree); int good (ichar_t* w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts); int linit(char*); void lcleanup(void); int cap_ok (ichar_t* word, struct success* hit, int len); void chk_aff (ichar_t* word, ichar_t* ucword, int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts); long whatcap (ichar_t* word); int hash (ichar_t* s, int hashtblsize); void makepossibilities(ichar_t* word); int findfiletype (char * name, int searchnames, int * deformatter); #endif struct dent { struct dent * next; char * word; MASKTYPE mask[2]; #ifdef FULLMASKSET char flags; #endif }; /* ** Flags in the directory entry. If FULLMASKSET is undefined, these are ** stored in the highest bits of the last longword of the mask field. If ** FULLMASKSET is defined, they are stored in the extra "flags" field. #ifndef NO_CAPITALIZATION_SUPPORT ** ** If a word has only one capitalization form, and that form is not ** FOLLOWCASE, it will have exactly one entry in the dictionary. The ** legal capitalizations will be indicated by the 2-bit capitalization ** field, as follows: ** ** ALLCAPS The word must appear in all capitals. ** CAPITALIZED The word must be capitalized (e.g., London). ** It will also be accepted in all capitals. ** ANYCASE The word may appear in lowercase, capitalized, ** or all-capitals. ** ** Regardless of the capitalization flags, the "word" field of the entry ** will point to an all-uppercase copy of the word. This is to simplify ** the large portion of the code that doesn't care about capitalization. ** Ispell will generate the correct version when needed. ** ** If a word has more than one capitalization, there will be multiple ** entries for it, linked together by the "next" field. The initial ** entry for such words will be a dummy entry, primarily for use by code ** that ignores capitalization. The "word" field of this entry will ** again point to an all-uppercase copy of the word. The "mask" field ** will contain the logical OR of the mask fields of all variants. ** A header entry is indicated by a capitalization type of ALLCAPS, ** with the MOREVARIANTS bit set. ** ** The following entries will define the individual variants. Each ** entry except the last has the MOREVARIANTS flag set, and each ** contains one of the following capitalization options: ** ** ALLCAPS The word must appear in all capitals. ** CAPITALIZED The word must be capitalized (e.g., London). ** It will also be accepted in all capitals. ** FOLLOWCASE The word must be capitalized exactly like the ** sample in the entry. Prefix (suffix) characters ** must be rendered in the case of the first (last) ** "alphabetic" character. It will also be accepted ** in all capitals. ("Alphabetic" means "mentioned ** in a 'casechars' statement".) ** ANYCASE The word may appear in lowercase, capitalized, ** or all-capitals. ** ** The "mask" field for the entry contains only the affix flag bits that ** are legal for that capitalization. The "word" field will be null ** except for FOLLOWCASE entries, where it will point to the ** correctly-capitalized spelling of the root word. ** ** It is worth discussing why the ALLCAPS option is used in ** the header entry. The header entry accepts an all-capitals ** version of the root plus every affix (this is always legal, since ** words get capitalized in headers and so forth). Further, all of ** the following variant entries will reject any all-capitals form ** that is illegal due to an affix. ** ** Finally, note that variations in the KEEP flag can cause a multiple-variant ** entry as well. For example, if the personal dictionary contains "ALPHA", ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a ** multiple-variant entry will be created so that "alpha" will be accepted ** but only "ALPHA" will actually be kept. #endif */ #ifdef FULLMASKSET #define flagfield flags #else #define flagfield mask[1] #endif #define USED ((MASKTYPE) 1 << (FLAGBASE + 0)) #define KEEP ((MASKTYPE) 1 << (FLAGBASE + 1)) #ifdef NO_CAPITALIZATION_SUPPORT #define ALLFLAGS (USED | KEEP) #else /* NO_CAPITALIZATION_SUPPORT */ #define ANYCASE ((MASKTYPE) 0 << (FLAGBASE + 2)) #define ALLCAPS ((MASKTYPE) 1 << (FLAGBASE + 2)) #define CAPITALIZED ((MASKTYPE) 2 << (FLAGBASE + 2)) #define FOLLOWCASE ((MASKTYPE) 3 << (FLAGBASE + 2)) #define CAPTYPEMASK ((MASKTYPE) 3 << (FLAGBASE + 2)) #define MOREVARIANTS ((MASKTYPE) 1 << (FLAGBASE + 4)) #define ALLFLAGS (USED | KEEP | CAPTYPEMASK | MOREVARIANTS) #define captype(x) ((x) & CAPTYPEMASK) #endif /* NO_CAPITALIZATION_SUPPORT */ /* * Language tables used to encode prefix and suffix information. */ struct flagent { ichar_t * strip; /* String to strip off */ ichar_t * affix; /* Affix to append */ short flagbit; /* Flag bit this ent matches */ short stripl; /* Length of strip */ short affl; /* Length of affix */ short numconds; /* Number of char conditions */ short flagflags; /* Modifiers on this flag */ char conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds */ }; /* * Bits in flagflags */ #define FF_CROSSPRODUCT (1 << 0) /* Affix does cross-products */ #define FF_COMPOUNDONLY (1 << 1) /* Afx works in compounds */ union ptr_union /* Aid for building flg ptrs */ { struct flagptr * fp; /* Pointer to more indexing */ struct flagent * ent; /* First of a list of ents */ }; struct flagptr { union ptr_union pu; /* Ent list or more indexes */ int numents; /* If zero, pu.fp is valid */ }; /* * Description of a single string character type. */ struct strchartype { char * name; /* Name of the type */ char * deformatter; /* Deformatter to use */ char * suffixes; /* File suffixes, null seps */ }; /* * Header placed at the beginning of the hash file. */ struct hashheader { unsigned short magic; /* Magic number for ID */ unsigned short compileoptions; /* How we were compiled */ short maxstringchars; /* Max # strchrs we support */ short maxstringcharlen; /* Max strchr len supported */ short compoundmin; /* Min lth of compound parts */ short compoundbit; /* Flag 4 compounding roots */ int stringsize; /* Size of string table */ int lstringsize; /* Size of lang. str tbl */ int tblsize; /* No. entries in hash tbl */ int stblsize; /* No. entries in sfx tbl */ int ptblsize; /* No. entries in pfx tbl */ int sortval; /* Largest sort ID assigned */ int nstrchars; /* No. strchars defined */ int nstrchartype; /* No. strchar types */ int strtypestart; /* Start of strtype table */ char nrchars[5]; /* Nroff special characters */ char texchars[13]; /* TeX special characters */ char compoundflag; /* Compund-word handling */ char defhardflag; /* Default tryveryhard flag */ char flagmarker; /* "Start-of-flags" char */ unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */ ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */ ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */ char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */ char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */ char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */ char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */ char stringstarts[SET_SIZE]; /* NZ if char can start str */ char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */ unsigned int stringdups[MAXSTRINGCHARS]; /* No. of "base" char */ int dupnos[MAXSTRINGCHARS]; /* Dup char ID # */ unsigned short magic2; /* Second magic for dbl chk */ }; /* hash table magic number */ #define MAGIC 0x9602 /* compile options, put in the hash header for consistency checking */ #ifdef NO8BIT # define MAGIC8BIT 0x01 #else # define MAGIC8BIT 0x00 #endif #ifdef NO_CAPITALIZATION_SUPPORT # define MAGICCAPITALIZATION 0x00 #else # define MAGICCAPITALIZATION 0x02 #endif # define MAGICMASKSET 0x04 /* #if MASKBITS <= 32 # define MAGICMASKSET 0x00 #else # if MASKBITS <= 64 # else # if MASKBITS <= 128 # define MAGICMASKSET 0x08 # else # define MAGICMASKSET 0x0C # endif # endif #endif */ #define COMPILEOPTIONS (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET) /* ** Offsets into the nroff special-character array */ #define NRLEFTPAREN hashheader.nrchars[0] #define NRRIGHTPAREN hashheader.nrchars[1] #define NRDOT hashheader.nrchars[2] #define NRBACKSLASH hashheader.nrchars[3] #define NRSTAR hashheader.nrchars[4] /* ** Offsets into the TeX special-character array */ #define TEXLEFTPAREN hashheader.texchars[0] #define TEXRIGHTPAREN hashheader.texchars[1] #define TEXLEFTSQUARE hashheader.texchars[2] #define TEXRIGHTSQUARE hashheader.texchars[3] #define TEXLEFTCURLY hashheader.texchars[4] #define TEXRIGHTCURLY hashheader.texchars[5] #define TEXLEFTANGLE hashheader.texchars[6] #define TEXRIGHTANGLE hashheader.texchars[7] #define TEXBACKSLASH hashheader.texchars[8] #define TEXDOLLAR hashheader.texchars[9] #define TEXSTAR hashheader.texchars[10] #define TEXDOT hashheader.texchars[11] #define TEXPERCENT hashheader.texchars[12] /* ** Values for compoundflag */ #define COMPOUND_NEVER 0 /* Compound words are never good */ #define COMPOUND_ANYTIME 1 /* Accept run-together words */ #define COMPOUND_CONTROLLED 2 /* Compounds controlled by afx flags */ /* ** The isXXXX macros normally only check ASCII range, and don't support ** the character sets of other languages. These private versions handle ** whatever character sets have been defined in the affix files. */ #ifdef lint extern int myupper P ((unsigned int ch)); extern int mylower P ((unsigned int ch)); extern int myspace P ((unsigned int ch)); extern int iswordch P ((unsigned int ch)); extern int isboundarych P ((unsigned int ch)); extern int isstringstart P ((unsigned int ch)); extern ichar_t mytolower P ((unsigned int ch)); extern ichar_t mytoupper P ((unsigned int ch)); #else /* lint */ #if 0 #define myupper(X) (hashheader.upperchars[(X)]) #define mylower(X) (hashheader.lowerchars[(X)]) #define myspace(X) (((X) > 0) && ((X) < 0x80) \ && isspace((unsigned char) (X))) #define iswordch(X) (hashheader.wordchars[(X)]) #define isboundarych(X) (hashheader.boundarychars[(X)]) #define isstringstart(X) (hashheader.stringstarts[(unsigned char) (X)]) #define mytolower(X) (hashheader.lowerconv[(X)]) #define mytoupper(X) (hashheader.upperconv[(X)]) #else /* HACK: macros replaced with function implementations so we could do a side-effect-free check for unicode characters which aren't in hashheader */ char myupper(ichar_t c); char mylower(ichar_t c); int myspace(ichar_t c); char iswordch(ichar_t c); char isboundarych(ichar_t c); char isstringstart(ichar_t c); ichar_t mytolower(ichar_t c); ichar_t mytoupper(ichar_t c); #endif #endif /* lint */ /* ** These macros are similar to the ones above, but they take into account ** the possibility of string characters. Note well that they take a POINTER, ** not a character. ** ** The "l_" versions set "len" to the length of the string character as a ** handy side effect. (Note that the global "laststringch" is also set, ** and sometimes used, by these macros.) ** ** The "l1_" versions go one step further and guarantee that the "len" ** field is valid for *all* characters, being set to 1 even if the macro ** returns false. This macro is a great example of how NOT to write ** readable C. */ #define isstringch(ptr, canon) (isstringstart (*(ptr)) \ && stringcharlen ((ptr), (canon)) > 0) #define l_isstringch(ptr, len, canon) \ (isstringstart (*(ptr)) \ && (len = stringcharlen ((ptr), (canon))) \ > 0) #define l1_isstringch(ptr, len, canon) \ (len = 1, \ isstringstart ((unsigned char)(*(ptr))) \ && ((len = \ stringcharlen ((ptr), (canon))) \ > 0 \ ? 1 : (len = 1, 0))) /* * Sizes of buffers returned by ichartosstr/strtosichar. */ #define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) #define STRTOSICHAR_SIZE ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \ * sizeof (ichar_t)) /* * termcap variables */ #ifdef MAIN # define EXTERN /* nothing */ #else # define EXTERN extern #endif EXTERN char * BC; /* backspace if not ^H */ EXTERN char * cd; /* clear to end of display */ EXTERN char * cl; /* clear display */ EXTERN char * cm; /* cursor movement */ EXTERN char * ho; /* home */ EXTERN char * nd; /* non-destructive space */ EXTERN char * so; /* standout */ EXTERN char * se; /* standout end */ EXTERN int sg; /* space taken by so/se */ EXTERN char * ti; /* terminal initialization sequence */ EXTERN char * te; /* terminal termination sequence */ EXTERN int li; /* lines */ EXTERN int co; /* columns */ #if 0 EXTERN int contextsize; /* number of lines of context to show */ EXTERN char contextbufs[MAXCONTEXT][BUFSIZ]; /* Context of current line */ EXTERN int contextoffset; /* Offset of line start in contextbufs[0] */ EXTERN char * currentchar; /* Location in contextbufs */ #endif EXTERN char ctoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Current token as char */ EXTERN ichar_t itoken[INPUTWORDLEN + MAXAFFIXLEN]; /* Ctoken as ichar_t str */ #if 0 EXTERN char termcap[2048]; /* termcap entry */ EXTERN char termstr[2048]; /* for string values */ EXTERN char * termptr; /* pointer into termcap, used by tgetstr */ #endif EXTERN int numhits; /* number of hits in dictionary lookups */ EXTERN struct success hits[MAX_HITS]; /* table of hits gotten in lookup */ EXTERN char * hashstrings; /* Strings in hash table */ EXTERN struct hashheader hashheader; /* Header of hash table */ EXTERN struct dent * hashtbl; /* Main hash table, for dictionary */ EXTERN int hashsize; /* Size of main hash table */ EXTERN char hashname[MAXPATHLEN]; /* Name of hash table file */ EXTERN int aflag; /* NZ if -a or -A option specified */ EXTERN int cflag; /* NZ if -c (crunch) option */ EXTERN int lflag; /* NZ if -l (list) option */ EXTERN int incfileflag; /* whether xgets() acts exactly like gets() */ EXTERN int nodictflag; /* NZ if dictionary not needed */ EXTERN int uerasechar; /* User's erase character, from stty */ EXTERN int ukillchar; /* User's kill character */ EXTERN unsigned int laststringch; /* Number of last string character */ EXTERN int defdupchar; /* Default duplicate string type */ EXTERN int numpflags; /* Number of prefix flags in table */ EXTERN int numsflags; /* Number of suffix flags in table */ EXTERN struct flagptr pflagindex[SET_SIZE + MAXSTRINGCHARS]; /* Fast index to pflaglist */ EXTERN struct flagent * pflaglist; /* Prefix flag control list */ EXTERN struct flagptr sflagindex[SET_SIZE + MAXSTRINGCHARS]; /* Fast index to sflaglist */ EXTERN struct flagent * sflaglist; /* Suffix flag control list */ EXTERN struct strchartype * /* String character type collection */ chartypes; EXTERN FILE * infile; /* File being corrected */ EXTERN FILE * outfile; /* Corrected copy of infile */ EXTERN char * askfilename; /* File specified in -f option */ EXTERN int changes; /* NZ if changes made to cur. file */ EXTERN int readonly; /* NZ if current file is readonly */ EXTERN int quit; /* NZ if we're done with this file */ #define MAXPOSSIBLE 100 /* Max no. of possibilities to generate */ EXTERN char possibilities[MAXPOSSIBLE][INPUTWORDLEN + MAXAFFIXLEN]; /* Table of possible corrections */ EXTERN int pcount; /* Count of possibilities generated */ EXTERN int maxposslen; /* Length of longest possibility */ EXTERN int easypossibilities; /* Number of "easy" corrections found */ /* ..(defined as those using legal affixes) */ /* * The following array contains a list of characters that should be tried * in "missingletter." Note that lowercase characters are omitted. */ EXTERN int Trynum; /* Size of "Try" array */ EXTERN ichar_t Try[SET_SIZE + MAXSTRINGCHARS]; /* * Initialized variables. These are generated using macros so that they * may be consistently declared in all programs. Numerous examples of * usage are given below. */ #ifdef MAIN #define INIT(decl, init) decl = init #else #define INIT(decl, init) extern decl #endif #ifdef MINIMENU INIT (int minimenusize, 2); /* MUST be either 2 or zero */ #else /* MINIMENU */ INIT (int minimenusize, 0); /* MUST be either 2 or zero */ #endif /* MINIMENU */ INIT (int eflag, 0); /* NZ for expand mode */ INIT (int dumpflag, 0); /* NZ to do dump mode */ INIT (int fflag, 0); /* NZ if -f specified */ #ifndef USG INIT (int sflag, 0); /* NZ to stop self after EOF */ #endif INIT (int vflag, 0); /* NZ to display characters as M-xxx */ INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */ INIT (int deftflag, -1); /* NZ for TeX mode by default */ INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */ INIT (int prefstringchar, -1); /* Preferred string character type */ INIT (int terse, 0); /* NZ for "terse" mode */ INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */ INIT (int minword, MINWORD); /* Longest always-legal word */ INIT (int sortit, 1); /* Sort suggestions alphabetically */ INIT (int compoundflag, -1); /* How to treat compounds: see above */ INIT (int tryhardflag, -1); /* Always call tryveryhard */ INIT (char * currentfile, NULL); /* Name of current input file */ /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */ INIT (int math_mode, 0); /* P -- paragraph or LR mode * b -- parsing a \begin statement * e -- parsing an \end statement * r -- parsing a \ref type of argument. * m -- looking for a \begin{minipage} argument. */ INIT (char LaTeX_Mode, 'P');