/*-------------------------------------------------------------------*/
/* hyphen -- add hyphenation hints into a Finnish TeX file           */
/*                                                                   */
/* usage:   hyphen [options] < infile > outfile                      */
/*          (the text is read with getchar() and written with        */
/*          printf(), i.e. standard input and standard output)       */
/* options  -f : convert ä to \"a etc.                               */
/*          -d : print lots of debugging information                 */
/*                                                                   */
/* The table nohyp defines regions that will not be hyphenated;      */
/* add new items if necessary.                                       */
/*                                                                   */
/* This is a very simple hyphenation program; the only rule is:      */
/*      c*cv -> c*|cv                                                */
/* where c is a consonant, v is a vowel, and * means iteration       */
/* (zero or more times).                                             */
/* This finds all hyphenations except those between vowels;          */
/* erroneous hyphenations may occur in compound words!               */
/*-------------------------------------------------------------------*/

/* NOTE(review): the header names of both #include directives are    */
/* missing in this copy.  The code below uses getchar(), printf()    */
/* and EOF, and strncmp(), so presumably they were the stdio and     */
/* string headers -- restore them before compiling.                  */
#include
#include

/* Character codes.  None of the three is referenced in the visible  */
/* text of this file.                                                */
#define CR 13
#define LF 10
#define SP 32

/* Character classification tables, indexed by character code -------*/
/* (originally labelled "ISO-Latin1 character codes").               */
/* letter[c] is 1 when c may be part of an ordinary word: A-Z, a-z   */
/* and the codes 196, 214, 228, 246 (the same codes that the -f      */
/* conversion below turns into \"A, \"O, \"a, \"o).                  */
/* NOTE(review): codes 132, 134, 143, 144, 148 and 153 are also set. */
/* These are not letters in ISO-Latin1; they look like DOS code page */
/* values for the Scandinavian letters, in which case 142 (clear)    */
/* and 144 (set) deserve a second look -- confirm.                   */
int letter [256]={
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0 -- 15 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 16 -- 31 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 32 -- 47 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 48 -- 63 */
  0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 64 -- 79 */
  1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* 80 -- 95 */
  0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 96 -- 111 */
  1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* 112 -- 127 */
  0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1, /* 128 -- 143 */
  1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0, /* 144 -- 159 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 160 -- 175 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 176 -- 191 */
  0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 192 -- 207 */
  0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, /* 208 -- 223 */
  0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 224 -- 239 */
  0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 /* 240 -- 255 */
} ;

/* vowel[c] is 1 for A E I O U Y in both cases and for every         */
/* non-ASCII code that letter[] accepts.  The table is not           */
/* referenced in the visible text of this file (the body of          */
/* hyphens() is truncated, see below).                               */
int vowel [256]= {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0 -- 15 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 16 -- 31 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 32 -- 47 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 48 -- 63 */
  0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1, /* 64 -- 79 */
  0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0, /* 80 -- 95 */
  0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,1, /* 96 -- 111 */
  0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0, /* 112 -- 127 */
  0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1, /* 128 -- 143 */
  1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0, /* 144 -- 159 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 160 -- 175 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 176 -- 191 */
  0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 192 -- 207 */
  0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0, /* 208 -- 223 */
  0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0, /* 224 -- 239 */
  0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0 /* 240 -- 255 */
} ;

/* consonant[c] is 1 for every ASCII letter that is not in vowel[];  */
/* no code above 127 is marked as a consonant.                       */
int consonant[256]= {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0 -- 15 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 16 -- 31 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 32 -- 47 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 48 -- 63 */
  0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0, /* 64 -- 79 */
  1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0, /* 80 -- 95 */
  0,0,1,1,1,0,1,1,1,0,1,1,1,1,1,0, /* 96 -- 111 */
  1,1,1,1,1,0,1,1,1,0,1,0,0,0,0,0, /* 112 -- 127 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128 -- 143 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144 -- 159 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 160 -- 175 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 176 -- 191 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 192 -- 207 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 208 -- 223 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 224 -- 239 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 240 -- 255 */
} ;

/* define text that should be left untouched ------------------------*/
/* One entry describes one kind of region: it opens when the start   */
/* of the current input word equals the first len characters of      */
/* trigger and closes when the start of a later input word equals    */
/* the first lenf characters of finish (both tested with strncmp()   */
/* in getword()).                                                    */
struct omit {
  char *trigger ; /* skip text starting with *trigger */
  int len ;       /* compare at most len characters */
  char *finish;   /* termination string */
  int lenf;       /* number of characters of finish that are compared */
} ;

/* Number of entries in nohyp[], including the unused entry 0.       */
#define MAXNOHYP 11

/* Table of regions that are copied without hyphenation.             */
/*  - Entry 0 is a placeholder: skip == 0 means "not skipping" and   */
/*    the search loop in getword() starts at index 1.                */
/*  - The first matching entry wins, so the more specific comment    */
/*    directive must stay in front of the plain comment entry.       */
/*  - len/lenf may be shorter than the string, in which case only    */
/*    that prefix is compared (e.g. 10 characters of the verbatim    */
/*    trigger, 5 of its terminator).                                 */
/* NOTE(review): \% is not an escape sequence defined by the C       */
/* standard; presumably a plain percent sign is intended -- confirm. */
struct omit nohyp [MAXNOHYP] = {
  {"", 0, "", 0},
  {"$$", 2, "$$", 2},
  {"\\beginverbatim", 10, "\\endverbatim", 5},
  {"\\input", 6, "\n", 1},
  {"\\pict", 5, "}", 1},
  {"\\spict", 6, "}", 1},
  {"\\face", 5, "}", 1},
  {"\\ps", 3, "\n", 1},
  {"\\iii", 4, "}", 1},
  {"\%nohyphen", 5, "\%hyp", 4},
  {"\%", 1, "\n", 1}
} ;

/* Working buffers (file-scope, shared by all functions) ------------*/
unsigned char
  inbuf[200],   /* input line; not referenced in the visible text */
  inword[60],   /* current input word, filled by getword() */
  outword[100], /* output word, hyphenation inserted */
  buf[300] ;    /* output line being assembled */

/* Global state -----------------------------------------------------*/
int
  cchar=0,  /* look-ahead: the character read but not yet consumed */
  incol=0,  /* current input column; not referenced in the visible text */
  len,      /* number of characters getword() stored in inword */
  llen,     /* length of output word */
  col=0,    /* current output column = next free index in buf */
  finn=0,   /* if set, convert finnish characters */
  skip=0,   /* index into nohyp[] of the active region; 0 = hyphenate */
  debug=0,  /* print a lot of extra output */
  TeX='\\'; /* TeX control character */

/* Forward declarations.  The parameter that is called token here is */
/* named skip in the definition of getword().                        */
int getword(int *token) ;
void putword() ;
void copyword() ;
void hyphens() ;

/*------------------------------------------------------------------*/
/* main program                                                     */
/*                                                                  */
/* Parses the options, then reads the input word by word and passes */
/* every word to putword(): ordinary words go through hyphens()     */
/* first, everything else is copied unchanged with copyword().      */
/*                                                                  */
/* NOTE(review): main has no declared return type and no return     */
/* statement.                                                       */
/* NOTE(review): nothing visible here prints text that is still     */
/* held in buf when the input ends -- check against the complete    */
/* putword().                                                       */
/*------------------------------------------------------------------*/
main(int argc, char **argv)
{
  int i, m, n ;
  char *s ;

  /* analyze possible options; the first argument that does not     */
  /* start with '-' ends option processing.  The inner loop starts  */
  /* again at argv[i], so the s++ above it has no effect and the    */
  /* '-' itself is scanned too (it matches neither letter).         */
  for (i=1; i < argc; i++)
  {
    if (*(s=argv[i])=='-') s++;
    else break ;
    for (s = argv[i] ; *s != 0 ; s++)
    {
      if (*s == 'f') finn = 1 ;
      if (*s == 'd') debug = 1 ;
    }
  }

  /* read input text and add hyphenation to all words; cchar is     */
  /* primed with the first character because getword() always works */
  /* with one character of look-ahead                               */
  cchar = getchar() ;
  skip = 0;
  while ((n=getword(&skip)) > -1)
  {
    llen=len ;
    if (n == 0) /* ordinary word */
    {
      hyphens();
      putword() ;
    }
    else if (n == 1) /* control word or single special character */
    {
      copyword() ;
      putword() ;
    }
    else if (n==2) /* start of a region that must not be hyphenated */
    {
      /* copy the trigger word itself */
      copyword() ;
      putword() ;
      /* copy every word up to the terminator (3) or end of file */
      while (((m=getword(&skip)) > -1) && (m != 3))
      {
        llen = len ;
        copyword() ;
        putword() ;
      }
      /* copy the word that ended the loop; after end of file       */
      /* getword() has left inword cleared and len == 1             */
      llen = len ;
      copyword() ;
      putword() ;
    }
  }
}

/*-------------------------------------------------------------------*/
/* read next word into inword and set len; a word can be             */
/* - a string of letters => return 0 (also inside a skipped region)  */
/* - a TeX word; \ followed by letters => return 1                   */
/* - a special character => return 1                                 */
/* - beginning of a section not to be hyphenated => return 2         */
/*   and set *skip to the index of the matching nohyp[] entry        */
/* - end of a section not to be hyphenated => return 3, set *skip=0  */
/* on end of file return -1                                          */
/*                                                                   */
/* On entry cchar holds the first character of the word; on return   */
/* it holds the first character after the word.  The parameter skip  */
/* hides the global of the same name; main() passes the address of   */
/* that global.                                                      */
/*                                                                   */
/* NOTE(review): each of the three reading loops stores at most 59   */
/* further characters.  When a run is longer, the loop ends with     */
/* inword completely filled (no terminating 0 for the %s and         */
/* strncmp() uses below) and with cchar still equal to the last      */
/* stored character, so that character is stored again as the first  */
/* character of the next word.                                       */
/*-------------------------------------------------------------------*/
int getword(int *skip)
{
  int i, c; /* c is not used */

  for (i=0; i<60; i++) inword[i]=0;
  len = 1;
  if (cchar < 0) return -1; /* end of file */
  inword[0] = cchar;

  if (letter[cchar]) /* ordinary word, read all */
  {                  /* characters of the word */
    for (i=1; i<60; i++, len++)
    {
      cchar=getchar() ;
      if (cchar==EOF) break ;
      if (letter[cchar]) inword[i] = cchar ;
      else break ;
    }
    if (debug) printf(" 0 0 %d [%s]\n",len, inword) ;
    return 0 ; /* returned before the region checks below are reached */
  }
  else if (cchar==TeX) /* TeX control word: \ plus following letters */
  {
    for (i=1; i<60; i++, len++)
    {
      cchar=getchar() ;
      if (cchar==EOF) break ;
      if (letter[cchar]) inword[i] = cchar ;
      else break ;
    }
  }
  else if (cchar=='\%') /* comment line, get the */
  {                     /* next word, because it */
                        /* may contain a directive */
    for (i=1; i<60; i++, len++)
    {
      cchar=getchar() ;
      if (cchar==EOF) break ;
      if (letter[cchar]) inword[i] = cchar ;
      else break ;
    }
  }
  else if (cchar=='$') /* math mode, check if the */
  {                    /* next character is also $ */
    cchar = getchar() ;
    if (cchar=='$')
    {
      inword[1] = cchar ;
      len++;
      cchar=getchar() ;
    }
  }
  else /* some other character: a one-character word */
  {
    cchar = getchar() ;
  }

  /* check if we have to turn hyphenation off: look for the first   */
  /* nohyp[] entry (index 0 is never tested) whose trigger prefix   */
  /* matches the start of inword                                    */
  if (*skip==0)
    for (i=1; i< MAXNOHYP; i++)
    {
      if (strncmp(inword, nohyp[i].trigger, nohyp[i].len) == 0)
      {
        *skip = i;
        if (debug) printf(" beginskip %d 2 %d [%s]\n",i,len, inword) ;
        return 2 ;
      }
    }
  /* if hyphenation is off, check if it should be turned on: only   */
  /* the terminator of the active entry is tested                   */
  else
  {
    if (strncmp(inword, nohyp[*skip].finish, nohyp[*skip].lenf) == 0)
    {
      if (debug) printf(" endskip %d 3 %d [%s]\n", *skip,len, inword);
      *skip = 0 ;
      return 3 ;
    }
  }

  if (debug) printf(" 0 1 %d [%s]\n",len, inword) ;
  return 1 ;
}

/*-------------------------------------------------------------------*/
/* copy inword to outword for printing; no hyphenation               */
/* Only the first 60 bytes of outword are cleared before the copy.   */
/*-------------------------------------------------------------------*/
void copyword()
{
  int i;

  for (i=0; i<60; i++) outword[i]=0 ;

  /* NOTE(review): the source text is truncated inside the next      */
  /* line.  The rest of the copy loop, the end of copyword() and     */
  /* the beginning of the following function are missing; that       */
  /* function is presumably putword() (declared above, called from   */
  /* main()), and it uses a pointer p whose declaration is not       */
  /* visible.  Recover the lost text from the original file; do not  */
  /* guess it.                                                       */
  /*                                                                 */
  /* What is visible from here on:                                   */
  /*  - when the (lost) condition ending in "160" holds, buf is      */
  /*    printed followed by a newline (in debug mode once more in    */
  /*    brackets), col is reset and the first 200 bytes of buf are   */
  /*    cleared (buf itself is 300 bytes long);                      */
  /*  - otherwise outword is appended to buf at index col.           */
  /*    Nothing visible checks col against the size of buf.          */
  for (i=0; (i 160)))
  {
    printf("%s\n", buf);
    if (debug) printf("[%s]\n", buf) ;
    col = 0 ;
    for (i=0; i<200; i++) buf[i] = 0;
    return ;
  }

  /* convert finnish characters to accented equivalents: codes 228,  */
  /* 196, 246 and 214 become the three-character sequences \"a \"A   */
  /* \"o \"O; i counts the characters written and stops the loop     */
  /* at 200                                                          */
  if (finn)
  {
    for (i=0, p=outword; (i<200)&&((*p)!=0); p++)
    {
      if ((*p) == 228)
      { buf[col++] = '\\'; buf[col++]='"'; buf[col++]='a'; i+=3 ;}
      else if ((*p) == 196)
      { buf[col++] = '\\'; buf[col++]='"'; buf[col++]='A'; i+=3 ;}
      else if ((*p) == 246)
      { buf[col++] = '\\'; buf[col++]='"'; buf[col++]='o'; i+=3 ;}
      else if ((*p) == 214)
      { buf[col++] = '\\'; buf[col++]='"'; buf[col++]='O'; i+=3 ;}
      else
      { buf[col++] = *p; i++; }
    }
  }
  else
  {
    /* plain copy; i is never advanced in this loop, so only the     */
    /* terminating 0 of outword ends it                              */
    for (i=0, p=outword; (i<200)&&((*p)!=0); p++)
      buf[col++] = *p ;
  }
}

/*-------------------------------------------------------------------*/
/* add hyphenation hints in inword and store the modified word in    */
/* outword; llen is set to the length of the result                  */
/*-------------------------------------------------------------------*/
void hyphens()
{
  int i,n;
  unsigned char *in, *out;

  llen=0;
  for (i=0; i<100; i++) outword[i]=0;

  /* if the word is very short don't hyphenate: copy inword[0..len]  */
  /* (index len included) unchanged and report len+1 as the length   */
  if (len < 4)
  {
    for (i=0; i<= len; i++) outword[i] = inword[i];
    llen=len+1;
    return;
  }

  /* count leading consonants; n is raised to at least 2 */
  for (n=0, in=inword; consonant[*in]; in++, n++) ;
  if (n < 2) n=2;
  in = inword;
  out = outword;

  /* never hyphenate after the first letter or between leading */
  /* consonants */
  /* NOTE(review): the source text is truncated inside the next      */
  /* line.  Everything between the loop header and the tail of a     */
  /* printf() call is missing, which includes the whole hyphenation  */
  /* loop; the remaining fragment also leaves a string quote         */
  /* unbalanced.  Recover the lost text from the original file.      */
  for (i=0; i [%s]\n", inword, outword) ; }