/* C++ program  |  200 lines  |  5.11 KB */

#include <string.h>
#include <stdlib.h>
#include <stdio.h>

#include "hyphen.h"
#include "csutil.h"

#define BUFSIZE 1000

/*
 * Print the command-line usage summary to stderr.
 *
 * The option list matches what main() actually parses: an optional "-o"
 * and/or "-d", followed by the dictionary file and the word-list file.
 */
void help(void) {
    fprintf(stderr,"correct syntax is:\n");
    fprintf(stderr,"example [-o | -d] hyphen_dictionary_file file_of_words_to_check\n");
    fprintf(stderr,"-o = use old algorithm (without non-standard hyphenation)\n");
    fprintf(stderr,"-d = hyphenation with listing of the possible hyphenations\n");
}

/*
 * Get a pointer to the nth 8-bit or UTF-8 character of the word.
 *
 * word - NUL-terminated string to walk
 * n    - number of characters to skip (n <= 0 returns word itself)
 * utf8 - non-zero to treat bytes of the form 10xxxxxx as continuation
 *        bytes belonging to the preceding character
 *
 * Returns a pointer into word. If n is larger than the number of
 * characters in word, the walk stops at the terminating NUL instead of
 * running past the end of the string.
 */
char * hindex(char * word, int n, int utf8) {
    int j = 0;
    while (j < n && *word) {
        j++;
        word++;
        /* skip UTF-8 continuation bytes (top two bits == 10); NUL never matches */
        while (utf8 && ((((unsigned char) *word) >> 6) == 2)) word++;
    }
    return word;
}

/*
 * List the possible hyphenations of a word, one per output line, as used
 * by the -d option (example for the usage of the hyphenate2() function).
 *
 * word   - the word; it is cut temporarily with a NUL while printing and
 *          restored afterwards, so it is unchanged on return
 * hyphen - per-character hyphenation codes; an odd value at index j marks
 *          a hyphenation point after character j
 * rep    - optional per-character replacement strings (may be NULL)
 * pos    - per-character offsets, read only where rep[j] is set
 * cut    - per-character cut lengths, read only where rep[j] is set
 * utf8   - non-zero when word is UTF-8; continuation bytes are then not
 *          counted as characters
 *
 * For a plain hyphenation point the line is " - <head>=<tail>". Where a
 * replacement exists the line is " - <head><rep[j]><tail>", with the head
 * ending at character index (j - pos[j] + 1) and the tail starting cut[j]
 * characters after that.
 */
void single_hyphenations(char * word, char * hyphen, char ** rep, int * pos, int * cut, int utf8) {
    /* The length is loop-invariant: every temporary NUL is undone before
       the next iteration, so it is computed once instead of per pass. */
    size_t len = strlen(word);
    size_t i;       /* byte index into word */
    int j = 0;      /* character index into hyphen/rep/pos/cut */
    int k;
    char r;

    for (i = 0; i + 1 < len; i++) {
        /* continuation bytes belong to the previous character */
        if (utf8 && ((((unsigned char) word[i]) >> 6) == 2)) continue;
        if ((hyphen[j] & 1)) {
            if (rep && rep[j]) {
              k = (int) (hindex(word, j - pos[j] + 1, utf8) - word);
              r = word[k];
              word[k] = 0;
              printf(" - %s%s", word, rep[j]);
              word[k] = r;
              printf("%s\n", hindex(word + k, cut[j], utf8));
            } else {
              k = (int) (hindex(word, j + 1, utf8) - word);
              r = word[k];
              word[k] = 0;
              printf(" - %s=", word);
              word[k] = r;
              printf("%s\n", word + k);
            }
        }
        j++;
    }
}

/*
 * Example driver: hyphenate every line of a word list and print the result.
 *
 * Usage: example [-o] [-d] hyphen_dictionary_file file_of_words_to_check
 *   -o  call hnj_hyphen_hyphenate() and print the word with '-' inserted
 *       at each hyphenation point
 *   -d  call hnj_hyphen_hyphenate2() and additionally list each possible
 *       hyphenation via single_hyphenations()
 * Without options hnj_hyphen_hyphenate2() is used and only its hyphenated
 * word is printed.
 *
 * Returns 0 on success; exits with status 1 on a usage error, a file that
 * cannot be opened, an allocation failure or a hyphenation error.
 */
int
main(int argc, char** argv)
{
    HyphenDict *dict;
    FILE *wtclst;
    char buf[BUFSIZE + 1];
    char hword[BUFSIZE * 2];
    int df;
    int wtc;
    int arg = 1;
    int optd = 1;
    int optdd = 0;

    /* argv is NULL-terminated, so the parsing below does not need argc */
    (void) argc;

    /* first parse the command line options */
    /* arg1 - hyphen dictionary file, arg2 - file of words to check */

    if (argv[arg]) {
        if (strcmp(argv[arg], "-o") == 0) {
            optd = 0;
            arg++;
        }
        if (argv[arg] && strcmp(argv[arg], "-d") == 0) {
            optd = 1;
            optdd = 1;
            arg++;
        }
    }

    if (argv[arg]) {
        df = arg++;
    } else {
        help();
        exit(1);
    }

    if (argv[arg]) {
        wtc = arg++;
    } else {
        help();
        exit(1);
    }

    /* load the hyphenation dictionary */
    if ((dict = hnj_hyphen_load(argv[df])) == NULL) {
        fprintf(stderr, "Couldn't find file %s\n", argv[df]);
        fflush(stderr);
        exit(1);
    }

    /* open the words to check list */
    wtclst = fopen(argv[wtc],"r");
    if (!wtclst) {
        fprintf(stderr,"Error - could not open file of words to check\n");
        hnj_hyphen_free(dict);
        exit(1);
    }

    /* now read each word from the wtc file */
    while (fgets(buf, BUFSIZE, wtclst)) {
        size_t len = strlen(buf);
        size_t i;
        char *lcword;
        char *hyphens;
        char **rep = NULL;
        int *pos = NULL;
        int *cut = NULL;
        int failed;

        /* Strip the line terminator ("\n" or "\r\n"). The length checks
           keep the indexing inside buf for empty and one-character
           lines, and len always ends up as the real word length, so a
           final line without a newline keeps its last character. */
        if (len > 0 && buf[len - 1] == '\n') buf[--len] = '\0';
        if (len > 0 && buf[len - 1] == '\r') buf[--len] = '\0';

        /* set aside some buffers to hold lower cased */
        /* and hyphen information */
        lcword = (char *) malloc(len + 2);
        hyphens = (char *) malloc(len + 6);
        if (!lcword || !hyphens) {
            free(hyphens);
            free(lcword);
            fprintf(stderr, "Error - out of memory\n");
            exit(1);
        }
        if (dict->utf8) {
            strcpy(lcword, buf);
        } else {
            enmkallsmall(lcword,buf,dict->cset);
        }

        /* now actually try to hyphenate the word; the whole line,
           including any trailing periods, is passed to the hyphenator */
        hword[0] = '\0';
        if (optd) {
            failed = hnj_hyphen_hyphenate2(dict, lcword, (int) len, hyphens,
                                           hword, &rep, &pos, &cut);
        } else {
            failed = hnj_hyphen_hyphenate(dict, lcword, (int) len, hyphens);
        }
        if (failed) {
            free(hyphens);
            free(lcword);
            fprintf(stderr, "hyphenation error\n");
            exit(1);
        }

        if (!optd) {
            char *hyphword;
            size_t nHyphCount = 0;
            size_t j = 0;

            /* count the hyphens and allocate space for the new
               hyphenated string */
            for (i = 0; i < len; i++) {
                if (hyphens[i] & 1) nHyphCount++;
            }
            hyphword = (char *) malloc(len + 1 + nHyphCount);
            if (!hyphword) {
                free(hyphens);
                free(lcword);
                fprintf(stderr, "Error - out of memory\n");
                exit(1);
            }

            /* copy the original (not lower-cased) word, inserting '-'
               after every character whose hyphen code is odd */
            for (i = 0; i < len; i++) {
                hyphword[j++] = buf[i];
                if (hyphens[i] & 1) {
                    hyphword[j++] = '-';
                }
            }
            hyphword[j] = '\0';
            fprintf(stdout,"%s\n",hyphword);
            fflush(stdout);
            free(hyphword);
        } else {
            fprintf(stdout,"%s\n", hword);
            if (optdd) single_hyphenations(lcword, hyphens, rep, pos, cut, dict->utf8);
            if (rep) {
                /* one replacement slot per input byte of the word */
                for (i = 0; i < len; i++) {
                    free(rep[i]);
                }
                free(rep);
                free(pos);
                free(cut);
            }
        }
        free(hyphens);
        free(lcword);
    }

    fclose(wtclst);
    hnj_hyphen_free(dict);
    return 0;
}