sombok  2.2.1
include/sombok.h
Go to the documentation of this file.
00001 /*
00002  * sombok.h - common definitions for Sombok library
00003  * 
00004  * Copyright (C) 2009-2012 by Hatuka*nezumi - IKEDA Soji.
00005  *
00006  * This file is part of the Sombok Package.  This program is free
00007  * software; you can redistribute it and/or modify it under the terms of
00008  * either the GNU General Public License or the Artistic License, as
00009  * specified in the README file.
00010  *
00011  */
00012 
00013 #ifndef _SOMBOK_H_
00014 
00015 #ifdef HAVE_CONFIG_H
00016 #    include "config.h"
00017 #endif
00018 #include <errno.h>
00019 #include <stddef.h>
00020 #include <stdlib.h>
00021 #include <string.h>
00022 #ifdef HAVE_STRINGS_H
00023 #    include <strings.h>
00024 #endif                          /* HAVE_STRINGS_H */
00025 
/* Version of the Sombok library this header belongs to, as a string literal. */
00026 #define SOMBOK_VERSION "2.2.1"
00027 
/*
 * Feature-test macros naming the underlying type of unichar_t.
 * NOTE(review): all three are defined here at the same time, while this
 * header typedefs unichar_t as `unsigned int'.  These look like build-time
 * substitutions of which only the matching one(s) should normally be
 * defined -- confirm against the generated header before testing for the
 * WCHAR_T or UNSIGNED_LONG variant in client code.
 */
00028 #define SOMBOK_UNICHAR_T_IS_WCHAR_T
00029 #define SOMBOK_UNICHAR_T_IS_UNSIGNED_INT
00030 #define SOMBOK_UNICHAR_T_IS_UNSIGNED_LONG
00031 
00032 /***
00033  *** Data structure.
00034  ***/
00035 
00036 /* Primitive types */
00037 
/* One Unicode character; the element type of unistr_t and gcstring_t buffers. */
00039 typedef unsigned int unichar_t;
00040 
/*
 * A character property value.  Used in this header for the lbc/elbc fields
 * of gcchar_t, the lbc/eaw/gcb/scr fields of mapent_t, and as the return
 * type of the property lookup functions.  PROP_UNKNOWN is its all-bits-set
 * value.
 */
00043 typedef unsigned char propval_t;
00044 
/*
 * Unicode string: a pointer to unichar_t elements together with a length.
 * NOTE(review): whether str may be NULL for an empty string, and who owns
 * the buffer, is not stated in this header -- confirm with the callee.
 */
00047 typedef struct {
00051     unichar_t *str;             /* the characters */
00053     size_t len;                 /* presumably the number of unichar_t in str */
00054 } unistr_t;
00055 
/*
 * Descriptor of one grapheme cluster; gcstring_t holds an array of these
 * in its gcstr member.
 * NOTE(review): the per-field meanings below are inferred from the names
 * and types only -- confirm against the implementation.
 */
00059 typedef struct {
00061     size_t idx;                 /* presumably offset of the cluster in the owner's str */
00063     size_t len;                 /* presumably number of unichar_t the cluster spans */
00065     size_t col;                 /* presumably number of columns the cluster occupies */
00067     propval_t lbc;              /* presumably line breaking class of the cluster */
00069     propval_t elbc;             /* presumably line breaking class of its extender part */
00071     unsigned char flag;         /* presumably takes LINEBREAK_FLAG_* values */
00072 } gcchar_t;
00073 
/*
 * One entry of a character property map: a range of characters and four
 * property values attached to it.  linebreak_t refers to an array of these
 * through its map/mapsiz members.
 * NOTE(review): whether `end' is inclusive, and the exact property each
 * abbreviation stands for, is inferred from names -- confirm.
 */
00076 typedef struct {
00078     unichar_t beg;              /* first character of the range */
00080     unichar_t end;              /* last character of the range */
00082     propval_t lbc;              /* presumably line breaking class (UAX #14) */
00084     propval_t eaw;              /* presumably East_Asian_Width (UAX #11) */
00086     propval_t gcb;              /* presumably Grapheme_Cluster_Break (UAX #29) */
00088     propval_t scr;              /* presumably Script */
00089 } mapent_t;
00090 
/* Forward declaration so that gcstring_t and the callback typedefs can
 * refer to the linebreak object before struct linebreak_t is defined. */
00091 struct linebreak_t;
00092 
/*
 * Grapheme cluster string: a Unicode string (str/len, same shape as
 * unistr_t) plus an array of gcchar_t describing its clusters, a cursor,
 * and a pointer to a linebreak object.
 * The gcstring_eos() and gcstring_getpos() macros read gclen and pos.
 */
00095 typedef struct {
00099     unichar_t *str;             /* the characters */
00101     size_t len;                 /* presumably the number of unichar_t in str */
00104     gcchar_t *gcstr;            /* the grapheme cluster descriptors */
00106     size_t gclen;               /* number of clusters; upper bound tested by gcstring_eos() */
00108     size_t pos;                 /* cursor, in clusters; read by gcstring_getpos() */
00110     struct linebreak_t *lbobj;  /* associated linebreak object (ownership: TODO confirm) */
00111 } gcstring_t;
00112 
/*
 * State passed to format callbacks (see linebreak_format_func_t).
 * Values are consecutive: NONE is 0, SOT..EOT are 1..7 and MAX is 8, so MAX
 * is one past the last real state.  The LINEBREAK_STATE_*_FORMAT macros
 * are the negations of SOT, SOP and SOL.
 * NOTE(review): the abbreviations presumably mean start/end of
 * text (SOT/EOT), paragraph (SOP/EOP) and line (SOL/EOL) -- confirm.
 */
00115 typedef enum {
00116     LINEBREAK_STATE_NONE = 0,
00117     LINEBREAK_STATE_SOT, LINEBREAK_STATE_SOP, LINEBREAK_STATE_SOL,
00118     LINEBREAK_STATE_LINE,
00119     LINEBREAK_STATE_EOL, LINEBREAK_STATE_EOP, LINEBREAK_STATE_EOT,
00120     LINEBREAK_STATE_MAX
00121 } linebreak_state_t;
00122 
/*
 * Callback types stored in linebreak_t.  Parameters are unnamed in this
 * header; the roles given below are inferred from the types and from the
 * members/setters that use them -- NOTE(review): confirm against the
 * implementation.
 */

/* Reference-count hook: takes an opaque data pointer and two ints
 * (presumably a LINEBREAK_REF_* kind and a count delta). */
00123 typedef void
00124     (*linebreak_ref_func_t) (void *, int, int);
/* Format hook: receives the linebreak object, a state and a string;
 * returns a gcstring_t pointer. */
00125 typedef gcstring_t *
00126     (*linebreak_format_func_t) (struct linebreak_t *, linebreak_state_t,
00127                                 gcstring_t *);
/* Sizing hook: receives the linebreak object, a double and three strings;
 * returns a double (presumably a column count). */
00128 typedef double
00129     (*linebreak_sizing_func_t) (struct linebreak_t *, double,
00130                                 gcstring_t *, gcstring_t *, gcstring_t *);
/* Urgent-break hook: receives the linebreak object and a string; returns a
 * gcstring_t pointer. */
00131 typedef gcstring_t *
00132     (*linebreak_urgent_func_t) (struct linebreak_t *, gcstring_t *);
/* Preprocessing hook: receives the linebreak object, an opaque data
 * pointer and two unistr_t pointers; returns a gcstring_t pointer. */
00133 typedef gcstring_t *
00134     (*linebreak_prep_func_t) (struct linebreak_t *, void *, unistr_t *,
00135                               unistr_t *);
/* Older form of the preprocessing hook without the data pointer; this is
 * the type of linebreak_t.user_func and of linebreak_set_user()'s argument. */
00136 typedef gcstring_t *
00137     (*linebreak_obs_prep_func_t) (struct linebreak_t *, unistr_t *);
00138 
/*
 * The linebreak object: working buffers, limits, a property map, options,
 * callbacks with their data pointers, and an error number.
 * NOTE(review): member meanings are inferred from names, types and the
 * setter prototypes in this header; which members are private to the
 * library is not visible here -- confirm before touching them directly.
 */
00141 typedef struct linebreak_t {
00145     unsigned long int refcount; /* reference count (see linebreak_incref / linebreak_destroy) */
00147     int state;                  /* plain int, not linebreak_state_t; presumably also holds the
                                   negative LINEBREAK_STATE_*_FORMAT values */
00149     unistr_t bufstr;            /* presumably the buffered line */
00151     unistr_t bufspc;            /* presumably spaces trailing the buffered line */
00153     double bufcols;             /* presumably columns of the buffered line */
00155     unistr_t unread;            /* presumably input not yet consumed */
00161     size_t charmax;             /* presumably max characters per line (cf. LINEBREAK_DEFAULT_CHARMAX) */
00163     double colmax;              /* presumably maximum columns per line */
00165     double colmin;              /* presumably minimum columns per line */
00167     mapent_t *map;              /* property map entries */
00168     size_t mapsiz;              /* presumably number of entries in map */
00170     unistr_t newline;           /* newline sequence (see linebreak_set_newline) */
00172     unsigned int options;       /* presumably an OR of LINEBREAK_OPTION_* bits */
00174     void *format_data;          /* opaque data paired with format_func */
00175     void *sizing_data;          /* opaque data paired with sizing_func */
00176     void *urgent_data;          /* opaque data paired with urgent_func */
00178     void *user_data;            /* opaque data paired with user_func */
00180     void *stash;                /* opaque data (see linebreak_set_stash) */
00182     linebreak_format_func_t format_func;
00184     linebreak_sizing_func_t sizing_func;
00186     linebreak_urgent_func_t urgent_func;
00189     linebreak_obs_prep_func_t user_func;        /* older-style preprocessing hook */
00197     linebreak_ref_func_t ref_func;              /* reference-count hook given to linebreak_new() */
00201     int errnum;                 /* presumably an errno value or LINEBREAK_ELONG / LINEBREAK_EEXTN */
00207     linebreak_prep_func_t * prep_func;          /* pointer to preprocessing hooks (array; termination TODO confirm) */
00209     void **prep_data;           /* pointer to data pointers, presumably parallel to prep_func */
00211 } linebreak_t;
00212 
00213 /***
00214  *** Constants.
00215  ***/
00216 
/* All-bits-set propval_t (the maximum unsigned char value); by its name,
 * the value meaning "property unknown". */
00218 #define PROP_UNKNOWN ((propval_t)~0)
00219 
/*
 * Break flags; presumably the values stored in gcchar_t.flag -- TODO confirm.
 * LINEBREAK_FLAG_BREAK_BEFORE is an alias of LINEBREAK_FLAG_ALLOW_BEFORE.
 */
00222 #define LINEBREAK_FLAG_PROHIBIT_BEFORE (1)
00223 #define LINEBREAK_FLAG_ALLOW_BEFORE (2)
00224 #define LINEBREAK_FLAG_BREAK_BEFORE LINEBREAK_FLAG_ALLOW_BEFORE
00225 
/* Default character limit; presumably the initial value of
 * linebreak_t.charmax -- TODO confirm. */
00228 #define LINEBREAK_DEFAULT_CHARMAX (998)
00229 
/*
 * Option bits.  Each value is a distinct power of two, so they can be
 * combined with bitwise OR; presumably stored in linebreak_t.options --
 * TODO confirm.
 */
00232 #define LINEBREAK_OPTION_EASTASIAN_CONTEXT (1)
00233 #define LINEBREAK_OPTION_HANGUL_AS_AL (2)
00234 #define LINEBREAK_OPTION_LEGACY_CM (4)
00235 #define LINEBREAK_OPTION_BREAK_INDENT (8)
00236 #define LINEBREAK_OPTION_COMPLEX_BREAKING (16)
00237 #define LINEBREAK_OPTION_NONSTARTER_LOOSE (32)
00238 #define LINEBREAK_OPTION_VIRAMA_AS_JOINER (64)
00239 
/*
 * Negated forms of the SOT, SOP and SOL states (-1, -2 and -3).  They are
 * outside the linebreak_state_t range, which is presumably why
 * linebreak_t.state is a plain int -- TODO confirm how they are used.
 */
00242 #define LINEBREAK_STATE_SOT_FORMAT (-LINEBREAK_STATE_SOT)
00243 #define LINEBREAK_STATE_SOP_FORMAT (-LINEBREAK_STATE_SOP)
00244 #define LINEBREAK_STATE_SOL_FORMAT (-LINEBREAK_STATE_SOL)
00245 
/*
 * Kinds of referenced data, one per data pointer in linebreak_t (stash,
 * format, sizing, urgent, user, prep).  Presumably passed as the first int
 * argument of linebreak_ref_func_t -- TODO confirm.
 */
00248 #define LINEBREAK_REF_STASH (0)
00249 #define LINEBREAK_REF_FORMAT (1)
00250 #define LINEBREAK_REF_SIZING (2)
00251 #define LINEBREAK_REF_URGENT (3)
00252 #define LINEBREAK_REF_USER (4)
00253 #define LINEBREAK_REF_PREP (5)
00254 
/*
 * Line breaking actions, ordered from PROHIBITED (1) up to MANDATORY (4).
 * Presumably the values returned by linebreak_get_lbrule() /
 * linebreak_lbrule() -- TODO confirm.
 */
00257 #define LINEBREAK_ACTION_MANDATORY (4)
00258 #define LINEBREAK_ACTION_DIRECT (3)
00259 #define LINEBREAK_ACTION_INDIRECT (2)
00260 #define LINEBREAK_ACTION_PROHIBITED (1)
00261 
/*
 * Library-specific error codes, both negative.  Presumably stored in
 * linebreak_t.errnum next to ordinary errno values -- TODO confirm the
 * exact conditions each one reports.
 */
00264 #define LINEBREAK_ELONG (-2)
00265 #define LINEBREAK_EEXTN (-3)
00266 
/*
 * UTF-8 checking levels.  The values are consecutive (0..3), not bit
 * flags.  Presumably the trailing int argument of sombok_decode_utf8(),
 * gcstring_new_from_utf8() and linebreak_break_from_utf8() -- TODO confirm.
 */
00269 #define SOMBOK_UTF8_CHECK_NONE (0)
00270 #define SOMBOK_UTF8_CHECK_MALFORMED (1)
00271 #define SOMBOK_UTF8_CHECK_SURROGATE (2)
00272 #define SOMBOK_UTF8_CHECK_NONUNICODE (3)
00273 
00274 /***
00275  *** Public functions, global variables and macros.
00276  ***/
00277 
/*
 * Property lookup for one character.  Takes the linebreak object, the
 * character, and four propval_t output pointers.
 * NOTE(review): the four outputs presumably correspond to the lbc, eaw,
 * gcb and scr members of mapent_t, in that order; whether NULL is accepted
 * for an unwanted output is not visible here -- confirm.
 */
00278 extern void linebreak_charprop(linebreak_t *, unichar_t,
00279                                propval_t *, propval_t *, propval_t *,
00280                                propval_t *);
00281 
/*
 * Grapheme cluster string API.  Only the prototypes are declared here,
 * without parameter names.
 * NOTE(review): the roles described below are inferred from the function
 * names and parameter types.  Ownership of returned and passed-in objects,
 * and the meaning of negative int arguments, must be confirmed against the
 * implementation.
 */

/* Constructors: from a unistr_t, from a char buffer with length and an int
 * (presumably a SOMBOK_UTF8_CHECK_* level), and a copying variant. */
00282 extern gcstring_t *gcstring_new(unistr_t *, linebreak_t *);
00283 extern gcstring_t *gcstring_new_from_utf8(char *, size_t, int,
00284                                           linebreak_t *);
00285 extern gcstring_t *gcstring_newcopy(unistr_t *, linebreak_t *);
/* Duplicate / release a gcstring_t. */
00286 extern gcstring_t *gcstring_copy(gcstring_t *);
00287 extern void gcstring_destroy(gcstring_t *);
/* Two-string operations: append, compare (int result) and concatenate. */
00288 extern gcstring_t *gcstring_append(gcstring_t *, gcstring_t *);
00289 extern size_t gcstring_columns(gcstring_t *);
00290 extern int gcstring_cmp(gcstring_t *, gcstring_t *);
00291 extern gcstring_t *gcstring_concat(gcstring_t *, gcstring_t *);
/* Cursor operations; see also the gcstring_eos() / gcstring_getpos() macros. */
00292 extern gcchar_t *gcstring_next(gcstring_t *);
00293 extern void gcstring_setpos(gcstring_t *, int);
/* Length/slice operations taking int positions (presumably counted in
 * grapheme clusters). */
00294 extern void gcstring_shrink(gcstring_t *, int);
00295 extern gcstring_t *gcstring_substr(gcstring_t *, int, int);
00296 extern gcstring_t *gcstring_replace(gcstring_t *, int, int, gcstring_t *);
00297 
/*
 * gcstring_eos(gcstr): non-zero when the cursor (pos) is at or beyond the
 * number of grapheme clusters (gclen).  gcstr must be a non-NULL pointer;
 * the argument is evaluated twice, so do not pass an expression with side
 * effects.
 */
00298 #define gcstring_eos(gcstr) \
00299   ((gcstr)->gclen <= (gcstr)->pos)
/* gcstring_getpos(gcstr): the current cursor value (the pos member).
 * gcstr must be a non-NULL pointer. */
00300 #define gcstring_getpos(gcstr) \
00301   ((gcstr)->pos)
00302 
/*
 * Property lookups on a gcstring_t at an int position; both return a
 * propval_t.
 * NOTE(review): presumably these return the lbc and elbc members of the
 * gcchar_t at that position -- confirm, including the handling of
 * out-of-range positions.
 */
00303 extern propval_t gcstring_lbclass(gcstring_t *, int);
00304 extern propval_t gcstring_lbclass_ext(gcstring_t *, int);
00305 
/*
 * Lifecycle of the linebreak object.  linebreak_new() takes the
 * reference-count hook (type linebreak_ref_func_t).
 * NOTE(review): presumably linebreak_incref() bumps refcount and
 * linebreak_destroy() drops it, freeing at zero -- confirm.
 */
00306 extern linebreak_t *linebreak_new(linebreak_ref_func_t);
00307 extern linebreak_t *linebreak_copy(linebreak_t *);
00308 extern linebreak_t *linebreak_incref(linebreak_t *);
00309 extern void linebreak_destroy(linebreak_t *);
00310 
/*
 * Setters for linebreak_t members.  Each set_* / add_* function taking a
 * callback also takes a void * that is presumably stored in the matching
 * *_data member -- TODO confirm.  All return void, so failures are not
 * reported through the return value.
 */
00311 extern void linebreak_set_newline(linebreak_t *, unistr_t *);
00312 extern void linebreak_set_stash(linebreak_t *, void *);
00313 extern void linebreak_set_format(linebreak_t *, linebreak_format_func_t,
00314                                  void *);
00315 extern void linebreak_add_prep(linebreak_t *, linebreak_prep_func_t,
00316                                void *);
00317 extern void linebreak_set_sizing(linebreak_t *, linebreak_sizing_func_t,
00318                                  void *);
00319 extern void linebreak_set_urgent(linebreak_t *, linebreak_urgent_func_t,
00320                                  void *);
/* Takes the older callback type (linebreak_obs_prep_func_t). */
00321 extern void linebreak_set_user(linebreak_t *, linebreak_obs_prep_func_t,
00322                                void *);
00323 extern void linebreak_reset(linebreak_t *);
/*
 * Property-map maintenance: update one character's value, clear, search,
 * and merge from a second linebreak object, for the line breaking class
 * and East Asian width properties respectively.
 * NOTE(review): presumably these operate on linebreak_t.map; the direction
 * of the merge (which argument is the destination) is not visible here.
 */
00324 extern void linebreak_update_lbclass(linebreak_t *, unichar_t, propval_t);
00325 extern void linebreak_clear_lbclass(linebreak_t *);
00326 extern void linebreak_update_eawidth(linebreak_t *, unichar_t, propval_t);
00327 extern void linebreak_clear_eawidth(linebreak_t *);
00328 extern propval_t linebreak_search_lbclass(linebreak_t *, unichar_t);
00329 extern propval_t linebreak_search_eawidth(linebreak_t *, unichar_t);
00330 extern void linebreak_merge_lbclass(linebreak_t *, linebreak_t *);
00331 extern void linebreak_merge_eawidth(linebreak_t *, linebreak_t *);
00332 
/*
 * Per-character property lookups marked obsolete by the original author,
 * and the rule lookup for a pair of property values.
 * NOTE(review): linebreak_get_lbrule() presumably returns one of the
 * LINEBREAK_ACTION_* values for a (before, after) class pair -- confirm.
 */
00333 extern propval_t linebreak_eawidth(linebreak_t *, unichar_t); /* obsolete */
00334 extern propval_t linebreak_get_lbrule(linebreak_t *, propval_t, propval_t);
00335 extern propval_t linebreak_lbclass(linebreak_t *, unichar_t); /* obsolete */
00336 
/*
 * Line breaking entry points.  Each returns a gcstring_t ** (an array of
 * string pointers).
 * NOTE(review): how the returned array is terminated, and how errors are
 * reported (presumably NULL plus linebreak_t.errnum), is not visible in
 * this header -- confirm.  The from_utf8 variant takes a char buffer, its
 * size and an int (presumably a SOMBOK_UTF8_CHECK_* level).
 */
00337 extern gcstring_t **linebreak_break(linebreak_t *, unistr_t *);
00338 extern gcstring_t **linebreak_break_fast(linebreak_t *, unistr_t *);
00339 extern gcstring_t **linebreak_break_from_utf8(linebreak_t *, char *,
00340                                               size_t, int);
00341 extern gcstring_t **linebreak_break_partial(linebreak_t *, unistr_t *);
/* Releases a result array; the meaning of the int argument is not visible
 * here -- TODO confirm. */
00342 extern void linebreak_free_result(gcstring_t **, int);
/* Variant of linebreak_get_lbrule() without a linebreak object; marked
 * obsolete by the original author. */
00343 extern propval_t linebreak_lbrule(propval_t, propval_t); /* obsolete */
00344 
/*
 * Global read-only data exported by the library; declared here and defined
 * elsewhere.
 * NOTE(review): the propvals arrays are presumably property value names
 * indexed by propval_t for East Asian width (EA) and line breaking class
 * (LB); their termination is not visible here -- confirm.
 */
00345 extern const char *linebreak_unicode_version;
00346 extern const char *linebreak_propvals_EA[];
00347 extern const char *linebreak_propvals_LB[];
00348 extern const char *linebreak_southeastasian_supported;
/* Operates in place on a gcstring_t (returns void); presumably sets
 * gcchar_t.flag values for South East Asian text -- TODO confirm. */
00349 extern void linebreak_southeastasian_flagbreak(gcstring_t *);
00350 
/*
 * UTF-8 conversion helpers.
 * sombok_decode_utf8: takes a unistr_t *, a size_t, a const char buffer
 * with its size_t length, and an int (presumably a SOMBOK_UTF8_CHECK_*
 * level); returns a unistr_t *.
 * sombok_encode_utf8: takes a char buffer, a size_t * (presumably an
 * output length), a size_t and the source unistr_t *; returns a char *.
 * NOTE(review): buffer ownership, the role of the bare size_t arguments
 * and the failure return values are not visible here -- confirm.
 */
00351 extern unistr_t *sombok_decode_utf8(unistr_t *, size_t, const char *,
00352                                     size_t, int);
00353 extern char *sombok_encode_utf8(char *, size_t *, size_t, unistr_t *);
00354 
00355 /***
00356  *** Built-in callbacks for linebreak_t.
00357  ***/
/*
 * Ready-made callbacks.  The signatures match the callback typedefs in
 * this header, so they can be passed to the corresponding setters.
 */

/* Format callbacks (match linebreak_format_func_t). */
00358 extern gcstring_t *linebreak_format_SIMPLE(linebreak_t *,
00359                                            linebreak_state_t,
00360                                            gcstring_t *);
00361 extern gcstring_t *linebreak_format_NEWLINE(linebreak_t *,
00362                                             linebreak_state_t,
00363                                             gcstring_t *);
00364 extern gcstring_t *linebreak_format_TRIM(linebreak_t *, linebreak_state_t,
00365                                          gcstring_t *);
/* Preprocessing callback (matches linebreak_prep_func_t). */
00366 extern gcstring_t *linebreak_prep_URIBREAK(linebreak_t *, void *,
00367                                            unistr_t *, unistr_t *);
/* Sizing callback (matches linebreak_sizing_func_t). */
00368 extern double linebreak_sizing_UAX11(linebreak_t *, double, gcstring_t *,
00369                                      gcstring_t *, gcstring_t *);
/* Urgent-break callbacks (match linebreak_urgent_func_t). */
00370 extern gcstring_t *linebreak_urgent_ABORT(linebreak_t *, gcstring_t *);
00371 extern gcstring_t *linebreak_urgent_FORCE(linebreak_t *, gcstring_t *);
00372 
/*
 * The include-guard macro is defined here, at the end of the guarded
 * region, rather than directly after the opening #ifndef.
 * NOTE(review): as a result the guard does not stop a recursive inclusion
 * that happens while the header is still being processed; also, the name
 * starts with an underscore followed by an upper-case letter, a form the C
 * standard reserves for the implementation.
 */
00373 #define _SOMBOK_H_
00374 #endif                          /* _SOMBOK_H_ */
00375 
/* This include sits outside the include guard, so it is evaluated on every
 * inclusion of this header when MALLOC_DEBUG is defined. */
00376 #ifdef MALLOC_DEBUG
00377 #include "src/mymalloc.h"
00378 #endif                          /* MALLOC_DEBUG */