#include "../fitz/fitz-imp.h"
#include "mupdf/ucdn.h"
#include "html-imp.h"

/*
 * NOTE(review): the header names of the three system #include directives
 * were missing. <assert.h>, <stddef.h> and <string.h> are restored from what
 * this file visibly uses (assert, offsetof, the str and mem functions);
 * <stdio.h> is a conservative guess -- confirm against the project history.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Presumably top/right/bottom/left edge indices; not used in this part of the file. */
enum { T, R, B, L };

/* Direction used when a "dir" attribute has an unrecognised value. */
#define DEFAULT_DIR FZ_BIDI_LTR

/* Built-in stylesheet text for HTML documents. */
static const char *html_default_css =
"@page{margin:3em 2em}"
"a{color:#06C;text-decoration:underline}"
"address{display:block;font-style:italic}"
"b{font-weight:bold}"
"bdo{direction:rtl;unicode-bidi:bidi-override}"
"blockquote{display:block;margin:1em 40px}"
"body{display:block;margin:1em}"
"cite{font-style:italic}"
"code{font-family:monospace}"
"dd{display:block;margin:0 0 0 40px}"
"del{text-decoration:line-through}"
"div{display:block}"
"dl{display:block;margin:1em 0}"
"dt{display:block}"
"em{font-style:italic}"
"h1{display:block;font-size:2em;font-weight:bold;margin:0.67em 0;page-break-after:avoid}"
"h2{display:block;font-size:1.5em;font-weight:bold;margin:0.83em 0;page-break-after:avoid}"
"h3{display:block;font-size:1.17em;font-weight:bold;margin:1em 0;page-break-after:avoid}"
"h4{display:block;font-size:1em;font-weight:bold;margin:1.33em 0;page-break-after:avoid}"
"h5{display:block;font-size:0.83em;font-weight:bold;margin:1.67em 0;page-break-after:avoid}"
"h6{display:block;font-size:0.67em;font-weight:bold;margin:2.33em 0;page-break-after:avoid}"
"head{display:none}"
"hr{border-style:solid;border-width:1px;display:block;margin-bottom:0.5em;margin-top:0.5em;text-align:center}"
"html{display:block}"
"i{font-style:italic}"
"ins{text-decoration:underline}"
"kbd{font-family:monospace}"
"li{display:list-item}"
"menu{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ol{display:block;list-style-type:decimal;margin:1em 0;padding:0 0 0 30pt}"
"p{display:block;margin:1em 0}"
"pre{display:block;font-family:monospace;margin:1em 0;white-space:pre}"
"samp{font-family:monospace}"
"script{display:none}"
"small{font-size:0.83em}"
"strong{font-weight:bold}"
"style{display:none}"
"sub{font-size:0.83em;vertical-align:sub}"
"sup{font-size:0.83em;vertical-align:super}"
"table{display:table}"
"tbody{display:table-row-group}"
"td{display:table-cell;padding:1px}"
"tfoot{display:table-footer-group}"
"th{display:table-cell;font-weight:bold;padding:1px;text-align:center}"
"thead{display:table-header-group}"
"tr{display:table-row}"
"ul{display:block;list-style-type:disc;margin:1em 0;padding:0 0 0 30pt}"
"ul ul{list-style-type:circle}"
"ul ul ul{list-style-type:square}"
"var{font-style:italic}"
"svg{display:none}"
;

/* Built-in stylesheet text for FictionBook (FB2) documents. */
static const char *fb2_default_css =
"@page{margin:3em 2em}"
"FictionBook{display:block;margin:1em}"
"stylesheet,binary{display:none}"
"description>*{display:none}"
"description>title-info{display:block}"
"description>title-info>*{display:none}"
"description>title-info>coverpage{display:block;page-break-before:always;page-break-after:always}"
"body,section,title,subtitle,p,cite,epigraph,text-author,date,poem,stanza,v,empty-line{display:block}"
"image{display:block}"
"p>image{display:inline}"
"table{display:table}"
"tr{display:table-row}"
"th,td{display:table-cell}"
"a{color:#06C;text-decoration:underline}"
"a[type=note]{font-size:small;vertical-align:super}"
"code{white-space:pre;font-family:monospace}"
"emphasis{font-style:italic}"
"strikethrough{text-decoration:line-through}"
"strong{font-weight:bold}"
"sub{font-size:small;vertical-align:sub}"
"sup{font-size:small;vertical-align:super}"
"image{margin:1em 0;text-align:center}"
"cite,poem{margin:1em 2em}"
"subtitle,epigraph,stanza{margin:1em 0}"
"title>p{text-align:center;font-size:x-large}"
"subtitle{text-align:center;font-size:large}"
"p{margin-top:1em;text-align:justify}"
"empty-line{padding-top:1em}"
"p+p{margin-top:0;text-indent:1.5em}"
"empty-line+p{margin-top:0}"
"section>title{page-break-before:always}"
;

/* State threaded through box and flow generation. */
struct genstate
{
	fz_pool *pool;			/* allocator for boxes, flow nodes and copied strings */
	fz_html_font_set *set;		/* passed to fz_apply_css_style */
	fz_archive *zip;		/* archive that image entries are read from */
	fz_tree *images;		/* looked up by "#name" references on FB2 <image> */
	int is_fb2;			/* non-zero selects the FB2-specific tag handling */
	const char *base_uri;		/* prefix joined with relative image paths */
	fz_css *css;			/* rules matched against each element */
	int at_bol;			/* set after a break or a new flow box; cleared after a word or image */
	int emit_white;			/* collapsed whitespace is pending, see flush_space */
	int last_brk_cls;		/* line-break class of the previous character (row index into pairbrk) */
	fz_css_style_splay *styles;	/* passed to fz_css_enlist */
};

/* Return non-zero if c is a space, tab, carriage return or newline. */
static int iswhite(int c)
{
	return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/* Return 1 if every character of s is white (also for the empty string), else 0. */
static int is_all_white(const char *s)
{
	while (*s)
	{
		if (!iswhite(*s))
			return 0;
		++s;
	}
	return 1;
}

/* TODO: pool allocator for flow nodes */
/* TODO: store text by pointing to a giant buffer */

/*
 * Walk a flow list and drop the image reference held by each FLOW_IMAGE
 * node. The nodes themselves are not freed here.
 */
static void fz_drop_html_flow(fz_context *ctx, fz_html_flow *flow)
{
	while (flow)
	{
		fz_html_flow *next = flow->next;
		if (flow->type == FLOW_IMAGE)
			fz_drop_image(ctx, flow->content.image);
		flow = next;
	}
}

/*
 * Allocate a flow node of the given type from the pool and append it to
 * top's flow list.
 *
 * Image nodes get the full structure; every other type gets only the part
 * of the structure before 'content' plus 'extras' bytes of payload.
 *
 * NOTE(review): flow->next is never assigned here, so the list terminator
 * presumably relies on fz_pool_alloc returning zeroed memory -- confirm.
 */
static fz_html_flow *add_flow(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, int type, int extras)
{
	size_t size = (type == FLOW_IMAGE ? sizeof(fz_html_flow) : offsetof(fz_html_flow, content) + extras);
	fz_html_flow *flow = fz_pool_alloc(ctx, pool, size);
	flow->type = type;
	flow->expand = 0;
	flow->bidi_level = 0;
	flow->markup_lang = 0;
	flow->breaks_line = 0;
	flow->box = inline_box;
	*top->flow_tail = flow;
	top->flow_tail = &flow->next;
	return flow;
}

/* Append a FLOW_SPACE node with its 'expand' flag set. */
static void add_flow_space(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_SPACE, 0);
	flow->expand = 1;
}

/* Append a FLOW_BREAK node. */
static void add_flow_break(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	(void)add_flow(ctx, pool, top, inline_box, FLOW_BREAK, 0);
}

/* Append a FLOW_SBREAK (soft break) node. */
static void add_flow_sbreak(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	(void)add_flow(ctx, pool, top, inline_box, FLOW_SBREAK, 0);
}

/* Append a FLOW_SHYPHEN (soft hyphen) node. */
static void add_flow_shyphen(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	(void)add_flow(ctx, pool, top, inline_box, FLOW_SHYPHEN, 0);
}

/*
 * Append a FLOW_WORD node holding a NUL-terminated copy of the bytes in
 * [a, b), tagged with the given markup language.
 */
static void add_flow_word(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, const char *a, const char *b, int lang)
{
	fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_WORD, b - a + 1);
	memcpy(flow->content.text, a, b - a);
	flow->content.text[b - a] = 0;
	flow->markup_lang = lang;
}

/* Append a FLOW_IMAGE node that holds its own reference to img. */
static void add_flow_image(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box, fz_image *img)
{
	fz_html_flow *flow = add_flow(ctx, pool, top, inline_box, FLOW_IMAGE, 0);
	flow->content.image = fz_keep_image(ctx, img);
}

/* Append a FLOW_ANCHOR node. */
static void add_flow_anchor(fz_context *ctx, fz_pool *pool, fz_html_box *top, fz_html_box *inline_box)
{
	(void)add_flow(ctx, pool, top, inline_box, FLOW_ANCHOR, 0);
}

/*
 * Split a word node in two after 'offset' runes (decoded with
 * fz_chartorune).
 *
 * The original node keeps the head of the text; a new node, linked in
 * directly after it, receives the tail and a copy of every field that
 * precedes 'content'. Returns the new node, or 'flow' itself when offset
 * is 0. If offset reaches past the end of the text the new node holds an
 * empty string.
 */
static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
{
	fz_html_flow *new_flow;
	char *text;
	size_t len;

	assert(flow->type == FLOW_WORD);

	if (offset == 0)
		return flow;

	/* Advance 'offset' whole runes, not bytes. */
	text = flow->content.text;
	while (*text && offset)
	{
		int rune;
		text += fz_chartorune(&rune, text);
		offset--;
	}

	len = strlen(text);
	new_flow = fz_pool_alloc(ctx, pool, offsetof(fz_html_flow, content) + len+1);
	memcpy(new_flow, flow, offsetof(fz_html_flow, content));
	new_flow->next = flow->next;
	flow->next = new_flow;
	strcpy(new_flow->content.text, text);
	/* Truncate the original only after its tail has been copied out. */
	*text = 0;
	return new_flow;
}

/*
 * Emit the whitespace recorded in g->emit_white, if any.
 *
 * Nothing is emitted at the beginning of a line (g->at_bol). Otherwise a
 * breakable space node is added when the inline box's white-space style
 * allows breaking at spaces, and a one-character " " word when it does
 * not. The pending flag is cleared either way.
 */
static void flush_space(fz_context *ctx, fz_html_box *flow, fz_html_box *inline_box, int lang, struct genstate *g)
{
	static const char *space = " ";
	int bsp = inline_box->style->white_space & WS_ALLOW_BREAK_SPACE;
	fz_pool *pool = g->pool;
	if (g->emit_white)
	{
		if (!g->at_bol)
		{
			if (bsp)
				add_flow_space(ctx, pool, flow, inline_box);
			else
				add_flow_word(ctx, pool, flow, inline_box, space, space+1, lang);
		}
		g->emit_white = 0;
	}
}

/* pair-wise lookup table for UAX#14 linebreaks */
static const char *pairbrk[29] =
{
/*	-OCCQGNESIPPNAHIIHBBBZCWHHJJJR- */
/*	-PLPULSXYSROULLDNYAB2WMJ23LVTI- */
	"^^^^^^^^^^^^^^^^^^^^^^^^^^^^^", /* OP open punctuation */
	"_^^%%^^^^%%_____%%__^^^______", /* CL close punctuation */
	"_^^%%^^^^%%%%%__%%__^^^______", /* CP close parenthesis */
	"^^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* QU quotation */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* GL non-breaking glue */
	"_^^%%%^^^_______%%__^^^______", /* NS nonstarters */
	"_^^%%%^^^______%%%__^^^______", /* EX exclamation/interrogation */
	"_^^%%%^^^__%_%__%%__^^^______", /* SY symbols allowing break after */
	"_^^%%%^^^__%%%__%%__^^^______", /* IS infix numeric separator */
	"%^^%%%^^^__%%%%_%%__^^^%%%%%_", /* PR prefix numeric */
	"%^^%%%^^^__%%%__%%__^^^______", /* PO postfix numeric */
	"%^^%%%^^^%%%%%_%%%__^^^______", /* NU numeric */
	"%^^%%%^^^__%%%_%%%__^^^______", /* AL ordinary alphabetic and symbol characters */
	"%^^%%%^^^__%%%_%%%__^^^______", /* HL hebrew letter */
	"_^^%%%^^^_%____%%%__^^^______", /* ID ideographic */
	"_^^%%%^^^______%%%__^^^______", /* IN inseparable characters */
	"_^^%_%^^^__%____%%__^^^______", /* HY hyphens */
	"_^^%_%^^^_______%%__^^^______", /* BA break after */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* BB break before */
	"_^^%%%^^^_______%%_^^^^______", /* B2 break opportunity before and after */
	"____________________^________", /* ZW zero width space */
	"%^^%%%^^^__%%%_%%%__^^^______", /* CM combining mark */
	"%^^%%%^^^%%%%%%%%%%%^^^%%%%%%", /* WJ word joiner */
	"_^^%%%^^^_%____%%%__^^^___%%_", /* H2 hangul leading/vowel syllable */
	"_^^%%%^^^_%____%%%__^^^____%_", /* H3 hangul leading/vowel/trailing syllable */
	"_^^%%%^^^_%____%%%__^^^%%%%__", /* JL hangul leading jamo */
	"_^^%%%^^^_%____%%%__^^^___%%_", /* JV hangul vowel jamo */
	"_^^%%%^^^_%____%%%__^^^____%_", /* JT hangul trailing jamo */
	"_^^%%%^^^_______%%__^^^_____%", /* RI regional indicator */
};

/*
 * Convert a run of text into flow nodes on the BOX_FLOW ancestor of 'box'.
 *
 * The box's white-space style decides how whitespace is treated:
 *  - WS_FORCE_BREAK_NEWLINE: CR, LF and CRLF each become a hard break.
 *  - WS_COLLAPSE: a whitespace run is only recorded in g->emit_white and
 *    emitted later by flush_space; otherwise each whitespace character is
 *    emitted individually.
 *  - WS_ALLOW_BREAK_SPACE: spaces become breakable space nodes and soft
 *    breaks are inserted between characters wherever the pairbrk table
 *    says a break is allowed ('_').
 * U+00AD always ends the current word and becomes a soft hyphen node.
 */
static void generate_text(fz_context *ctx, fz_html_box *box, const char *text, int lang, struct genstate *g)
{
	fz_html_box *flow;
	fz_pool *pool = g->pool;
	int collapse = box->style->white_space & WS_COLLAPSE;
	int bsp = box->style->white_space & WS_ALLOW_BREAK_SPACE;
	int bnl = box->style->white_space & WS_FORCE_BREAK_NEWLINE;

	static const char *space = " ";

	flow = box;
	while (flow->type != BOX_FLOW)
		flow = flow->up;

	while (*text)
	{
		if (bnl && (*text == '\n' || *text == '\r'))
		{
			if (text[0] == '\r' && text[1] == '\n')
				text += 2;
			else
				text += 1;
			add_flow_break(ctx, pool, flow, box);
			g->at_bol = 1;
		}
		else if (iswhite(*text))
		{
			if (collapse)
			{
				if (bnl)
				{
					/* Newlines are handled as hard breaks above, so stop at them. */
					while (*text == ' ' || *text == '\t')
						++text;
				}
				else
				{
					while (iswhite(*text))
						++text;
				}
				g->emit_white = 1;
			}
			else
			{
				/* TODO: tabs */
				if (bsp)
					add_flow_space(ctx, pool, flow, box);
				else
					add_flow_word(ctx, pool, flow, box, space, space+1, lang);
				++text;
			}
			g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a space */
		}
		else
		{
			/* 'mark' is the start of the word not yet emitted; 'prev' the start of the current rune. */
			const char *prev, *mark = text;
			int c;

			flush_space(ctx, flow, box, lang, g);

			if (g->at_bol)
				g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ;

			while (*text && !iswhite(*text))
			{
				prev = text;
				text += fz_chartorune(&c, text);
				if (c == 0xAD) /* soft hyphen */
				{
					if (mark != prev)
						add_flow_word(ctx, pool, flow, box, mark, prev, lang);
					add_flow_shyphen(ctx, pool, flow, box);
					mark = text;
					g->last_brk_cls = UCDN_LINEBREAK_CLASS_WJ; /* don't add sbreaks after a soft hyphen */
				}
				else if (bsp) /* allow soft breaks */
				{
					int this_brk_cls = ucdn_get_resolved_linebreak_class(c);
					if (this_brk_cls < UCDN_LINEBREAK_CLASS_RI)
					{
						int brk = pairbrk[g->last_brk_cls][this_brk_cls];

						/* we handle spaces elsewhere, so ignore these classes */
						if (brk == '@') brk = '^';
						if (brk == '#') brk = '^';
						if (brk == '%') brk = '^';

						if (brk == '_')
						{
							if (mark != prev)
								add_flow_word(ctx, pool, flow, box, mark, prev, lang);
							add_flow_sbreak(ctx, pool, flow, box);
							mark = prev;
						}

						g->last_brk_cls = this_brk_cls;
					}
				}
			}
			if (mark != text)
				add_flow_word(ctx, pool, flow, box, mark, text, lang);

			g->at_bol = 0;
		}
	}
}

/*
 * Load the image named by an 'src' attribute.
 *
 * Base64 "data:" URIs for JPEG and PNG are decoded in place; anything else
 * is treated as a path relative to base_uri (URL-decoded and cleaned) and
 * read from the archive. Returns NULL, after a warning, if loading fails.
 */
static fz_image *load_html_image(fz_context *ctx, fz_archive *zip, const char *base_uri, const char *src)
{
	char path[2048];
	fz_image *img = NULL;
	fz_buffer *buf = NULL;

	fz_var(img);
	fz_var(buf);

	fz_try(ctx)
	{
		if (!strncmp(src, "data:image/jpeg;base64,", 23))
			buf = fz_new_buffer_from_base64(ctx, src+23, 0);
		else if (!strncmp(src, "data:image/png;base64,", 22))
			buf = fz_new_buffer_from_base64(ctx, src+22, 0);
		else
		{
			fz_strlcpy(path, base_uri, sizeof path);
			fz_strlcat(path, "/", sizeof path);
			fz_strlcat(path, src, sizeof path);
			fz_urldecode(path);
			fz_cleanname(path);
			buf = fz_read_archive_entry(ctx, zip, path);
		}
#if FZ_ENABLE_SVG
		if (strstr(src, ".svg"))
			img = fz_new_image_from_svg(ctx, buf, base_uri, zip);
		else
#endif
			img = fz_new_image_from_buffer(ctx, buf);
	}
	fz_always(ctx)
		fz_drop_buffer(ctx, buf);
	fz_catch(ctx)
		fz_warn(ctx, "html: cannot load image src='%s'", src);

	return img;
}

/* Build an image from an inline <svg> element; returns NULL, after a warning, on failure. */
static fz_image *load_svg_image(fz_context *ctx, fz_archive *zip, const char *base_uri, fz_xml *xml)
{
	fz_image *img = NULL;
	fz_try(ctx)
		img = fz_new_image_from_svg_xml(ctx, xml, base_uri, zip);
	fz_catch(ctx)
		fz_warn(ctx, "html: cannot load embedded svg document");
	return img;
}

/* Append an anchor node for 'box' to its BOX_FLOW ancestor. */
static void generate_anchor(fz_context *ctx, fz_html_box *box, struct genstate *g)
{
	fz_pool *pool = g->pool;
	fz_html_box *flow = box;
	while (flow->type != BOX_FLOW)
		flow = flow->up;
	add_flow_anchor(ctx, pool, flow, box);
}

/*
 * Append an image, surrounded by soft breaks, to the BOX_FLOW ancestor of
 * 'box'. A NULL image is replaced by the placeholder word "[image]".
 *
 * The caller's reference to img is always dropped here (the flow node
 * keeps its own), including when adding the nodes throws.
 */
static void generate_image(fz_context *ctx, fz_html_box *box, fz_image *img, struct genstate *g)
{
	fz_html_box *flow = box;
	fz_pool *pool = g->pool;
	while (flow->type != BOX_FLOW)
		flow = flow->up;

	flush_space(ctx, flow, box, 0, g);

	if (!img)
	{
		const char *alt = "[image]";
		add_flow_word(ctx, pool, flow, box, alt, alt + 7, 0);
	}
	else
	{
		fz_try(ctx)
		{
			add_flow_sbreak(ctx, pool, flow, box);
			add_flow_image(ctx, pool, flow, box, img);
			add_flow_sbreak(ctx, pool, flow, box);
		}
		fz_always(ctx)
		{
			fz_drop_image(ctx, img);
		}
		fz_catch(ctx)
			fz_rethrow(ctx);
	}

	g->at_bol = 0;
}

/*
 * Reset the fields shared by every box: type BOX_BLOCK, zero geometry, no
 * links, an empty flow list and no style. Fields from 'padding' onwards
 * are not touched, so this is safe for short boxes too.
 */
static void init_box(fz_context *ctx, fz_html_box *box, fz_bidi_direction markup_dir)
{
	box->type = BOX_BLOCK;
	box->x = box->y = 0;
	box->w = box->b = 0;

	box->up = NULL;
	box->down = NULL;
	box->next = NULL;

	box->flow_head = NULL;
	box->flow_tail = &box->flow_head;
	box->markup_dir = markup_dir;
	box->style = NULL;
}

/*
 * Release the image references held in a box tree: iterates over siblings
 * via 'next' and recurses into children via 'down'. The boxes themselves
 * are not freed here.
 */
static void fz_drop_html_box(fz_context *ctx, fz_html_box *box)
{
	while (box)
	{
		fz_html_box *next = box->next;
		fz_drop_html_flow(ctx, box->flow_head);
		fz_drop_html_box(ctx, box->down);
		box = next;
	}
}

/* Storable drop callback: release the box tree's images, then the pool. */
static void fz_drop_html_imp(fz_context *ctx, fz_storable *stor)
{
	fz_html *html = (fz_html *)stor;
	fz_drop_html_box(ctx, html->root);
	fz_drop_pool(ctx, html->pool);
}

/* Drop a reference to an html object, with reaping deferred around the drop. */
void fz_drop_html(fz_context *ctx, fz_html *html)
{
	fz_defer_reap_start(ctx);
	fz_drop_storable(ctx, &html->storable);
	fz_defer_reap_end(ctx);
}

/* Take a new reference to an html object. */
fz_html *fz_keep_html(fz_context *ctx, fz_html *html)
{
	return fz_keep_storable(ctx, &html->storable);
}

/* Allocate and initialise a full-size box from the pool. */
static fz_html_box *new_box(fz_context *ctx, fz_pool *pool, fz_bidi_direction markup_dir)
{
	fz_html_box *box = fz_pool_alloc(ctx, pool, sizeof *box);
	init_box(ctx, box, markup_dir);
	return box;
}

/*
 * Allocate and initialise a truncated box that ends just before the
 * 'padding' member. Members from 'padding' onwards do not exist in such a
 * box and must not be accessed.
 */
static fz_html_box *new_short_box(fz_context *ctx, fz_pool *pool, fz_bidi_direction markup_dir)
{
	fz_html_box *box = fz_pool_alloc(ctx, pool, offsetof(fz_html_box, padding));
	init_box(ctx, box, markup_dir);
	return box;
}

/* Set the type of 'box' and link it in as the last child of 'top' (if any). */
static void insert_box(fz_context *ctx, fz_html_box *box, int type, fz_html_box *top)
{
	box->type = type;

	box->up = top;

	if (top)
	{
		/* Here 'next' really means 'last of my children'. This will
		 * be fixed up in a pass at the end of parsing. */
		if (!top->next)
		{
			top->down = top->next = box;
		}
		else
		{
			top->next->next = box;
			/* Here next actually means next */
			top->next = box;
		}
	}
}

/*
 * Insert 'box' as a BOX_BLOCK child of the nearest block box: 'top' itself
 * if it is a block, otherwise the closest BOX_BLOCK ancestor of a flow or
 * inline 'top'. Returns the box that was used as the parent.
 *
 * NOTE(review): for any other type of 'top' the box is not linked in at
 * all and 'top' is returned unchanged.
 */
static fz_html_box *insert_block_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	if (top->type == BOX_FLOW || top->type == BOX_INLINE)
	{
		while (top->type != BOX_BLOCK)
			top = top->up;
	}
	else if (top->type != BOX_BLOCK)
	{
		return top;
	}
	insert_box(ctx, box, BOX_BLOCK, top);
	return top;
}

/* Insert 'box' like a block box, then retype it as BOX_TABLE. */
static fz_html_box *insert_table_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	top = insert_block_box(ctx, box, top);
	box->type = BOX_TABLE;
	return top;
}

/*
 * Insert 'box' as a row of the nearest enclosing table and return that
 * table. Without an enclosing table, warn, insert it as a plain block and
 * return 'top'.
 */
static fz_html_box *insert_table_row_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	fz_html_box *table = top;
	while (table && table->type != BOX_TABLE)
		table = table->up;
	if (table)
	{
		insert_box(ctx, box, BOX_TABLE_ROW, table);
		return table;
	}
	fz_warn(ctx, "table-row not inside table element");
	insert_block_box(ctx, box, top);
	return top;
}

/*
 * Insert 'box' as a cell of the nearest enclosing table row and return
 * that row. Without an enclosing row, warn, insert it as a plain block and
 * return 'top'.
 */
static fz_html_box *insert_table_cell_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	fz_html_box *tr = top;
	while (tr && tr->type != BOX_TABLE_ROW)
		tr = tr->up;
	if (tr)
	{
		insert_box(ctx, box, BOX_TABLE_CELL, tr);
		return tr;
	}
	fz_warn(ctx, "table-cell not inside table-row element");
	insert_block_box(ctx, box, top);
	return top;
}

/*
 * Insert 'box' as a BOX_BREAK child of the nearest block box, using the
 * same parent selection as insert_block_box. Returns the parent used.
 *
 * NOTE(review): as there, a 'top' that is not a block, flow or inline box
 * leaves the box unlinked.
 */
static fz_html_box *insert_break_box(fz_context *ctx, fz_html_box *box, fz_html_box *top)
{
	if (top->type == BOX_FLOW || top->type == BOX_INLINE)
	{
		while (top->type != BOX_BLOCK)
			top = top->up;
	}
	else if (top->type != BOX_BLOCK)
	{
		return top;
	}
	insert_box(ctx, box, BOX_BREAK, top);
	return top;
}

/*
 * Insert 'box' as a BOX_INLINE.
 *
 * Inside a flow or inline parent it becomes a direct child. Otherwise it is
 * placed in the flow box that is currently the last child of the nearest
 * block or table cell; if that last child is not a flow box, a new flow box
 * with the default style is created first and g->at_bol is set.
 */
static void insert_inline_box(fz_context *ctx, fz_html_box *box, fz_html_box *top, int markup_dir, struct genstate *g)
{
	if (top->type == BOX_FLOW || top->type == BOX_INLINE)
	{
		insert_box(ctx, box, BOX_INLINE, top);
	}
	else
	{
		while (top->type != BOX_BLOCK && top->type != BOX_TABLE_CELL)
			top = top->up;

		/* Here 'next' actually means 'last of my children' */
		if (top->next && top->next->type == BOX_FLOW)
		{
			insert_box(ctx, box, BOX_INLINE, top->next);
		}
		else
		{
			fz_css_style style;
			fz_html_box *flow = new_short_box(ctx, g->pool, markup_dir);
			flow->is_first_flow = !top->next;
			fz_default_css_style(ctx, &style);
			flow->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
			insert_box(ctx, flow, BOX_FLOW, top);
			insert_box(ctx, box, BOX_INLINE, flow);
			g->at_bol = 1;
		}
	}
}

/*
 * Build boxes for 'node' and all of its following siblings, recursing into
 * children, with 'top' as the current parent box.
 *
 * Element handling, in order of precedence:
 *  - <br>: a hard break in the current flow, or a break box when the parent
 *    is not inline;
 *  - <img> with a src attribute: an inline image, with positive
 *    width/height attributes overriding the style;
 *  - <svg>: an inline image built from the element itself;
 *  - FB2 <image> with a "#name" reference: an image from g->images, as a
 *    block or inline according to its display value;
 *  - anything else whose display is not 'none': a box inserted according
 *    to its display value, followed by its children.
 * Text nodes become flow content; whitespace-only text under a collapsing
 * style is only recorded as pending whitespace.
 *
 * list_counter numbers list items among these siblings, section_depth
 * supplies FB2 heading levels, and markup_dir / markup_lang are the
 * inherited direction and language.
 *
 * Returns the parent box in effect after the last sibling; inserting a
 * block-level box may have moved it up the tree.
 */
static fz_html_box *
generate_boxes(fz_context *ctx,
	fz_xml *node,
	fz_html_box *top,
	fz_css_match *up_match,
	int list_counter,
	int section_depth,
	int markup_dir,
	int markup_lang,
	struct genstate *g)
{
	fz_css_match match;
	fz_html_box *box, *last_top;
	const char *tag;
	int display;
	fz_css_style style;

	while (node)
	{
		match.up = up_match;
		match.count = 0;

		tag = fz_xml_tag(node);
		if (tag)
		{
			fz_match_css(ctx, &match, g->css, node);

			display = fz_get_css_match_display(&match);

			if (tag[0]=='b' && tag[1]=='r' && tag[2]==0)
			{
				if (top->type == BOX_INLINE)
				{
					fz_html_box *flow = top;
					while (flow->type != BOX_FLOW)
						flow = flow->up;
					add_flow_break(ctx, g->pool, flow, top);
				}
				else
				{
					box = new_short_box(ctx, g->pool, markup_dir);
					fz_apply_css_style(ctx, g->set, &style, &match);
					box->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
					top = insert_break_box(ctx, box, top);
				}
				g->at_bol = 1;
			}

			else if (tag[0]=='i' && tag[1]=='m' && tag[2]=='g' && tag[3]==0)
			{
				const char *src = fz_xml_att(node, "src");
				if (src)
				{
					int w, h;
					const char *w_att = fz_xml_att(node, "width");
					const char *h_att = fz_xml_att(node, "height");
					box = new_short_box(ctx, g->pool, markup_dir);
					fz_apply_css_style(ctx, g->set, &style, &match);
					/* A positive width/height attribute overrides the style; a '%' anywhere in it selects percent units. */
					if (w_att && (w = fz_atoi(w_att)) > 0)
					{
						style.width.value = w;
						style.width.unit = strchr(w_att, '%') ? N_PERCENT : N_LENGTH;
					}
					if (h_att && (h = fz_atoi(h_att)) > 0)
					{
						style.height.value = h;
						style.height.unit = strchr(h_att, '%') ? N_PERCENT : N_LENGTH;
					}
					box->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
					insert_inline_box(ctx, box, top, markup_dir, g);
					generate_image(ctx, box, load_html_image(ctx, g->zip, g->base_uri, src), g);
				}
			}

			else if (tag[0]=='s' && tag[1]=='v' && tag[2]=='g' && tag[3]==0)
			{
				box = new_short_box(ctx, g->pool, markup_dir);
				fz_apply_css_style(ctx, g->set, &style, &match);
				box->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
				insert_inline_box(ctx, box, top, markup_dir, g);
				generate_image(ctx, box, load_svg_image(ctx, g->zip, g->base_uri, node), g);
			}

			else if (g->is_fb2 && tag[0]=='i' && tag[1]=='m' && tag[2]=='a' && tag[3]=='g' && tag[4]=='e' && tag[5]==0)
			{
				const char *src = fz_xml_att(node, "l:href");
				if (!src)
					src = fz_xml_att(node, "xlink:href");
				if (src && src[0] == '#')
				{
					/* generate_image drops one reference, so hand it a kept one below. */
					fz_image *img = fz_tree_lookup(ctx, g->images, src+1);
					if (display == DIS_BLOCK)
					{
						fz_html_box *imgbox;
						box = new_box(ctx, g->pool, markup_dir);
						fz_default_css_style(ctx, &style);
						fz_apply_css_style(ctx, g->set, &style, &match);
						box->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
						top = insert_block_box(ctx, box, top);
						imgbox = new_short_box(ctx, g->pool, markup_dir);
						fz_apply_css_style(ctx, g->set, &style, &match);
						imgbox->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
						insert_inline_box(ctx, imgbox, box, markup_dir, g);
						generate_image(ctx, imgbox, fz_keep_image(ctx, img), g);
					}
					else if (display == DIS_INLINE)
					{
						box = new_short_box(ctx, g->pool, markup_dir);
						fz_apply_css_style(ctx, g->set, &style, &match);
						box->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);
						insert_inline_box(ctx, box, top, markup_dir, g);
						generate_image(ctx, box, fz_keep_image(ctx, img), g);
					}
				}
			}

			else if (display != DIS_NONE)
			{
				const char *dir, *lang, *id, *href;
				int child_dir = markup_dir;
				int child_lang = markup_lang;

				dir = fz_xml_att(node, "dir");
				if (dir)
				{
					if (!strcmp(dir, "auto"))
						child_dir = FZ_BIDI_NEUTRAL;
					else if (!strcmp(dir, "rtl"))
						child_dir = FZ_BIDI_RTL;
					else if (!strcmp(dir, "ltr"))
						child_dir = FZ_BIDI_LTR;
					else
						child_dir = DEFAULT_DIR;
				}

				lang = fz_xml_att(node, "lang");
				if (lang)
					child_lang = fz_text_language_from_string(lang);

				/* Only inline boxes can use the truncated layout. */
				if (display == DIS_INLINE)
					box = new_short_box(ctx, g->pool, child_dir);
				else
					box = new_box(ctx, g->pool, child_dir);
				fz_default_css_style(ctx, &style);
				fz_apply_css_style(ctx, g->set, &style, &match);
				box->style = fz_css_enlist(ctx, &style, &g->styles, g->pool);

				id = fz_xml_att(node, "id");
				if (id)
					box->id = fz_pool_strdup(ctx, g->pool, id);

				if (display == DIS_BLOCK || display == DIS_INLINE_BLOCK)
				{
					top = insert_block_box(ctx, box, top);
					if (g->is_fb2)
					{
						if (!strcmp(tag, "title") || !strcmp(tag, "subtitle"))
							box->heading = fz_mini(section_depth, 6);
					}
					else
					{
						if (tag[0]=='h' && tag[1]>='1' && tag[1]<='6' && tag[2]==0)
							box->heading = tag[1] - '0';
					}
				}
				else if (display == DIS_LIST_ITEM)
				{
					top = insert_block_box(ctx, box, top);
					box->list_item = ++list_counter;
				}
				else if (display == DIS_INLINE)
				{
					insert_inline_box(ctx, box, top, child_dir, g);
					if (id)
						generate_anchor(ctx, box, g);
					if (tag[0]=='a' && tag[1]==0)
					{
						if (g->is_fb2)
						{
							href = fz_xml_att(node, "l:href");
							if (!href)
								href = fz_xml_att(node, "xlink:href");
						}
						else
						{
							href = fz_xml_att(node, "href");
						}
						if (href)
							box->href = fz_pool_strdup(ctx, g->pool, href);
					}
				}
				else if (display == DIS_TABLE)
				{
					top = insert_table_box(ctx, box, top);
				}
				else if (display == DIS_TABLE_ROW)
				{
					top = insert_table_row_box(ctx, box, top);
				}
				else if (display == DIS_TABLE_CELL)
				{
					top = insert_table_cell_box(ctx, box, top);
				}
				else
				{
					fz_warn(ctx, "unknown box display type");
					insert_box(ctx, box, BOX_BLOCK, top);
				}

				if (fz_xml_down(node))
				{
					int child_counter = list_counter;
					int child_section = section_depth;
					/* Each list restarts item numbering for its children. */
					if (!strcmp(tag, "ul") || !strcmp(tag, "ol"))
						child_counter = 0;
					if (!strcmp(tag, "section"))
						++child_section;
					last_top = generate_boxes(ctx, fz_xml_down(node), box, &match, child_counter, child_section, child_dir, child_lang, g);
					/* If the children moved the parent away from 'box', continue from where they ended up. */
					if (last_top != box)
						top = last_top;
				}
			}
		}
		else
		{
			const char *text = fz_xml_text(node);
			int collapse = top->style->white_space & WS_COLLAPSE;
			if (collapse && is_all_white(text))
			{
				g->emit_white = 1;
			}
			else
			{
				if (top->type != BOX_INLINE)
				{
					/* Create anonymous inline box, with the same style as the top block box. */
					fz_css_style anon_style;
					box = new_short_box(ctx, g->pool, markup_dir);
					fz_default_css_style(ctx, &anon_style);
					box->style = fz_css_enlist(ctx, &anon_style, &g->styles, g->pool);
					insert_inline_box(ctx, box, top, markup_dir, g);
					anon_style = *top->style;
					/* Make sure not to recursively multiply font sizes. */
					anon_style.font_size.value = 1;
					anon_style.font_size.unit = N_SCALE;
					box->style = fz_css_enlist(ctx, &anon_style, &g->styles, g->pool);
					generate_text(ctx, box, text, markup_lang, g);
				}
				else
				{
					generate_text(ctx, top, text, markup_lang, g);
				}
			}
		}

		node = fz_xml_next(node);
	}

	return top;
}

/*
 * Concatenate the text of all direct children of 'root' into one newly
 * allocated NUL-terminated string. Children without text are skipped.
 * The result comes from fz_malloc and is owned by the caller.
 */
static char *concat_text(fz_context *ctx, fz_xml *root)
{
	fz_xml *node;
	size_t i = 0, n = 1;
	char *s;

	/* First pass: total length, starting at 1 for the terminator. */
	for (node = fz_xml_down(root); node; node = fz_xml_next(node))
	{
		const char *text = fz_xml_text(node);
		n += text ? strlen(text) : 0;
	}

	/* Second pass: copy the pieces back to back. */
	s = fz_malloc(ctx, n);
	for (node = fz_xml_down(root); node; node = fz_xml_next(node))
	{
		const char *text = fz_xml_text(node);
		if (text)
		{
			n = strlen(text);
			memcpy(s+i, text, n);
			i += n;
		}
	}
	s[i] = 0;
	return s;
}

/*
 * Load one linked stylesheet: resolve href against base_uri, read it from
 * the archive, parse it into 'css' and add its font faces, using the
 * stylesheet's own directory as their base. Any failure is reduced to a
 * warning. The 'root' parameter is not used.
 */
static void html_load_css_link(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root, const char *href)
{
	char path[2048];
	char css_base_uri[2048];
	fz_buffer *buf = NULL;

	fz_var(buf);

	fz_strlcpy(path, base_uri, sizeof path);
	fz_strlcat(path, "/", sizeof path);
	fz_strlcat(path, href, sizeof path);
	fz_urldecode(path);
	fz_cleanname(path);

	fz_dirname(css_base_uri, path, sizeof css_base_uri);

	fz_try(ctx)
	{
		buf = fz_read_archive_entry(ctx, zip, path);
		fz_parse_css(ctx, css, fz_string_from_buffer(ctx, buf), path);
		fz_add_css_font_faces(ctx, set, zip, css_base_uri, css);
	}
	fz_always(ctx)
		fz_drop_buffer(ctx, buf);
	fz_catch(ctx)
		fz_warn(ctx, "ignoring stylesheet %s", path);
}

static void html_load_css(fz_context *ctx, fz_html_font_set *set, fz_archive *zip, const char *base_uri, fz_css *css, fz_xml *root)
{
	fz_xml *html, *head, *node;

	html = fz_xml_find(root, "html");
	head = fz_xml_find_down(html, "head");
	for (node = fz_xml_down(head); node; node = fz_xml_next(node))
	{
		if (fz_xml_is_tag(node, "link"))
		{
			char *rel = fz_xml_att(node, "rel");
			if (rel && !fz_strcasecmp(rel, "stylesheet"))
			{
				char *type = fz_xml_att(node, "type");
				if ((type && !strcmp(type, "text/css")) || !type)
				{
					char *href = fz_xml_att(node, "href");
					if (href)
					{
						html_load_css_link(ctx, set, zip, base_uri, css, root, href);
					}
				}
			}
		}
		else if (fz_xml_is_tag(node, "style"))
		{
			char *s = concat_text(ctx, node);
			fz_try(ctx)
				fz_parse_css(ctx, css, s, "