#include "mupdf/fitz.h"
#include "mupdf/pdf.h"

#include <stdlib.h>
#include <string.h>
#include <limits.h>

/*
	Return the number of pages in the document.

	While a linearized page count is recorded (linear_page_count != 0)
	that value is returned; otherwise the integer stored at
	Root/Pages/Count in the trailer is used.
*/
int
pdf_count_pages(fz_context *ctx, pdf_document *doc)
{
	/* FIXME: We should reset linear_page_count to 0 when editing starts
	 * (or when linear loading ends) */
	if (doc->linear_page_count != 0)
		return doc->linear_page_count;
	return pdf_to_int(ctx, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages/Count"));
}

/*
	fz_document flavoured wrapper around pdf_count_pages.
	The chapter argument is ignored.
*/
int pdf_count_pages_imp(fz_context *ctx, fz_document *doc, int chapter)
{
	return pdf_count_pages(ctx, (pdf_document*)doc);
}

/*
	Recursively walk the page tree below 'node', recording every Page
	object in doc->rev_page_map.

	idx: index of the next page to be recorded.

	Returns the index following the last page recorded. Throws on a cycle,
	on an object that is neither Pages nor Page, or when more pages are
	found than doc->rev_page_count allows.
*/
static int
pdf_load_page_tree_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int idx)
{
	pdf_obj *type = pdf_dict_get(ctx, node, PDF_NAME(Type));
	if (pdf_name_eq(ctx, type, PDF_NAME(Pages)))
	{
		pdf_obj *kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
		int i, n = pdf_array_len(ctx, kids);
		/* The mark is held while the children are visited so that a
		 * node reachable from itself is detected. */
		if (pdf_mark_obj(ctx, node))
			fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree");
		fz_try(ctx)
			for (i = 0; i < n; ++i)
				idx = pdf_load_page_tree_imp(ctx, doc, pdf_array_get(ctx, kids, i), idx);
		fz_always(ctx)
			pdf_unmark_obj(ctx, node);
		fz_catch(ctx)
			fz_rethrow(ctx);
	}
	else if (pdf_name_eq(ctx, type, PDF_NAME(Page)))
	{
		if (idx >= doc->rev_page_count)
			fz_throw(ctx, FZ_ERROR_GENERIC, "too many kids in page tree");
		doc->rev_page_map[idx].page = idx;
		doc->rev_page_map[idx].object = pdf_to_num(ctx, node);
		++idx;
	}
	else
	{
		fz_throw(ctx, FZ_ERROR_GENERIC, "non-page object in page tree");
	}
	return idx;
}

/* qsort comparator: order reverse page map entries by object number. */
static int
cmp_rev_page_map(const void *va, const void *vb)
{
	const pdf_rev_page_map *a = va;
	const pdf_rev_page_map *b = vb;
	return a->object - b->object;
}

/*
	Build the reverse page map (object number -> page number), sorted by
	object number so that pdf_lookup_page_number can binary search it.

	Does nothing if a map is already present. If the page tree walk fails,
	the half-built map is released again before the error is rethrown, so
	that later lookups never see uninitialised entries. If the tree holds
	fewer pages than the document claims, the map is truncated to the
	entries that were actually filled in.
*/
void
pdf_load_page_tree(fz_context *ctx, pdf_document *doc)
{
	if (doc->rev_page_map)
		return;

	doc->rev_page_count = pdf_count_pages(ctx, doc);
	doc->rev_page_map = fz_malloc_array(ctx, doc->rev_page_count, pdf_rev_page_map);
	fz_try(ctx)
	{
		int found = pdf_load_page_tree_imp(ctx, doc, pdf_dict_getp(ctx, pdf_trailer(ctx, doc), "Root/Pages"), 0);
		/* Only the first 'found' entries were written; never sort or
		 * search the uninitialised tail. */
		doc->rev_page_count = found;
		qsort(doc->rev_page_map, doc->rev_page_count, sizeof *doc->rev_page_map, cmp_rev_page_map);
	}
	fz_catch(ctx)
	{
		fz_free(ctx, doc->rev_page_map);
		doc->rev_page_map = NULL;
		doc->rev_page_count = 0;
		fz_rethrow(ctx);
	}
}

/* Release the reverse page map and reset its bookkeeping. */
void
pdf_drop_page_tree(fz_context *ctx, pdf_document *doc)
{
	fz_free(ctx, doc->rev_page_map);
	doc->rev_page_map = NULL;
	doc->rev_page_count = 0;
}

/* Number of marked nodes that can be tracked without a heap allocation. */
enum
{
	LOCAL_STACK_SIZE = 16
};

/*
	Descend from 'node' to the page that lies *skip pages into the subtree.

	skip: in/out count of pages still to be skipped.
	parentp, indexp: if non-NULL, receive the Pages node holding the hit
	and the hit's index within that node's Kids array.

	Returns the page object, or NULL if the Kids arrays were exhausted
	first. Throws on an empty Kids array or a cycle. Every node visited is
	marked, remembered on a stack, and unmarked again on the way out.
*/
static pdf_obj *
pdf_lookup_page_loc_imp(fz_context *ctx, pdf_document *doc, pdf_obj *node, int *skip, pdf_obj **parentp, int *indexp)
{
	pdf_obj *kids;
	pdf_obj *hit = NULL;
	int i, len;
	pdf_obj *local_stack[LOCAL_STACK_SIZE];
	pdf_obj **stack = &local_stack[0];
	int stack_max = LOCAL_STACK_SIZE;
	int stack_len = 0;

	fz_var(hit);
	fz_var(stack);
	fz_var(stack_len);
	fz_var(stack_max);

	fz_try(ctx)
	{
		do
		{
			kids = pdf_dict_get(ctx, node, PDF_NAME(Kids));
			len = pdf_array_len(ctx, kids);

			if (len == 0)
				fz_throw(ctx, FZ_ERROR_GENERIC, "malformed page tree");

			/* Every node we need to unmark goes into the stack */
			if (stack_len == stack_max)
			{
				if (stack == &local_stack[0])
				{
					stack = fz_malloc_array(ctx, stack_max * 2, pdf_obj*);
					memcpy(stack, &local_stack[0], stack_max * sizeof(*stack));
				}
				else
				{
					stack = fz_realloc_array(ctx, stack, stack_max * 2, pdf_obj*);
				}
				stack_max *= 2;
			}
			stack[stack_len++] = node;

			if (pdf_mark_obj(ctx, node))
				fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree");

			for (i = 0; i < len; i++)
			{
				pdf_obj *kid = pdf_array_get(ctx, kids, i);
				pdf_obj *type = pdf_dict_get(ctx, kid, PDF_NAME(Type));
				/* Without a Type, treat a kid that has Kids but no
				 * MediaBox as an intermediate node. */
				if (type ? pdf_name_eq(ctx, type, PDF_NAME(Pages)) : pdf_dict_get(ctx, kid, PDF_NAME(Kids)) && !pdf_dict_get(ctx, kid, PDF_NAME(MediaBox)))
				{
					int count = pdf_dict_get_int(ctx, kid, PDF_NAME(Count));
					if (*skip < count)
					{
						node = kid;
						break;
					}
					else
					{
						*skip -= count;
					}
				}
				else
				{
					if (type ? !pdf_name_eq(ctx, type, PDF_NAME(Page)) : !pdf_dict_get(ctx, kid, PDF_NAME(MediaBox)))
						fz_warn(ctx, "non-page object in page tree (%s)", pdf_to_name(ctx, type));
					if (*skip == 0)
					{
						if (parentp) *parentp = node;
						if (indexp) *indexp = i;
						hit = kid;
						break;
					}
					else
					{
						(*skip)--;
					}
				}
			}
		}
		/* If i < len && hit != NULL the desired page was found in the
		Kids array, done. If i < len && hit == NULL the found page tree
		node contains a Kids array that contains the desired page, loop
		back to top to extract it. When i == len the Kids array has been
		exhausted without finding the desired page, give up.
		*/
		while (hit == NULL && i < len);
	}
	fz_always(ctx)
	{
		for (i = stack_len; i > 0; i--)
			pdf_unmark_obj(ctx, stack[i-1]);
		if (stack != &local_stack[0])
			fz_free(ctx, stack);
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return hit;
}

/*
	Find page number 'needle' (numbered from 0) in the page tree.

	parentp, indexp: optional outputs, see pdf_lookup_page_loc_imp.

	Returns the page object. Throws if the document has no page tree or
	the page cannot be found.
*/
pdf_obj *
pdf_lookup_page_loc(fz_context *ctx, pdf_document *doc, int needle, pdf_obj **parentp, int *indexp)
{
	pdf_obj *root = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));
	pdf_obj *node = pdf_dict_get(ctx, root, PDF_NAME(Pages));
	int skip = needle;
	pdf_obj *hit;

	if (!node)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page tree");

	hit = pdf_lookup_page_loc_imp(ctx, doc, node, &skip, parentp, indexp);
	if (!hit)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page %d in page tree", needle+1);
	return hit;
}

/* Find page number 'needle' (numbered from 0) and return its object. */
pdf_obj *
pdf_lookup_page_obj(fz_context *ctx, pdf_document *doc, int needle)
{
	return pdf_lookup_page_loc(ctx, doc, needle, NULL, NULL);
}

/*
	Count the pages that precede the kid with object number 'kid_num'
	within 'parent'. Pages kids contribute their Count, anything else
	counts as one page.

	Throws if a Pages kid has a missing, non-integer or negative Count,
	or if no kid has the requested object number.
*/
static int
pdf_count_pages_before_kid(fz_context *ctx, pdf_document *doc, pdf_obj *parent, int kid_num)
{
	pdf_obj *kids = pdf_dict_get(ctx, parent, PDF_NAME(Kids));
	int i, total = 0, len = pdf_array_len(ctx, kids);
	for (i = 0; i < len; i++)
	{
		pdf_obj *kid = pdf_array_get(ctx, kids, i);
		if (pdf_to_num(ctx, kid) == kid_num)
			return total;
		if (pdf_name_eq(ctx, pdf_dict_get(ctx, kid, PDF_NAME(Type)), PDF_NAME(Pages)))
		{
			pdf_obj *count = pdf_dict_get(ctx, kid, PDF_NAME(Count));
			int n = pdf_to_int(ctx, count);
			if (!pdf_is_int(ctx, count) || n < 0)
				fz_throw(ctx, FZ_ERROR_GENERIC, "illegal or missing count in pages tree");
			total += n;
		}
		else
			total++;
	}
	fz_throw(ctx, FZ_ERROR_GENERIC, "kid not found in parent's kids array");
}

/*
	Compute the page number of a Page object by climbing its Parent chain
	and summing the pages that precede it at every level.

	Throws if 'node' is not a Page or if the Parent chain loops.
*/
static int
pdf_lookup_page_number_slow(fz_context *ctx, pdf_document *doc, pdf_obj *node)
{
	int needle = pdf_to_num(ctx, node);
	int total = 0;
	pdf_obj *parent, *parent2;

	if (!pdf_name_eq(ctx, pdf_dict_get(ctx, node, PDF_NAME(Type)), PDF_NAME(Page)))
		fz_throw(ctx, FZ_ERROR_GENERIC, "invalid page object");

	parent2 = parent = pdf_dict_get(ctx, node, PDF_NAME(Parent));
	fz_var(parent);
	fz_try(ctx)
	{
		while (pdf_is_dict(ctx, parent))
		{
			if (pdf_mark_obj(ctx, parent))
				fz_throw(ctx, FZ_ERROR_GENERIC, "cycle in page tree (parents)");
			total += pdf_count_pages_before_kid(ctx, doc, parent, needle);
			needle = pdf_to_num(ctx, parent);
			parent = pdf_dict_get(ctx, parent, PDF_NAME(Parent));
		}
	}
	fz_always(ctx)
	{
		/* Run back and unmark */
		while (parent2)
		{
			pdf_unmark_obj(ctx, parent2);
			if (parent2 == parent)
				break;
			parent2 = pdf_dict_get(ctx, parent2, PDF_NAME(Parent));
		}
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return total;
}

/*
	Binary search the reverse page map for object number 'needle'.
	Returns the page number, or -1 if the object is not in the map.
*/
static int
pdf_lookup_page_number_fast(fz_context *ctx, pdf_document *doc, int needle)
{
	int l = 0;
	int r = doc->rev_page_count - 1;
	while (l <= r)
	{
		int m = (l + r) >> 1;
		int c = needle - doc->rev_page_map[m].object;
		if (c < 0)
			r = m - 1;
		else if (c > 0)
			l = m + 1;
		else
			return doc->rev_page_map[m].page;
	}
	return -1;
}

/*
	Return the page number of a page object, using the reverse page map
	when one has been loaded and the slow tree walk otherwise.
*/
int
pdf_lookup_page_number(fz_context *ctx, pdf_document *doc, pdf_obj *page)
{
	if (doc->rev_page_map)
		return pdf_lookup_page_number_fast(ctx, doc, pdf_to_num(ctx, page));
	else
		return pdf_lookup_page_number_slow(ctx, doc, page);
}

/*
	Find the page number of a named destination.

	For use with looking up the destination page of a fragment
	identifier in hyperlinks: foo.pdf#bar or foo.pdf#page=5.
*/ int pdf_lookup_anchor(fz_context *ctx, pdf_document *doc, const char *name, float *xp, float *yp) { pdf_obj *needle, *dest = NULL; char *uri; if (xp) *xp = 0; if (yp) *yp = 0; needle = pdf_new_string(ctx, name, strlen(name)); fz_try(ctx) dest = pdf_lookup_dest(ctx, doc, needle); fz_always(ctx) pdf_drop_obj(ctx, needle); fz_catch(ctx) fz_rethrow(ctx); if (dest) { uri = pdf_parse_link_dest(ctx, doc, dest); return pdf_resolve_link(ctx, doc, uri, xp, yp); } if (!strncmp(name, "page=", 5)) return fz_atoi(name + 5) - 1; return fz_atoi(name) - 1; } static void pdf_flatten_inheritable_page_item(fz_context *ctx, pdf_obj *page, pdf_obj *key) { pdf_obj *val = pdf_dict_get_inheritable(ctx, page, key); if (val) pdf_dict_put(ctx, page, key, val); } /* Make page self sufficient. Copy any inheritable page keys into the actual page object, removing any dependencies on the page tree parents. */ void pdf_flatten_inheritable_page_items(fz_context *ctx, pdf_obj *page) { pdf_flatten_inheritable_page_item(ctx, page, PDF_NAME(MediaBox)); pdf_flatten_inheritable_page_item(ctx, page, PDF_NAME(CropBox)); pdf_flatten_inheritable_page_item(ctx, page, PDF_NAME(Rotate)); pdf_flatten_inheritable_page_item(ctx, page, PDF_NAME(Resources)); } /* We need to know whether to install a page-level transparency group */ /* * Object memo flags - allows us to secretly remember "a memo" (a bool) in an * object, and to read back whether there was a memo, and if so, what it was. 
*/ enum { PDF_FLAGS_MEMO_BM = 0, PDF_FLAGS_MEMO_OP = 1 }; static int pdf_resources_use_blending(fz_context *ctx, pdf_obj *rdb); static int pdf_extgstate_uses_blending(fz_context *ctx, pdf_obj *dict) { pdf_obj *obj = pdf_dict_get(ctx, dict, PDF_NAME(BM)); if (obj && !pdf_name_eq(ctx, obj, PDF_NAME(Normal))) return 1; return 0; } static int pdf_pattern_uses_blending(fz_context *ctx, pdf_obj *dict) { pdf_obj *obj; obj = pdf_dict_get(ctx, dict, PDF_NAME(Resources)); if (pdf_resources_use_blending(ctx, obj)) return 1; obj = pdf_dict_get(ctx, dict, PDF_NAME(ExtGState)); return pdf_extgstate_uses_blending(ctx, obj); } static int pdf_xobject_uses_blending(fz_context *ctx, pdf_obj *dict) { pdf_obj *obj = pdf_dict_get(ctx, dict, PDF_NAME(Resources)); if (pdf_name_eq(ctx, pdf_dict_getp(ctx, dict, "Group/S"), PDF_NAME(Transparency))) return 1; return pdf_resources_use_blending(ctx, obj); } static int pdf_resources_use_blending(fz_context *ctx, pdf_obj *rdb) { pdf_obj *obj; int i, n, useBM = 0; if (!rdb) return 0; /* Have we been here before and remembered an answer? 
*/ if (pdf_obj_memo(ctx, rdb, PDF_FLAGS_MEMO_BM, &useBM)) return useBM; /* stop on cyclic resource dependencies */ if (pdf_mark_obj(ctx, rdb)) return 0; fz_try(ctx) { obj = pdf_dict_get(ctx, rdb, PDF_NAME(ExtGState)); n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) if (pdf_extgstate_uses_blending(ctx, pdf_dict_get_val(ctx, obj, i))) goto found; obj = pdf_dict_get(ctx, rdb, PDF_NAME(Pattern)); n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) if (pdf_pattern_uses_blending(ctx, pdf_dict_get_val(ctx, obj, i))) goto found; obj = pdf_dict_get(ctx, rdb, PDF_NAME(XObject)); n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) if (pdf_xobject_uses_blending(ctx, pdf_dict_get_val(ctx, obj, i))) goto found; if (0) { found: useBM = 1; } } fz_always(ctx) { pdf_unmark_obj(ctx, rdb); } fz_catch(ctx) { fz_rethrow(ctx); } pdf_set_obj_memo(ctx, rdb, PDF_FLAGS_MEMO_BM, useBM); return useBM; } static int pdf_resources_use_overprint(fz_context *ctx, pdf_obj *rdb); static int pdf_extgstate_uses_overprint(fz_context *ctx, pdf_obj *dict) { pdf_obj *obj = pdf_dict_get(ctx, dict, PDF_NAME(OP)); if (obj && pdf_to_bool(ctx, obj)) return 1; return 0; } static int pdf_pattern_uses_overprint(fz_context *ctx, pdf_obj *dict) { pdf_obj *obj; obj = pdf_dict_get(ctx, dict, PDF_NAME(Resources)); if (pdf_resources_use_overprint(ctx, obj)) return 1; obj = pdf_dict_get(ctx, dict, PDF_NAME(ExtGState)); return pdf_extgstate_uses_overprint(ctx, obj); } static int pdf_xobject_uses_overprint(fz_context *ctx, pdf_obj *dict) { pdf_obj *obj = pdf_dict_get(ctx, dict, PDF_NAME(Resources)); return pdf_resources_use_overprint(ctx, obj); } static int pdf_resources_use_overprint(fz_context *ctx, pdf_obj *rdb) { pdf_obj *obj; int i, n, useOP = 0; if (!rdb) return 0; /* Have we been here before and remembered an answer? 
*/ if (pdf_obj_memo(ctx, rdb, PDF_FLAGS_MEMO_OP, &useOP)) return useOP; /* stop on cyclic resource dependencies */ if (pdf_mark_obj(ctx, rdb)) return 0; fz_try(ctx) { obj = pdf_dict_get(ctx, rdb, PDF_NAME(ExtGState)); n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) if (pdf_extgstate_uses_overprint(ctx, pdf_dict_get_val(ctx, obj, i))) goto found; obj = pdf_dict_get(ctx, rdb, PDF_NAME(Pattern)); n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) if (pdf_pattern_uses_overprint(ctx, pdf_dict_get_val(ctx, obj, i))) goto found; obj = pdf_dict_get(ctx, rdb, PDF_NAME(XObject)); n = pdf_dict_len(ctx, obj); for (i = 0; i < n; i++) if (pdf_xobject_uses_overprint(ctx, pdf_dict_get_val(ctx, obj, i))) goto found; if (0) { found: useOP = 1; } } fz_always(ctx) { pdf_unmark_obj(ctx, rdb); } fz_catch(ctx) { fz_rethrow(ctx); } pdf_set_obj_memo(ctx, rdb, PDF_FLAGS_MEMO_OP, useOP); return useOP; } fz_transition * pdf_page_presentation(fz_context *ctx, pdf_page *page, fz_transition *transition, float *duration) { pdf_obj *obj, *transdict; *duration = pdf_dict_get_real(ctx, page->obj, PDF_NAME(Dur)); transdict = pdf_dict_get(ctx, page->obj, PDF_NAME(Trans)); if (!transdict) return NULL; obj = pdf_dict_get(ctx, transdict, PDF_NAME(D)); transition->duration = (obj ? pdf_to_real(ctx, obj) : 1); transition->vertical = !pdf_name_eq(ctx, pdf_dict_get(ctx, transdict, PDF_NAME(Dm)), PDF_NAME(H)); transition->outwards = !pdf_name_eq(ctx, pdf_dict_get(ctx, transdict, PDF_NAME(M)), PDF_NAME(I)); /* FIXME: If 'Di' is None, it should be handled differently, but * this only affects Fly, and we don't implement that currently. 
*/ transition->direction = (pdf_dict_get_int(ctx, transdict, PDF_NAME(Di))); /* FIXME: Read SS for Fly when we implement it */ /* FIXME: Read B for Fly when we implement it */ obj = pdf_dict_get(ctx, transdict, PDF_NAME(S)); if (pdf_name_eq(ctx, obj, PDF_NAME(Split))) transition->type = FZ_TRANSITION_SPLIT; else if (pdf_name_eq(ctx, obj, PDF_NAME(Blinds))) transition->type = FZ_TRANSITION_BLINDS; else if (pdf_name_eq(ctx, obj, PDF_NAME(Box))) transition->type = FZ_TRANSITION_BOX; else if (pdf_name_eq(ctx, obj, PDF_NAME(Wipe))) transition->type = FZ_TRANSITION_WIPE; else if (pdf_name_eq(ctx, obj, PDF_NAME(Dissolve))) transition->type = FZ_TRANSITION_DISSOLVE; else if (pdf_name_eq(ctx, obj, PDF_NAME(Glitter))) transition->type = FZ_TRANSITION_GLITTER; else if (pdf_name_eq(ctx, obj, PDF_NAME(Fly))) transition->type = FZ_TRANSITION_FLY; else if (pdf_name_eq(ctx, obj, PDF_NAME(Push))) transition->type = FZ_TRANSITION_PUSH; else if (pdf_name_eq(ctx, obj, PDF_NAME(Cover))) transition->type = FZ_TRANSITION_COVER; else if (pdf_name_eq(ctx, obj, PDF_NAME(Uncover))) transition->type = FZ_TRANSITION_UNCOVER; else if (pdf_name_eq(ctx, obj, PDF_NAME(Fade))) transition->type = FZ_TRANSITION_FADE; else transition->type = FZ_TRANSITION_NONE; return transition; } /* Determine the size of a page. Determine the page size in user space units, taking page rotation into account. The page size is taken to be the crop box if it exists (visible area after cropping), otherwise the media box will be used (possibly including printing marks). 
*/ fz_rect pdf_bound_page(fz_context *ctx, pdf_page *page) { fz_matrix page_ctm; fz_rect mediabox; pdf_page_transform(ctx, page, &mediabox, &page_ctm); return fz_transform_rect(mediabox, page_ctm); } fz_link * pdf_load_links(fz_context *ctx, pdf_page *page) { return fz_keep_link(ctx, page->links); } pdf_obj * pdf_page_resources(fz_context *ctx, pdf_page *page) { return pdf_dict_get_inheritable(ctx, page->obj, PDF_NAME(Resources)); } pdf_obj * pdf_page_contents(fz_context *ctx, pdf_page *page) { return pdf_dict_get(ctx, page->obj, PDF_NAME(Contents)); } pdf_obj * pdf_page_group(fz_context *ctx, pdf_page *page) { return pdf_dict_get(ctx, page->obj, PDF_NAME(Group)); } void pdf_page_obj_transform(fz_context *ctx, pdf_obj *pageobj, fz_rect *page_mediabox, fz_matrix *page_ctm) { pdf_obj *obj; fz_rect mediabox, cropbox, realbox, pagebox; float userunit = 1; int rotate; if (!page_mediabox) page_mediabox = &pagebox; obj = pdf_dict_get(ctx, pageobj, PDF_NAME(UserUnit)); if (pdf_is_real(ctx, obj)) userunit = pdf_to_real(ctx, obj); mediabox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(MediaBox))); if (fz_is_empty_rect(mediabox)) { mediabox.x0 = 0; mediabox.y0 = 0; mediabox.x1 = 612; mediabox.y1 = 792; } cropbox = pdf_to_rect(ctx, pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(CropBox))); if (!fz_is_empty_rect(cropbox)) mediabox = fz_intersect_rect(mediabox, cropbox); page_mediabox->x0 = fz_min(mediabox.x0, mediabox.x1); page_mediabox->y0 = fz_min(mediabox.y0, mediabox.y1); page_mediabox->x1 = fz_max(mediabox.x0, mediabox.x1); page_mediabox->y1 = fz_max(mediabox.y0, mediabox.y1); if (page_mediabox->x1 - page_mediabox->x0 < 1 || page_mediabox->y1 - page_mediabox->y0 < 1) *page_mediabox = fz_unit_rect; rotate = pdf_to_int(ctx, pdf_dict_get_inheritable(ctx, pageobj, PDF_NAME(Rotate))); /* Snap page rotation to 0, 90, 180 or 270 */ if (rotate < 0) rotate = 360 - ((-rotate) % 360); if (rotate >= 360) rotate = rotate % 360; rotate = 90*((rotate + 45)/90); if 
(rotate >= 360) rotate = 0; /* Compute transform from fitz' page space (upper left page origin, y descending, 72 dpi) * to PDF user space (arbitrary page origin, y ascending, UserUnit dpi). */ /* Make left-handed and scale by UserUnit */ *page_ctm = fz_scale(userunit, -userunit); /* Rotate */ *page_ctm = fz_pre_rotate(*page_ctm, -rotate); /* Translate page origin to 0,0 */ realbox = fz_transform_rect(*page_mediabox, *page_ctm); *page_ctm = fz_concat(*page_ctm, fz_translate(-realbox.x0, -realbox.y0)); } void pdf_page_transform(fz_context *ctx, pdf_page *page, fz_rect *page_mediabox, fz_matrix *page_ctm) { pdf_page_obj_transform(ctx, page->obj, page_mediabox, page_ctm); } static void find_seps(fz_context *ctx, fz_separations **seps, pdf_obj *obj, pdf_obj *clearme) { int i, n; pdf_obj *nameobj; /* Indexed and DeviceN may have cyclic references */ if (pdf_is_indirect(ctx, obj)) { if (pdf_mark_obj(ctx, obj)) return; /* already been here */ /* remember to clear this colorspace dictionary at the end */ pdf_array_push(ctx, clearme, obj); } nameobj = pdf_array_get(ctx, obj, 0); if (pdf_name_eq(ctx, nameobj, PDF_NAME(Separation))) { fz_colorspace *cs; const char *name = pdf_to_name(ctx, pdf_array_get(ctx, obj, 1)); /* Skip 'special' colorants. 
*/ if (!strcmp(name, "Black") || !strcmp(name, "Cyan") || !strcmp(name, "Magenta") || !strcmp(name, "Yellow") || !strcmp(name, "All") || !strcmp(name, "None")) return; n = fz_count_separations(ctx, *seps); for (i = 0; i < n; i++) { if (!strcmp(name, fz_separation_name(ctx, *seps, i))) return; /* Got that one already */ } fz_try(ctx) cs = pdf_load_colorspace(ctx, obj); fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); return; /* ignore broken colorspace */ } fz_try(ctx) { if (!*seps) *seps = fz_new_separations(ctx, 0); fz_add_separation(ctx, *seps, name, cs, 0); } fz_always(ctx) fz_drop_colorspace(ctx, cs); fz_catch(ctx) fz_rethrow(ctx); } else if (pdf_name_eq(ctx, nameobj, PDF_NAME(Indexed))) { find_seps(ctx, seps, pdf_array_get(ctx, obj, 1), clearme); } else if (pdf_name_eq(ctx, nameobj, PDF_NAME(DeviceN))) { /* If the separation colorants exists for this DeviceN color space * add those prior to our search for DeviceN color */ pdf_obj *cols = pdf_dict_get(ctx, pdf_array_get(ctx, obj, 4), PDF_NAME(Colorants)); n = pdf_dict_len(ctx, cols); for (i = 0; i < n; i++) find_seps(ctx, seps, pdf_dict_get_val(ctx, cols, i), clearme); } } static void find_devn(fz_context *ctx, fz_separations **seps, pdf_obj *obj, pdf_obj *clearme) { int i, j, n, m; pdf_obj *arr; pdf_obj *nameobj = pdf_array_get(ctx, obj, 0); if (!pdf_name_eq(ctx, nameobj, PDF_NAME(DeviceN))) return; arr = pdf_array_get(ctx, obj, 1); m = pdf_array_len(ctx, arr); for (j = 0; j < m; j++) { fz_colorspace *cs; const char *name = pdf_to_name(ctx, pdf_array_get(ctx, arr, j)); /* Skip 'special' colorants. 
*/ if (!strcmp(name, "Black") || !strcmp(name, "Cyan") || !strcmp(name, "Magenta") || !strcmp(name, "Yellow") || !strcmp(name, "All") || !strcmp(name, "None")) continue; n = fz_count_separations(ctx, *seps); for (i = 0; i < n; i++) { if (!strcmp(name, fz_separation_name(ctx, *seps, i))) break; /* Got that one already */ } if (i == n) { fz_try(ctx) cs = pdf_load_colorspace(ctx, obj); fz_catch(ctx) { fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); continue; /* ignore broken colorspace */ } fz_try(ctx) { if (!*seps) *seps = fz_new_separations(ctx, 0); fz_add_separation(ctx, *seps, name, cs, j); } fz_always(ctx) fz_drop_colorspace(ctx, cs); fz_catch(ctx) fz_rethrow(ctx); } } } typedef void (res_finder_fn)(fz_context *ctx, fz_separations **seps, pdf_obj *obj, pdf_obj *clearme); static void scan_page_seps(fz_context *ctx, pdf_obj *res, fz_separations **seps, res_finder_fn *fn, pdf_obj *clearme) { pdf_obj *dict; pdf_obj *obj; int i, n; if (pdf_mark_obj(ctx, res)) return; /* already been here */ /* remember to clear this resource dictionary at the end */ pdf_array_push(ctx, clearme, res); dict = pdf_dict_get(ctx, res, PDF_NAME(ColorSpace)); n = pdf_dict_len(ctx, dict); for (i = 0; i < n; i++) { obj = pdf_dict_get_val(ctx, dict, i); fn(ctx, seps, obj, clearme); } dict = pdf_dict_get(ctx, res, PDF_NAME(Shading)); n = pdf_dict_len(ctx, dict); for (i = 0; i < n; i++) { obj = pdf_dict_get_val(ctx, dict, i); fn(ctx, seps, pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace)), clearme); } dict = pdf_dict_get(ctx, res, PDF_NAME(XObject)); n = pdf_dict_len(ctx, dict); for (i = 0; i < n; i++) { obj = pdf_dict_get_val(ctx, dict, i); fn(ctx, seps, pdf_dict_get(ctx, obj, PDF_NAME(ColorSpace)), clearme); /* Recurse on XObject forms. */ scan_page_seps(ctx, pdf_dict_get(ctx, obj, PDF_NAME(Resources)), seps, fn, clearme); } } /* Get the separation details for a page. 
*/ fz_separations * pdf_page_separations(fz_context *ctx, pdf_page *page) { pdf_obj *res = pdf_page_resources(ctx, page); pdf_obj *clearme = NULL; fz_separations *seps = NULL; clearme = pdf_new_array(ctx, page->doc, 100); fz_try(ctx) { /* Run through and look for separations first. This is * because separations are simplest to deal with, and * because DeviceN may be implemented on top of separations. */ scan_page_seps(ctx, res, &seps, find_seps, clearme); } fz_always(ctx) { int i, n = pdf_array_len(ctx, clearme); for (i = 0; i < n; ++i) pdf_unmark_obj(ctx, pdf_array_get(ctx, clearme, i)); pdf_drop_obj(ctx, clearme); } fz_catch(ctx) { fz_drop_separations(ctx, seps); fz_rethrow(ctx); } clearme = pdf_new_array(ctx, page->doc, 100); fz_try(ctx) { /* Now run through again, and look for DeviceNs. These may * have spot colors in that aren't defined in terms of * separations. */ scan_page_seps(ctx, res, &seps, find_devn, clearme); } fz_always(ctx) { int i, n = pdf_array_len(ctx, clearme); for (i = 0; i < n; ++i) pdf_unmark_obj(ctx, pdf_array_get(ctx, clearme, i)); pdf_drop_obj(ctx, clearme); } fz_catch(ctx) { fz_drop_separations(ctx, seps); fz_rethrow(ctx); } return seps; } int pdf_page_uses_overprint(fz_context *ctx, pdf_page *page) { return page ? 
page->overprint : 0; } static void pdf_drop_page_imp(fz_context *ctx, pdf_page *page) { fz_drop_link(ctx, page->links); pdf_drop_annots(ctx, page->annots); pdf_drop_widgets(ctx, page->widgets); pdf_drop_obj(ctx, page->obj); fz_drop_document(ctx, &page->doc->super); } static pdf_page * pdf_new_page(fz_context *ctx, pdf_document *doc) { pdf_page *page = fz_new_derived_page(ctx, pdf_page); page->doc = (pdf_document*) fz_keep_document(ctx, &doc->super); page->super.drop_page = (fz_page_drop_page_fn*)pdf_drop_page_imp; page->super.load_links = (fz_page_load_links_fn*)pdf_load_links; page->super.bound_page = (fz_page_bound_page_fn*)pdf_bound_page; page->super.run_page_contents = (fz_page_run_page_fn*)pdf_run_page_contents; page->super.run_page_annots = (fz_page_run_page_fn*)pdf_run_page_annots; page->super.run_page_widgets = (fz_page_run_page_fn*)pdf_run_page_widgets; page->super.page_presentation = (fz_page_page_presentation_fn*)pdf_page_presentation; page->super.separations = (fz_page_separations_fn *)pdf_page_separations; page->super.overprint = (fz_page_uses_overprint_fn *)pdf_page_uses_overprint; page->obj = NULL; page->transparency = 0; page->links = NULL; page->annots = NULL; page->annot_tailp = &page->annots; page->widgets = NULL; page->widget_tailp = &page->widgets; return page; } static void pdf_load_default_colorspaces_imp(fz_context *ctx, fz_default_colorspaces *default_cs, pdf_obj *obj) { pdf_obj *cs_obj; /* The spec says to ignore any colors we can't understand */ cs_obj = pdf_dict_get(ctx, obj, PDF_NAME(DefaultGray)); if (cs_obj) { fz_try(ctx) { fz_colorspace *cs = pdf_load_colorspace(ctx, cs_obj); fz_set_default_gray(ctx, default_cs, cs); fz_drop_colorspace(ctx, cs); } fz_catch(ctx) fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); } cs_obj = pdf_dict_get(ctx, obj, PDF_NAME(DefaultRGB)); if (cs_obj) { fz_try(ctx) { fz_colorspace *cs = pdf_load_colorspace(ctx, cs_obj); fz_set_default_rgb(ctx, default_cs, cs); fz_drop_colorspace(ctx, cs); } fz_catch(ctx) 
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); } cs_obj = pdf_dict_get(ctx, obj, PDF_NAME(DefaultCMYK)); if (cs_obj) { fz_try(ctx) { fz_colorspace *cs = pdf_load_colorspace(ctx, cs_obj); fz_set_default_cmyk(ctx, default_cs, cs); fz_drop_colorspace(ctx, cs); } fz_catch(ctx) fz_rethrow_if(ctx, FZ_ERROR_TRYLATER); } } fz_default_colorspaces * pdf_load_default_colorspaces(fz_context *ctx, pdf_document *doc, pdf_page *page) { pdf_obj *res; pdf_obj *obj; fz_default_colorspaces *default_cs; fz_colorspace *oi; default_cs = fz_new_default_colorspaces(ctx); fz_try(ctx) { res = pdf_page_resources(ctx, page); obj = pdf_dict_get(ctx, res, PDF_NAME(ColorSpace)); if (obj) pdf_load_default_colorspaces_imp(ctx, default_cs, obj); oi = pdf_document_output_intent(ctx, doc); if (oi) fz_set_default_output_intent(ctx, default_cs, oi); } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) { fz_drop_default_colorspaces(ctx, default_cs); fz_rethrow(ctx); } page->super.incomplete = 1; } return default_cs; } /* Update default colorspaces for an xobject. */ fz_default_colorspaces * pdf_update_default_colorspaces(fz_context *ctx, fz_default_colorspaces *old_cs, pdf_obj *res) { pdf_obj *obj; fz_default_colorspaces *new_cs; obj = pdf_dict_get(ctx, res, PDF_NAME(ColorSpace)); if (!obj) return fz_keep_default_colorspaces(ctx, old_cs); new_cs = fz_clone_default_colorspaces(ctx, old_cs); fz_try(ctx) pdf_load_default_colorspaces_imp(ctx, new_cs, obj); fz_catch(ctx) { fz_drop_default_colorspaces(ctx, new_cs); fz_rethrow(ctx); } return new_cs; } /* Load a page and its resources. Locates the page in the PDF document and loads the page and its resources. After pdf_load_page is it possible to retrieve the size of the page using pdf_bound_page, or to render the page using pdf_run_page_*. number: page number, where 0 is the first page of the document. 
*/ pdf_page * pdf_load_page(fz_context *ctx, pdf_document *doc, int number) { pdf_page *page; pdf_annot *annot; pdf_obj *pageobj, *obj; if (doc->file_reading_linearly) { pageobj = pdf_progressive_advance(ctx, doc, number); if (pageobj == NULL) fz_throw(ctx, FZ_ERROR_TRYLATER, "page %d not available yet", number); } else pageobj = pdf_lookup_page_obj(ctx, doc, number); page = pdf_new_page(ctx, doc); page->obj = pdf_keep_obj(ctx, pageobj); /* Pre-load annotations and links */ fz_try(ctx) { obj = pdf_dict_get(ctx, pageobj, PDF_NAME(Annots)); if (obj) { fz_rect page_mediabox; fz_matrix page_ctm; pdf_page_transform(ctx, page, &page_mediabox, &page_ctm); page->links = pdf_load_link_annots(ctx, doc, obj, number, page_ctm); pdf_load_annots(ctx, page, obj); } } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) { fz_drop_page(ctx, &page->super); fz_rethrow(ctx); } page->super.incomplete = 1; fz_drop_link(ctx, page->links); page->links = NULL; } /* Scan for transparency and overprint */ fz_try(ctx) { pdf_obj *resources = pdf_page_resources(ctx, page); if (pdf_name_eq(ctx, pdf_dict_getp(ctx, pageobj, "Group/S"), PDF_NAME(Transparency))) page->transparency = 1; else if (pdf_resources_use_blending(ctx, resources)) page->transparency = 1; for (annot = page->annots; annot && !page->transparency; annot = annot->next) if (annot->ap && pdf_resources_use_blending(ctx, pdf_xobject_resources(ctx, annot->ap))) page->transparency = 1; if (pdf_resources_use_overprint(ctx, resources)) page->overprint = 1; for (annot = page->annots; annot && !page->overprint; annot = annot->next) if (annot->ap && pdf_resources_use_overprint(ctx, pdf_xobject_resources(ctx, annot->ap))) page->overprint = 1; } fz_catch(ctx) { if (fz_caught(ctx) != FZ_ERROR_TRYLATER) { fz_drop_page(ctx, &page->super); fz_rethrow(ctx); } page->super.incomplete = 1; } return page; } fz_page *pdf_load_page_imp(fz_context *ctx, fz_document *doc, int chapter, int number) { return (fz_page*)pdf_load_page(ctx, 
(pdf_document*)doc, number); } /* Delete a page from the page tree of a document. This does not remove the page contents or resources from the file. doc: The document to operate on. number: The page to remove (numbered from 0) */ void pdf_delete_page(fz_context *ctx, pdf_document *doc, int at) { pdf_obj *parent, *kids; int i; pdf_lookup_page_loc(ctx, doc, at, &parent, &i); kids = pdf_dict_get(ctx, parent, PDF_NAME(Kids)); pdf_array_delete(ctx, kids, i); while (parent) { int count = pdf_dict_get_int(ctx, parent, PDF_NAME(Count)); pdf_dict_put_int(ctx, parent, PDF_NAME(Count), count - 1); parent = pdf_dict_get(ctx, parent, PDF_NAME(Parent)); } } /* Delete a range of pages from the page tree of a document. This does not remove the page contents or resources from the file. doc: The document to operate on. start, end: The range of pages (numbered from 0) (inclusive, exclusive) to remove. If end is negative or greater than the number of pages in the document, it will be taken to be the end of the document. */ void pdf_delete_page_range(fz_context *ctx, pdf_document *doc, int start, int end) { int count = pdf_count_pages(ctx, doc); if (end < 0 || end > count) end = count+1; if (start < 0) start = 0; while (start < end) { pdf_delete_page(ctx, doc, start); end--; } } /* Create a pdf_obj within a document that represents a page, from a previously created resources dictionary and page content stream. This should then be inserted into the document using pdf_insert_page. After this call the page exists within the document structure, but is not actually ever displayed as it is not linked into the PDF page tree. doc: The document to which to add the page. mediabox: The mediabox for the page (should be identical to that used when creating the resources/contents). rotate: 0, 90, 180 or 270. The rotation to use for the page. resources: The resources dictionary for the new page (typically created by pdf_page_write). 
contents: The page contents for the new page (typically created by
pdf_page_write).
*/
pdf_obj *
pdf_add_page(fz_context *ctx, pdf_document *doc, fz_rect mediabox, int rotate, pdf_obj *resources, fz_buffer *contents)
{
	pdf_obj *dict = pdf_new_dict(ctx, doc, 5);

	fz_try(ctx)
	{
		pdf_dict_put(ctx, dict, PDF_NAME(Type), PDF_NAME(Page));
		pdf_dict_put_rect(ctx, dict, PDF_NAME(MediaBox), mediabox);
		pdf_dict_put_int(ctx, dict, PDF_NAME(Rotate), rotate);

		if (pdf_is_indirect(ctx, resources))
		{
			/* Already an indirect reference: store it as it is. */
			pdf_dict_put(ctx, dict, PDF_NAME(Resources), resources);
		}
		else if (pdf_is_dict(ctx, resources))
		{
			/* A direct dictionary is added to the document first and
			 * the resulting object is stored. */
			pdf_obj *res_ref = pdf_add_object(ctx, doc, resources);
			pdf_dict_put_drop(ctx, dict, PDF_NAME(Resources), res_ref);
		}
		else
		{
			/* Nothing usable was supplied: give the page a fresh
			 * resources dictionary. */
			pdf_dict_put_dict(ctx, dict, PDF_NAME(Resources), 1);
		}

		if (contents != NULL)
		{
			pdf_obj *stream_ref = pdf_add_stream(ctx, doc, contents, NULL, 0);
			pdf_dict_put_drop(ctx, dict, PDF_NAME(Contents), stream_ref);
		}
	}
	fz_catch(ctx)
	{
		pdf_drop_obj(ctx, dict);
		fz_rethrow(ctx);
	}

	return pdf_add_object_drop(ctx, doc, dict);
}

/*
	Insert a page previously created by pdf_add_page into the pages tree
	of the document.

	doc: The document to insert into.

	at: The page number to insert at. 0 inserts at the start.
	Negative numbers, or INT_MAX, insert at the end. Otherwise
	n inserts after page n.

	page_ref: The page to insert.
*/
void
pdf_insert_page(fz_context *ctx, pdf_document *doc, int at, pdf_obj *page_ref)
{
	int total = pdf_count_pages(ctx, doc);
	pdf_obj *node, *kid_array;
	int slot;

	/* Negative positions and INT_MAX both mean "append". */
	if (at < 0 || at == INT_MAX)
		at = total;

	if (at > total)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot insert page beyond end of page tree");

	if (total == 0)
	{
		/* Empty document: the page goes straight into the Kids array
		 * of the root Pages node. */
		pdf_obj *catalog = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root));

		node = pdf_dict_get(ctx, catalog, PDF_NAME(Pages));
		if (node == NULL)
			fz_throw(ctx, FZ_ERROR_GENERIC, "cannot find page tree");

		kid_array = pdf_dict_get(ctx, node, PDF_NAME(Kids));
		if (kid_array == NULL)
			fz_throw(ctx, FZ_ERROR_GENERIC, "malformed page tree");

		slot = 0;
	}
	else
	{
		/* Appending places the page just after the current last page;
		 * otherwise it goes in front of the page found at 'at'. */
		int appending = (at == total);
		int found;

		pdf_lookup_page_loc(ctx, doc, appending ? total - 1 : at, &node, &found);
		kid_array = pdf_dict_get(ctx, node, PDF_NAME(Kids));
		slot = appending ? found + 1 : found;
	}

	pdf_array_insert(ctx, kid_array, page_ref, slot);

	pdf_dict_put(ctx, page_ref, PDF_NAME(Parent), node);

	/* Adjust page counts */
	for (; node != NULL; node = pdf_dict_get(ctx, node, PDF_NAME(Parent)))
	{
		int n = pdf_dict_get_int(ctx, node, PDF_NAME(Count));
		pdf_dict_put_int(ctx, node, PDF_NAME(Count), n + 1);
	}
}