#include "mupdf/fitz.h" #include #include #include /* Enumerate marked selection */ static float dist2(float a, float b) { return a * a + b * b; } static float hdist(fz_point *dir, fz_point *a, fz_point *b) { float dx = b->x - a->x; float dy = b->y - a->y; return fz_abs(dx * dir->x + dy * dir->y); } static float vdist(fz_point *dir, fz_point *a, fz_point *b) { float dx = b->x - a->x; float dy = b->y - a->y; return fz_abs(dx * dir->y + dy * dir->x); } static int line_length(fz_stext_line *line) { fz_stext_char *ch; int n = 0; for (ch = line->first_char; ch; ch = ch->next) ++n; return n; } static int find_closest_in_line(fz_stext_line *line, int idx, fz_point p) { fz_stext_char *ch; float closest_dist = 1e30f; int closest_idx = idx; if (line->dir.x > line->dir.y) { if (p.y < line->bbox.y0) return idx; if (p.y > line->bbox.y1) return idx + line_length(line); } else { if (p.x < line->bbox.x0) return idx + line_length(line); if (p.x > line->bbox.x1) return idx; } for (ch = line->first_char; ch; ch = ch->next) { float mid_x = (ch->quad.ul.x + ch->quad.ur.x + ch->quad.ll.x + ch->quad.lr.x) / 4; float mid_y = (ch->quad.ul.y + ch->quad.ur.y + ch->quad.ll.y + ch->quad.lr.y) / 4; float this_dist = dist2(p.x - mid_x, p.y - mid_y); if (this_dist < closest_dist) { closest_dist = this_dist; if (line->dir.x > line->dir.y) closest_idx = (p.x < mid_x) ? idx : idx+1; else closest_idx = (p.y < mid_y) ? idx : idx+1; } ++idx; } return closest_idx; } static int find_closest_in_page(fz_stext_page *page, fz_point p) { fz_stext_block *block; fz_stext_line *line; fz_stext_line *closest_line = NULL; int closest_idx = 0; float closest_dist = 1e30f; float this_dist; int idx = 0; for (block = page->first_block; block; block = block->next) { if (block->type != FZ_STEXT_BLOCK_TEXT) continue; for (line = block->u.t.first_line; line; line = line->next) { fz_rect box = line->bbox; if (p.x >= box.x0 && p.x <= box.x1) { if (p.y < box.y0) this_dist = dist2(box.y0 - p.y, 0); else if (p.y > box.y1) this_dist = dist2(p.y - box.y1, 0); else this_dist = 0; } else if (p.y >= box.y0 && p.y <= box.y1) { if (p.x < box.x0) this_dist = dist2(box.x0 - p.x, 0); else if (p.x > box.x1) this_dist = dist2(p.x - box.x1, 0); else this_dist = 0; } else { float dul = dist2(p.x - box.x0, p.y - box.y0); float dur = dist2(p.x - box.x1, p.y - box.y0); float dll = dist2(p.x - box.x0, p.y - box.y1); float dlr = dist2(p.x - box.x1, p.y - box.y1); this_dist = fz_min(fz_min(dul, dur), fz_min(dll, dlr)); } if (this_dist < closest_dist) { closest_dist = this_dist; closest_line = line; closest_idx = idx; } idx += line_length(line); } } if (closest_line) return find_closest_in_line(closest_line, closest_idx, p); return 0; } struct callbacks { void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch); void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln); void *arg; }; static void fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb) { fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; int idx, start, end; int inside; start = find_closest_in_page(page, a); end = find_closest_in_page(page, b); if (start > end) idx = start, start = end, end = idx; if (start == end) return; inside = 0; idx = 0; for (block = page->first_block; block; block = block->next) { if (block->type != FZ_STEXT_BLOCK_TEXT) continue; for (line = block->u.t.first_line; line; line = line->next) { for (ch = line->first_char; ch; ch = ch->next) { if (!inside) if (idx == start) inside = 1; if (inside) cb->on_char(ctx, cb->arg, line, ch); if (++idx == end) return; } if (inside) cb->on_line(ctx, cb->arg, line); } } } fz_quad fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode) { fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; fz_quad handles; int idx, start, end; int pc; start = find_closest_in_page(page, *a); end = find_closest_in_page(page, *b); if (start > end) idx = start, start = end, end = idx; handles.ll = handles.ul = *a; handles.lr = handles.ur = *b; idx = 0; for (block = page->first_block; block; block = block->next) { if (block->type != FZ_STEXT_BLOCK_TEXT) continue; for (line = block->u.t.first_line; line; line = line->next) { pc = '\n'; for (ch = line->first_char; ch; ch = ch->next) { if (idx <= start) { if (mode == FZ_SELECT_CHARS || (mode == FZ_SELECT_WORDS && (pc == ' ' || pc == '\n')) || (mode == FZ_SELECT_LINES && (pc == '\n'))) { handles.ll = ch->quad.ll; handles.ul = ch->quad.ul; *a = ch->origin; } } if (idx >= end) { if (mode == FZ_SELECT_CHARS || (mode == FZ_SELECT_WORDS && (ch->c == ' '))) { handles.lr = ch->quad.ll; handles.ur = ch->quad.ul; *b = ch->origin; return handles; } if (!ch->next) { handles.lr = ch->quad.lr; handles.ur = ch->quad.ur; *b = ch->quad.lr; return handles; } } pc = ch->c; ++idx; } } } return handles; } /* Highlight selection */ struct highlight { int len, cap; fz_quad *box; float hfuzz, vfuzz; }; static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) { struct highlight *hits = arg; float vfuzz = ch->size * hits->vfuzz; float hfuzz = ch->size * hits->hfuzz; if (hits->len > 0) { fz_quad *end = &hits->box[hits->len-1]; if (hdist(&line->dir, &end->lr, &ch->quad.ll) < hfuzz && vdist(&line->dir, &end->lr, &ch->quad.ll) < vfuzz && hdist(&line->dir, &end->ur, &ch->quad.ul) < hfuzz && vdist(&line->dir, &end->ur, &ch->quad.ul) < vfuzz) { end->ur = ch->quad.ur; end->lr = ch->quad.lr; return; } } if (hits->len < hits->cap) hits->box[hits->len++] = ch->quad; } static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line) { } /* Return a list of quads to highlight lines inside the selection points. */ int fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads) { struct callbacks cb; struct highlight hits; hits.len = 0; hits.cap = max_quads; hits.box = quads; hits.hfuzz = 0.5f; /* merge large gaps */ hits.vfuzz = 0.1f; cb.on_char = on_highlight_char; cb.on_line = on_highlight_line; cb.arg = &hits; fz_enumerate_selection(ctx, page, a, b, &cb); return hits.len; } /* Copy selection */ static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch) { fz_buffer *buffer = arg; int c = ch->c; if (c < 32) c = FZ_REPLACEMENT_CHARACTER; fz_append_rune(ctx, buffer, c); } static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line) { fz_buffer *buffer = arg; fz_append_byte(ctx, buffer, '\r'); fz_append_byte(ctx, buffer, '\n'); } static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line) { fz_buffer *buffer = arg; fz_append_byte(ctx, buffer, '\n'); } /* Return a newly allocated UTF-8 string with the text for a given selection. crlf: If true, write "\r\n" style line endings (otherwise "\n" only). */ char * fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf) { struct callbacks cb; fz_buffer *buffer; unsigned char *s; buffer = fz_new_buffer(ctx, 1024); cb.on_char = on_copy_char; cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf; cb.arg = buffer; fz_enumerate_selection(ctx, page, a, b, &cb); fz_terminate_buffer(ctx, buffer); fz_buffer_extract(ctx, buffer, &s); /* take over the data */ fz_drop_buffer(ctx, buffer); return (char*)s; } /* String search */ static inline int canon(int c) { /* TODO: proper unicode case folding */ /* TODO: character equivalence (a matches รค, etc) */ if (c == 0xA0 || c == 0x2028 || c == 0x2029) return ' '; if (c == '\r' || c == '\n' || c == '\t') return ' '; if (c >= 'A' && c <= 'Z') return c - 'A' + 'a'; return c; } static inline int chartocanon(int *c, const char *s) { int n = fz_chartorune(c, s); *c = canon(*c); return n; } static const char *match_string(const char *h, const char *n) { int hc, nc; const char *e = h; h += chartocanon(&hc, h); n += chartocanon(&nc, n); while (hc == nc) { e = h; if (hc == ' ') do h += chartocanon(&hc, h); while (hc == ' '); else h += chartocanon(&hc, h); if (nc == ' ') do n += chartocanon(&nc, n); while (nc == ' '); else n += chartocanon(&nc, n); } return nc == 0 ? e : NULL; } static const char *find_string(const char *s, const char *needle, const char **endp) { const char *end; while (*s) { end = match_string(s, needle); if (end) return *endp = end, s; ++s; } return *endp = NULL, NULL; } /* Search for occurrence of 'needle' in text page. Return the number of hits and store hit quads in the passed in array. NOTE: This is an experimental interface and subject to change without notice. */ int fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, fz_quad *quads, int max_quads) { struct highlight hits; fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; fz_buffer *buffer; const char *haystack, *begin, *end; int c, inside; if (strlen(needle) == 0) return 0; hits.len = 0; hits.cap = max_quads; hits.box = quads; hits.hfuzz = 0.2f; /* merge kerns but not large gaps */ hits.vfuzz = 0.1f; buffer = fz_new_buffer_from_stext_page(ctx, page); fz_try(ctx) { haystack = fz_string_from_buffer(ctx, buffer); begin = find_string(haystack, needle, &end); if (!begin) goto no_more_matches; inside = 0; for (block = page->first_block; block; block = block->next) { if (block->type != FZ_STEXT_BLOCK_TEXT) continue; for (line = block->u.t.first_line; line; line = line->next) { for (ch = line->first_char; ch; ch = ch->next) { try_new_match: if (!inside) { if (haystack >= begin) inside = 1; } if (inside) { if (haystack < end) on_highlight_char(ctx, &hits, line, ch); else { inside = 0; begin = find_string(haystack, needle, &end); if (!begin) goto no_more_matches; else goto try_new_match; } } haystack += fz_chartorune(&c, haystack); } assert(*haystack == '\n'); ++haystack; } assert(*haystack == '\n'); ++haystack; } no_more_matches:; } fz_always(ctx) fz_drop_buffer(ctx, buffer); fz_catch(ctx) fz_rethrow(ctx); return hits.len; }