eBookReaderSwitch/mupdf/source/fitz/stext-search.c

502 lines
11 KiB
C

#include "mupdf/fitz.h"
#include <string.h>
#include <limits.h>
#include <assert.h>
/* Enumerate marked selection */
/* Squared Euclidean length of the vector (a, b); no square root is taken. */
static float dist2(float a, float b)
{
	const float squared = a * a + b * b;
	return squared;
}
/*
	Absolute value of the displacement from 'a' to 'b' projected onto
	the direction vector 'dir' (a dot product), i.e. how far apart the
	two points are when measured along 'dir'.
*/
static float hdist(fz_point *dir, fz_point *a, fz_point *b)
{
	float delta_x = b->x - a->x;
	float delta_y = b->y - a->y;
	float along = delta_x * dir->x + delta_y * dir->y;
	return fz_abs(along);
}
/*
	Absolute distance between 'a' and 'b' measured perpendicular to the
	direction vector 'dir'.

	This is the magnitude of the 2D cross product of the displacement
	with 'dir', which is the projection onto the normal (-dir.y, dir.x).
	The previous form (dx * dir->y + dy * dir->x) projected onto
	(dir.y, dir.x) instead, which is only perpendicular to 'dir' for
	axis-aligned directions; for rotated text it mixed in the distance
	along the line. For axis-aligned 'dir' both forms give the same value.

	NOTE(review): the result is a true distance only if 'dir' is unit
	length -- confirm against whoever fills in fz_stext_line.dir.
*/
static float vdist(fz_point *dir, fz_point *a, fz_point *b)
{
	float dx = b->x - a->x;
	float dy = b->y - a->y;
	return fz_abs(dx * dir->y - dy * dir->x);
}
/* Count the characters in a line by walking its singly linked char list. */
static int line_length(fz_stext_line *line)
{
	int count = 0;
	fz_stext_char *cur = line->first_char;

	while (cur)
	{
		count++;
		cur = cur->next;
	}
	return count;
}
/*
	Map point 'p' to a character boundary index within 'line'.

	'idx' is the index of the line's first character; the result is in
	the range idx .. idx + line_length(line).

	A line is handled as horizontal when dir.x > dir.y, otherwise as
	vertical. If 'p' lies outside the line's bbox across the writing
	direction, the start or the end of the line is returned directly.
	Otherwise the character whose quad centre is nearest to 'p' is
	located, and the boundary before or after it is chosen depending on
	which side of the centre 'p' falls.
*/
static int find_closest_in_line(fz_stext_line *line, int idx, fz_point p)
{
	fz_stext_char *ch;
	float best_dist = 1e30f;
	int best_idx = idx;
	int pos = idx;
	int horizontal = line->dir.x > line->dir.y;

	if (horizontal)
	{
		/* Above the line: snap to its start. Below: snap to its end. */
		if (p.y < line->bbox.y0)
			return idx;
		if (p.y > line->bbox.y1)
			return idx + line_length(line);
	}
	else
	{
		/* Left of the line: snap to its end. Right: snap to its start. */
		if (p.x < line->bbox.x0)
			return idx + line_length(line);
		if (p.x > line->bbox.x1)
			return idx;
	}

	for (ch = line->first_char; ch; ch = ch->next, ++pos)
	{
		/* Centre of the character quad: mean of its four corners. */
		float cx = (ch->quad.ul.x + ch->quad.ur.x + ch->quad.ll.x + ch->quad.lr.x) / 4;
		float cy = (ch->quad.ul.y + ch->quad.ur.y + ch->quad.ll.y + ch->quad.lr.y) / 4;
		float d = dist2(p.x - cx, p.y - cy);

		if (!(d < best_dist))
			continue;

		best_dist = d;
		if (horizontal)
			best_idx = pos + ((p.x < cx) ? 0 : 1);
		else
			best_idx = pos + ((p.y < cy) ? 0 : 1);
	}

	return best_idx;
}
/*
	Map point 'p' to a page-wide character boundary index.

	Every line of every text block is scored by the squared distance
	from 'p' to the line's bounding box (zero when 'p' is inside it).
	The first line with the smallest score is then refined with
	find_closest_in_line(). Characters are numbered consecutively over
	all text blocks; non-text blocks are skipped and do not contribute.

	Returns 0 when the page has no text lines.
*/
static int find_closest_in_page(fz_stext_page *page, fz_point p)
{
	fz_stext_block *block;
	fz_stext_line *line;
	fz_stext_line *best_line = NULL;
	float best_dist = 1e30f;
	int best_first = 0;
	int first = 0; /* index of the first char of the current line */

	for (block = page->first_block; block; block = block->next)
	{
		if (block->type == FZ_STEXT_BLOCK_TEXT)
		{
			for (line = block->u.t.first_line; line; line = line->next)
			{
				fz_rect box = line->bbox;
				int within_x = (p.x >= box.x0 && p.x <= box.x1);
				int within_y = (p.y >= box.y0 && p.y <= box.y1);
				float d;

				if (within_x)
				{
					/* Directly above, below or inside: vertical gap only. */
					d = 0;
					if (p.y < box.y0)
						d = dist2(box.y0 - p.y, 0);
					else if (p.y > box.y1)
						d = dist2(p.y - box.y1, 0);
				}
				else if (within_y)
				{
					/* Directly left or right: horizontal gap only. */
					d = 0;
					if (p.x < box.x0)
						d = dist2(box.x0 - p.x, 0);
					else if (p.x > box.x1)
						d = dist2(p.x - box.x1, 0);
				}
				else
				{
					/* Diagonal: distance to the nearest of the four corners. */
					float d_ul = dist2(p.x - box.x0, p.y - box.y0);
					float d_ur = dist2(p.x - box.x1, p.y - box.y0);
					float d_ll = dist2(p.x - box.x0, p.y - box.y1);
					float d_lr = dist2(p.x - box.x1, p.y - box.y1);
					float d_top = fz_min(d_ul, d_ur);
					float d_bottom = fz_min(d_ll, d_lr);
					d = fz_min(d_top, d_bottom);
				}

				if (d < best_dist)
				{
					best_dist = d;
					best_line = line;
					best_first = first;
				}

				first += line_length(line);
			}
		}
	}

	if (!best_line)
		return 0;
	return find_closest_in_line(best_line, best_first, p);
}
/*
Visitor used by fz_enumerate_selection() to report the contents of a
selection without knowing what the consumer does with them.
*/
struct callbacks
{
/* Called once for every character inside the selection. */
void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch);
/* Called after the characters of a line have been visited, while the
selection continues past that line. */
void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln);
/* Opaque user data handed back as the 'arg' parameter of both callbacks. */
void *arg;
};
/*
	Walk the characters selected between points 'a' and 'b' and report
	them through 'cb'.

	Both points are converted to page-wide character indices with
	find_closest_in_page(); the order of the points does not matter.
	cb->on_char is invoked for each character in [start, end), and
	cb->on_line is invoked after each line that the selection continues
	past. The walk stops as soon as the end index is reached, so on_line
	is not invoked for the line in which the selection ends. An empty
	selection produces no callbacks.
*/
static void
fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb)
{
	fz_stext_block *block;
	fz_stext_line *line;
	fz_stext_char *ch;
	int first, last, pos;
	int active;

	first = find_closest_in_page(page, a);
	last = find_closest_in_page(page, b);

	if (first == last)
		return;
	if (first > last)
	{
		int tmp = first;
		first = last;
		last = tmp;
	}

	active = 0;
	pos = 0;
	for (block = page->first_block; block; block = block->next)
	{
		if (block->type != FZ_STEXT_BLOCK_TEXT)
			continue;
		for (line = block->u.t.first_line; line; line = line->next)
		{
			for (ch = line->first_char; ch; ch = ch->next)
			{
				if (pos == first)
					active = 1;
				if (active)
					cb->on_char(ctx, cb->arg, line, ch);
				pos++;
				if (pos == last)
					return;
			}
			if (active)
				cb->on_line(ctx, cb->arg, line);
		}
	}
}
/*
	Snap the selection end points '*a' and '*b' to character, word or
	line boundaries according to 'mode', updating both points in place.

	Returns a quad describing the selection handles: its ll/ul corners
	mark the start of the selection and its lr/ur corners mark the end.
	If the page has no text, the handles are simply the two input points.

	Start snapping: for every character up to and including the start
	index, the start handle moves to that character when it is a valid
	boundary for the mode (any character; one preceded by a space or
	line start; or the first character of a line). The last such
	character before the start index wins.

	End snapping: from the end index onwards, the first character that
	is a valid stopping point ends the scan (any character in char mode,
	a space in word mode); otherwise the scan stops at the end of the
	line and the handle is placed on the right edge of its last
	character.
*/
fz_quad
fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode)
{
	fz_stext_block *block;
	fz_stext_line *line;
	fz_stext_char *ch;
	fz_quad handles;
	int first, last, pos;
	int prev; /* previous character on the current line, '\n' at line start */

	first = find_closest_in_page(page, *a);
	last = find_closest_in_page(page, *b);
	if (first > last)
	{
		int tmp = first;
		first = last;
		last = tmp;
	}

	/* Fallback handles, used when nothing on the page can be snapped to. */
	handles.ul = *a;
	handles.ll = *a;
	handles.ur = *b;
	handles.lr = *b;

	pos = 0;
	for (block = page->first_block; block; block = block->next)
	{
		if (block->type != FZ_STEXT_BLOCK_TEXT)
			continue;
		for (line = block->u.t.first_line; line; line = line->next)
		{
			prev = '\n';
			for (ch = line->first_char; ch; ch = ch->next)
			{
				if (pos <= first)
				{
					int after_newline = (prev == '\n');
					int after_gap = (prev == ' ' || after_newline);
					int snap_start = (mode == FZ_SELECT_CHARS)
						|| (mode == FZ_SELECT_WORDS && after_gap)
						|| (mode == FZ_SELECT_LINES && after_newline);
					if (snap_start)
					{
						handles.ll = ch->quad.ll;
						handles.ul = ch->quad.ul;
						*a = ch->origin;
					}
				}

				if (pos >= last)
				{
					int snap_end = (mode == FZ_SELECT_CHARS)
						|| (mode == FZ_SELECT_WORDS && ch->c == ' ');
					if (snap_end)
					{
						/* End just before this character. */
						handles.lr = ch->quad.ll;
						handles.ur = ch->quad.ul;
						*b = ch->origin;
						return handles;
					}
					if (ch->next == NULL)
					{
						/* End of line: stop after its last character. */
						handles.lr = ch->quad.lr;
						handles.ur = ch->quad.ur;
						*b = ch->quad.lr;
						return handles;
					}
				}

				prev = ch->c;
				pos++;
			}
		}
	}

	return handles;
}
/* Highlight selection */
/*
Accumulator for highlight quads, filled in by on_highlight_char().
*/
struct highlight
{
/* len: number of quads stored so far; cap: capacity of 'box'. */
int len, cap;
/* Caller-provided output array holding at least 'cap' quads. */
fz_quad *box;
/* Merge tolerances along and across the line; each is multiplied by
the character size before being compared with a distance. */
float hfuzz, vfuzz;
};
/*
	Add one character to a highlight accumulator ('arg' is a
	struct highlight *).

	If the left edge of the character's quad lies within the fuzz
	tolerances of the right edge of the most recently stored quad, that
	quad is stretched to cover the character. Otherwise the character's
	quad is appended as a new entry, provided there is room left; once
	the array is full, further quads are silently dropped.
*/
static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
{
	struct highlight *hits = arg;
	float v_tol = ch->size * hits->vfuzz;
	float h_tol = ch->size * hits->hfuzz;

	if (hits->len > 0)
	{
		fz_quad *last = &hits->box[hits->len - 1];
		/* Compare the previous right edge with this left edge, corner by corner. */
		int bottom_joins = hdist(&line->dir, &last->lr, &ch->quad.ll) < h_tol
			&& vdist(&line->dir, &last->lr, &ch->quad.ll) < v_tol;
		int top_joins = hdist(&line->dir, &last->ur, &ch->quad.ul) < h_tol
			&& vdist(&line->dir, &last->ur, &ch->quad.ul) < v_tol;

		if (bottom_joins && top_joins)
		{
			last->ur = ch->quad.ur;
			last->lr = ch->quad.lr;
			return;
		}
	}

	if (hits->len < hits->cap)
	{
		hits->box[hits->len] = ch->quad;
		hits->len++;
	}
}
/* Line callback for highlighting: intentionally does nothing. */
static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line)
{
}
/*
	Compute the quads needed to highlight the text selected between
	points 'a' and 'b'.

	Quads are written to 'quads', which must have room for 'max_quads'
	entries; neighbouring characters are merged into a single quad where
	they touch. Returns the number of quads written.
*/
int
fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads)
{
	struct highlight hits;
	struct callbacks cb;

	/* Start with an empty result in the caller's array. */
	hits.box = quads;
	hits.cap = max_quads;
	hits.len = 0;
	hits.vfuzz = 0.1f;
	hits.hfuzz = 0.5f; /* merge large gaps */

	cb.arg = &hits;
	cb.on_line = on_highlight_line;
	cb.on_char = on_highlight_char;

	fz_enumerate_selection(ctx, page, a, b, &cb);

	return hits.len;
}
/* Copy selection */
/*
	Character callback for copying: append the character to the
	fz_buffer passed as 'arg', encoded as a rune. Code points below 32
	are replaced with FZ_REPLACEMENT_CHARACTER.
*/
static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
{
	fz_buffer *out = arg;
	int rune = ch->c;

	rune = (rune < 32) ? FZ_REPLACEMENT_CHARACTER : rune;
	fz_append_rune(ctx, out, rune);
}
/* Line callback for copying: terminate the line with "\r\n". */
static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line)
{
	fz_buffer *out = arg;

	fz_append_byte(ctx, out, '\r');
	fz_append_byte(ctx, out, '\n');
}
/* Line callback for copying: terminate the line with a single "\n". */
static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line)
{
	fz_buffer *out = arg;

	fz_append_byte(ctx, out, '\n');
}
/*
	Return a newly allocated UTF-8 string with the text for a given selection.

	a, b: The two end points of the selection, in either order.

	crlf: If true, write "\r\n" style line endings (otherwise "\n" only).

	The returned string is zero terminated and owned by the caller
	(presumably to be released with fz_free -- confirm against the
	fz_buffer_extract contract).

	Throws on failure; the temporary buffer is released before the
	exception is propagated.
*/
char *
fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf)
{
	struct callbacks cb;
	fz_buffer *buffer;
	unsigned char *s;

	buffer = fz_new_buffer(ctx, 1024);

	/* Appending to the buffer can throw; without this guard the buffer
	 * would leak when the exception unwinds past this function. */
	fz_try(ctx)
	{
		cb.on_char = on_copy_char;
		cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf;
		cb.arg = buffer;

		fz_enumerate_selection(ctx, page, a, b, &cb);

		fz_terminate_buffer(ctx, buffer);
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, buffer);
		fz_rethrow(ctx);
	}

	fz_buffer_extract(ctx, buffer, &s); /* take over the data */
	fz_drop_buffer(ctx, buffer);
	return (char*)s;
}
/* String search */
/*
	Canonicalise a code point for searching: the whitespace-like
	characters U+00A0, U+2028, U+2029, CR, LF and TAB all become a plain
	space, and ASCII upper case letters become lower case. Everything
	else is returned unchanged.
*/
static inline int canon(int c)
{
	/* TODO: proper unicode case folding */
	/* TODO: character equivalence (a matches ä, etc) */
	switch (c)
	{
	case 0xA0:
	case 0x2028:
	case 0x2029:
	case '\r':
	case '\n':
	case '\t':
		return ' ';
	default:
		break;
	}
	if ('A' <= c && c <= 'Z')
		return c + ('a' - 'A');
	return c;
}
/*
	Decode one rune from 's' with fz_chartorune and store its
	canonical form (see canon) in '*c'. Returns the value returned by
	fz_chartorune, which callers use to advance 's'.
*/
static inline int chartocanon(int *c, const char *s)
{
	int rune;
	int len = fz_chartorune(&rune, s);

	*c = canon(rune);
	return len;
}
/*
	Test whether the needle 'n' matches at the very start of the
	haystack 'h'.

	Characters are compared in canonical form (see canon), and a run of
	whitespace on either side is treated as a single space.

	Returns a pointer into the haystack marking the end of the match,
	or NULL if the needle does not match here. An empty needle matches
	with an end pointer equal to 'h'.
*/
static const char *match_string(const char *h, const char *n)
{
	int hc, nc;
	const char *e = h;
	h += chartocanon(&hc, h);
	n += chartocanon(&nc, n);
	/* Stop as soon as the needle is exhausted. Without the nc != 0
	 * test, a haystack that ends exactly where the needle ends
	 * (hc == nc == 0) would run another iteration and decode bytes
	 * beyond the terminators of both strings. */
	while (nc != 0 && hc == nc)
	{
		/* Everything before 'h' has been matched so far. */
		e = h;
		if (hc == ' ')
			do
				h += chartocanon(&hc, h);
			while (hc == ' ');
		else
			h += chartocanon(&hc, h);
		if (nc == ' ')
			do
				n += chartocanon(&nc, n);
			while (nc == ' ');
		else
			n += chartocanon(&nc, n);
	}
	return nc == 0 ? e : NULL;
}
/*
	Find the first position in 's' at which 'needle' matches according
	to match_string. Candidate positions are tried one byte at a time.

	On success returns the start of the match and stores its end in
	'*endp'. If there is no match, returns NULL and sets '*endp' to NULL.
*/
static const char *find_string(const char *s, const char *needle, const char **endp)
{
	const char *pos;

	for (pos = s; *pos; ++pos)
	{
		const char *stop = match_string(pos, needle);
		if (stop)
		{
			*endp = stop;
			return pos;
		}
	}

	*endp = NULL;
	return NULL;
}
/*
Search for occurrences of 'needle' in a text page.

Matches are located with find_string() in a flat text copy of the page,
then the page's characters are walked in step with that text to collect
the quads of the characters covered by each match.

quads: Output array for the hit quads.

max_quads: Capacity of 'quads'; quads beyond this are dropped.

Returns the number of quads stored. Adjacent characters may be merged
into one quad, and a match that spans a gap or a line break may produce
several, so this is not necessarily the number of matches. Returns 0
for an empty needle.

NOTE: This is an experimental interface and subject to change without notice.
*/
int
fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, fz_quad *quads, int max_quads)
{
struct highlight hits;
fz_stext_block *block;
fz_stext_line *line;
fz_stext_char *ch;
fz_buffer *buffer;
const char *haystack, *begin, *end;
int c, inside;
/* An empty needle never produces a hit. */
if (strlen(needle) == 0)
return 0;
hits.len = 0;
hits.cap = max_quads;
hits.box = quads;
hits.hfuzz = 0.2f; /* merge kerns but not large gaps */
hits.vfuzz = 0.1f;
/* Flat text copy of the page; released in fz_always below. */
buffer = fz_new_buffer_from_stext_page(ctx, page);
fz_try(ctx)
{
haystack = fz_string_from_buffer(ctx, buffer);
/* [begin, end) is the byte range of the next match in the text. */
begin = find_string(haystack, needle, &end);
if (!begin)
goto no_more_matches;
inside = 0;
/* From here on 'haystack' is a cursor that advances through the text
in step with 'ch', so a text position can be compared with the match
range to decide whether the current character is part of a match. */
for (block = page->first_block; block; block = block->next)
{
if (block->type != FZ_STEXT_BLOCK_TEXT)
continue;
for (line = block->u.t.first_line; line; line = line->next)
{
for (ch = line->first_char; ch; ch = ch->next)
{
try_new_match:
if (!inside)
{
/* The cursor has reached the start of the current match. */
if (haystack >= begin)
inside = 1;
}
if (inside)
{
if (haystack < end)
on_highlight_char(ctx, &hits, line, ch);
else
{
/* The current match is finished: look for the next one starting at
the cursor and re-examine this same character, since it may be the
first character of that match. */
inside = 0;
begin = find_string(haystack, needle, &end);
if (!begin)
goto no_more_matches;
else
goto try_new_match;
}
}
/* Step the text cursor over the encoding of this character. */
haystack += fz_chartorune(&c, haystack);
}
/* The text is expected to hold a newline after every line... */
assert(*haystack == '\n');
++haystack;
}
/* ...and one more newline after every text block. */
assert(*haystack == '\n');
++haystack;
}
no_more_matches:;
}
fz_always(ctx)
fz_drop_buffer(ctx, buffer);
fz_catch(ctx)
fz_rethrow(ctx);
return hits.len;
}