651 lines
17 KiB
C
651 lines
17 KiB
C
|
#include "fitz-imp.h"
|
||
|
|
||
|
#define SUBSCRIPT_OFFSET 0.2f
|
||
|
#define SUPERSCRIPT_OFFSET -0.2f
|
||
|
|
||
|
#include <ft2build.h>
|
||
|
#include FT_FREETYPE_H
|
||
|
|
||
|
/* HTML output (visual formatting with preserved layout) */
|
||
|
|
||
|
static int
|
||
|
detect_super_script(fz_stext_line *line, fz_stext_char *ch)
|
||
|
{
|
||
|
if (line->wmode == 0 && line->dir.x == 1 && line->dir.y == 0)
|
||
|
return ch->origin.y < line->first_char->origin.y - ch->size * 0.1f;
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static const char *
|
||
|
font_full_name(fz_context *ctx, fz_font *font)
|
||
|
{
|
||
|
const char *name = fz_font_name(ctx, font);
|
||
|
const char *s = strchr(name, '+');
|
||
|
return s ? s + 1 : name;
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
font_family_name(fz_context *ctx, fz_font *font, char *buf, int size, int is_mono, int is_serif)
|
||
|
{
|
||
|
const char *name = font_full_name(ctx, font);
|
||
|
char *s;
|
||
|
fz_strlcpy(buf, name, size);
|
||
|
s = strrchr(buf, '-');
|
||
|
if (s)
|
||
|
*s = 0;
|
||
|
if (is_mono)
|
||
|
fz_strlcat(buf, ",monospace", size);
|
||
|
else
|
||
|
fz_strlcat(buf, is_serif ? ",serif" : ",sans-serif", size);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fz_print_style_begin_html(fz_context *ctx, fz_output *out, fz_font *font, float size, int sup, int color)
|
||
|
{
|
||
|
char family[80];
|
||
|
|
||
|
int is_bold = fz_font_is_bold(ctx, font);
|
||
|
int is_italic = fz_font_is_italic(ctx, font);
|
||
|
int is_serif = fz_font_is_serif(ctx, font);
|
||
|
int is_mono = fz_font_is_monospaced(ctx, font);
|
||
|
|
||
|
font_family_name(ctx, font, family, sizeof family, is_mono, is_serif);
|
||
|
|
||
|
if (sup) fz_write_string(ctx, out, "<sup>");
|
||
|
if (is_mono) fz_write_string(ctx, out, "<tt>");
|
||
|
if (is_bold) fz_write_string(ctx, out, "<b>");
|
||
|
if (is_italic) fz_write_string(ctx, out, "<i>");
|
||
|
fz_write_printf(ctx, out, "<span style=\"font-family:%s;font-size:%gpt", family, size);
|
||
|
if (color != 0)
|
||
|
fz_write_printf(ctx, out, ";color:#%06x", color);
|
||
|
fz_write_printf(ctx, out, "\">");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fz_print_style_end_html(fz_context *ctx, fz_output *out, fz_font *font, float size, int sup)
|
||
|
{
|
||
|
int is_mono = fz_font_is_monospaced(ctx, font);
|
||
|
int is_bold = fz_font_is_bold(ctx,font);
|
||
|
int is_italic = fz_font_is_italic(ctx, font);
|
||
|
|
||
|
fz_write_string(ctx, out, "</span>");
|
||
|
if (is_italic) fz_write_string(ctx, out, "</i>");
|
||
|
if (is_bold) fz_write_string(ctx, out, "</b>");
|
||
|
if (is_mono) fz_write_string(ctx, out, "</tt>");
|
||
|
if (sup) fz_write_string(ctx, out, "</sup>");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fz_print_stext_image_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block)
|
||
|
{
|
||
|
int x = block->bbox.x0;
|
||
|
int y = block->bbox.y0;
|
||
|
int w = block->bbox.x1 - block->bbox.x0;
|
||
|
int h = block->bbox.y1 - block->bbox.y0;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<img style=\"position:absolute;top:%dpt;left:%dpt;width:%dpt;height:%dpt\" src=\"", y, x, w, h);
|
||
|
fz_write_image_as_data_uri(ctx, out, block->u.i.image);
|
||
|
fz_write_string(ctx, out, "\">\n");
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fz_print_stext_block_as_html(fz_context *ctx, fz_output *out, fz_stext_block *block)
|
||
|
{
|
||
|
fz_stext_line *line;
|
||
|
fz_stext_char *ch;
|
||
|
int x, y;
|
||
|
|
||
|
fz_font *font = NULL;
|
||
|
float size = 0;
|
||
|
int sup = 0;
|
||
|
int color = 0;
|
||
|
|
||
|
for (line = block->u.t.first_line; line; line = line->next)
|
||
|
{
|
||
|
x = line->bbox.x0;
|
||
|
y = line->bbox.y0;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<p style=\"position:absolute;white-space:pre;margin:0;padding:0;top:%dpt;left:%dpt\">", y, x);
|
||
|
font = NULL;
|
||
|
|
||
|
for (ch = line->first_char; ch; ch = ch->next)
|
||
|
{
|
||
|
int ch_sup = detect_super_script(line, ch);
|
||
|
if (ch->font != font || ch->size != size || ch_sup != sup || ch->color != color)
|
||
|
{
|
||
|
if (font)
|
||
|
fz_print_style_end_html(ctx, out, font, size, sup);
|
||
|
font = ch->font;
|
||
|
size = ch->size;
|
||
|
color = ch->color;
|
||
|
sup = ch_sup;
|
||
|
fz_print_style_begin_html(ctx, out, font, size, sup, color);
|
||
|
}
|
||
|
|
||
|
switch (ch->c)
|
||
|
{
|
||
|
default:
|
||
|
if (ch->c >= 32 && ch->c <= 127)
|
||
|
fz_write_byte(ctx, out, ch->c);
|
||
|
else
|
||
|
fz_write_printf(ctx, out, "&#x%x;", ch->c);
|
||
|
break;
|
||
|
case '<': fz_write_string(ctx, out, "<"); break;
|
||
|
case '>': fz_write_string(ctx, out, ">"); break;
|
||
|
case '&': fz_write_string(ctx, out, "&"); break;
|
||
|
case '"': fz_write_string(ctx, out, """); break;
|
||
|
case '\'': fz_write_string(ctx, out, "'"); break;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (font)
|
||
|
fz_print_style_end_html(ctx, out, font, size, sup);
|
||
|
|
||
|
fz_write_string(ctx, out, "</p>\n");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
Output a page to a file in HTML (visual) format.
|
||
|
*/
|
||
|
void
|
||
|
fz_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
|
||
|
{
|
||
|
fz_stext_block *block;
|
||
|
|
||
|
int w = page->mediabox.x1 - page->mediabox.x0;
|
||
|
int h = page->mediabox.y1 - page->mediabox.y0;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<div id=\"page%d\" style=\"position:relative;width:%dpt;height:%dpt;background-color:white\">\n", id, w, h);
|
||
|
|
||
|
for (block = page->first_block; block; block = block->next)
|
||
|
{
|
||
|
if (block->type == FZ_STEXT_BLOCK_IMAGE)
|
||
|
fz_print_stext_image_as_html(ctx, out, block);
|
||
|
else if (block->type == FZ_STEXT_BLOCK_TEXT)
|
||
|
fz_print_stext_block_as_html(ctx, out, block);
|
||
|
}
|
||
|
|
||
|
fz_write_string(ctx, out, "</div>\n");
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fz_print_stext_header_as_html(fz_context *ctx, fz_output *out)
|
||
|
{
|
||
|
fz_write_string(ctx, out, "<!DOCTYPE html>\n");
|
||
|
fz_write_string(ctx, out, "<html>\n");
|
||
|
fz_write_string(ctx, out, "<head>\n");
|
||
|
fz_write_string(ctx, out, "<style>\n");
|
||
|
fz_write_string(ctx, out, "body{background-color:gray}\n");
|
||
|
fz_write_string(ctx, out, "div{margin:1em auto}\n");
|
||
|
fz_write_string(ctx, out, "</style>\n");
|
||
|
fz_write_string(ctx, out, "</head>\n");
|
||
|
fz_write_string(ctx, out, "<body>\n");
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fz_print_stext_trailer_as_html(fz_context *ctx, fz_output *out)
|
||
|
{
|
||
|
fz_write_string(ctx, out, "</body>\n");
|
||
|
fz_write_string(ctx, out, "</html>\n");
|
||
|
}
|
||
|
|
||
|
/* XHTML output (semantic, little layout, suitable for reflow) */
|
||
|
|
||
|
static void
|
||
|
fz_print_stext_image_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block)
|
||
|
{
|
||
|
int w = block->bbox.x1 - block->bbox.x0;
|
||
|
int h = block->bbox.y1 - block->bbox.y0;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<p><img width=\"%d\" height=\"%d\" src=\"", w, h);
|
||
|
fz_write_image_as_data_uri(ctx, out, block->u.i.image);
|
||
|
fz_write_string(ctx, out, "\"/></p>\n");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fz_print_style_begin_xhtml(fz_context *ctx, fz_output *out, fz_font *font, int sup)
|
||
|
{
|
||
|
int is_mono = fz_font_is_monospaced(ctx, font);
|
||
|
int is_bold = fz_font_is_bold(ctx, font);
|
||
|
int is_italic = fz_font_is_italic(ctx, font);
|
||
|
|
||
|
if (sup)
|
||
|
fz_write_string(ctx, out, "<sup>");
|
||
|
if (is_mono)
|
||
|
fz_write_string(ctx, out, "<tt>");
|
||
|
if (is_bold)
|
||
|
fz_write_string(ctx, out, "<b>");
|
||
|
if (is_italic)
|
||
|
fz_write_string(ctx, out, "<i>");
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
fz_print_style_end_xhtml(fz_context *ctx, fz_output *out, fz_font *font, int sup)
|
||
|
{
|
||
|
int is_mono = fz_font_is_monospaced(ctx, font);
|
||
|
int is_bold = fz_font_is_bold(ctx, font);
|
||
|
int is_italic = fz_font_is_italic(ctx, font);
|
||
|
|
||
|
if (is_italic)
|
||
|
fz_write_string(ctx, out, "</i>");
|
||
|
if (is_bold)
|
||
|
fz_write_string(ctx, out, "</b>");
|
||
|
if (is_mono)
|
||
|
fz_write_string(ctx, out, "</tt>");
|
||
|
if (sup)
|
||
|
fz_write_string(ctx, out, "</sup>");
|
||
|
}
|
||
|
|
||
|
/*
	Arithmetic mean of the size field over the character list starting
	at ch. An empty list (ch == NULL) yields 0.
*/
static float avg_font_size_of_line(fz_stext_char *ch)
{
	fz_stext_char *it;
	float total = 0;
	int count = 0;

	for (it = ch; it; it = it->next)
	{
		total += it->size;
		count++;
	}

	if (count == 0)
		return 0;
	return total / count;
}
|
||
|
|
||
|
/*
	Map a font size to the element name used for a line of that size:
	"h1" for 20 and above, "h2" for 15 and above, "h3" for 12 and above,
	and "p" for anything smaller.
*/
static const char *tag_from_font_size(float size)
{
	const char *tag = "p";

	/* Checked from the smallest threshold upwards so the largest one
	 * that is met wins. */
	if (size >= 12)
		tag = "h3";
	if (size >= 15)
		tag = "h2";
	if (size >= 20)
		tag = "h1";

	return tag;
}
|
||
|
|
||
|
/*
	Output one text block as semantic XHTML.

	Consecutive lines are merged into a single element whose name (h1,
	h2, h3 or p) is chosen from the average font size of each line; a
	new element is started whenever that name changes. Inside the
	element, runs of identical font and superscript state are wrapped
	with fz_print_style_begin_xhtml / fz_print_style_end_xhtml.

	A single space is inserted between two lines unless the last
	character written was already a space.

	Characters with markup meaning (< > & " ') are written as entity
	references, other code points in 32..127 as a single byte, and
	everything else as a hexadecimal character reference.

	A block without any lines produces no output.
*/
static void fz_print_stext_block_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_block *block)
{
	fz_stext_line *line;
	fz_stext_char *ch;

	fz_font *font = NULL;
	int sup = 0;
	int sp = 1; /* non-zero while the last character written was a space (or nothing yet) */
	const char *tag = NULL;
	const char *new_tag;

	for (line = block->u.t.first_line; line; line = line->next)
	{
		new_tag = tag_from_font_size(avg_font_size_of_line(line->first_char));
		/* Pointer comparison: relies on tag_from_font_size returning the
		 * same string literal for the same tag. */
		if (tag != new_tag)
		{
			if (tag)
			{
				/* Inline style elements must not straddle the block
				 * element boundary: close them here, reopen below. */
				if (font)
					fz_print_style_end_xhtml(ctx, out, font, sup);
				fz_write_printf(ctx, out, "</%s>", tag);
			}
			tag = new_tag;
			fz_write_printf(ctx, out, "<%s>", tag);
			if (font)
				fz_print_style_begin_xhtml(ctx, out, font, sup);
		}

		/* Separate this line from the previous one with a space. */
		if (!sp)
			fz_write_byte(ctx, out, ' ');

		for (ch = line->first_char; ch; ch = ch->next)
		{
			int ch_sup = detect_super_script(line, ch);
			if (ch->font != font || ch_sup != sup)
			{
				if (font)
					fz_print_style_end_xhtml(ctx, out, font, sup);
				font = ch->font;
				sup = ch_sup;
				fz_print_style_begin_xhtml(ctx, out, font, sup);
			}

			sp = (ch->c == ' ');
			switch (ch->c)
			{
			default:
				if (ch->c >= 32 && ch->c <= 127)
					fz_write_byte(ctx, out, ch->c);
				else
					fz_write_printf(ctx, out, "&#x%x;", ch->c);
				break;
			/* These must be entity references, otherwise page text could
			 * be interpreted as markup or break well-formedness. */
			case '<': fz_write_string(ctx, out, "&lt;"); break;
			case '>': fz_write_string(ctx, out, "&gt;"); break;
			case '&': fz_write_string(ctx, out, "&amp;"); break;
			case '"': fz_write_string(ctx, out, "&quot;"); break;
			case '\'': fz_write_string(ctx, out, "&apos;"); break;
			}
		}
	}

	if (font)
		fz_print_style_end_xhtml(ctx, out, font, sup);
	/* tag is still NULL when the block had no lines; there is then no
	 * element to close and NULL must not be passed to %s. */
	if (tag)
		fz_write_printf(ctx, out, "</%s>\n", tag);
}
|
||
|
|
||
|
/*
|
||
|
Output a page to a file in XHTML (semantic) format.
|
||
|
*/
|
||
|
void
|
||
|
fz_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
|
||
|
{
|
||
|
fz_stext_block *block;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<div id=\"page%d\">\n", id);
|
||
|
|
||
|
for (block = page->first_block; block; block = block->next)
|
||
|
{
|
||
|
if (block->type == FZ_STEXT_BLOCK_IMAGE)
|
||
|
fz_print_stext_image_as_xhtml(ctx, out, block);
|
||
|
else if (block->type == FZ_STEXT_BLOCK_TEXT)
|
||
|
fz_print_stext_block_as_xhtml(ctx, out, block);
|
||
|
}
|
||
|
|
||
|
fz_write_string(ctx, out, "</div>\n");
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fz_print_stext_header_as_xhtml(fz_context *ctx, fz_output *out)
|
||
|
{
|
||
|
fz_write_string(ctx, out, "<?xml version=\"1.0\"?>\n");
|
||
|
fz_write_string(ctx, out, "<!DOCTYPE html");
|
||
|
fz_write_string(ctx, out, " PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"");
|
||
|
fz_write_string(ctx, out, " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n");
|
||
|
fz_write_string(ctx, out, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n");
|
||
|
fz_write_string(ctx, out, "<head>\n");
|
||
|
fz_write_string(ctx, out, "<style>\n");
|
||
|
fz_write_string(ctx, out, "p{white-space:pre-wrap}\n");
|
||
|
fz_write_string(ctx, out, "</style>\n");
|
||
|
fz_write_string(ctx, out, "</head>\n");
|
||
|
fz_write_string(ctx, out, "<body>\n");
|
||
|
}
|
||
|
|
||
|
void
|
||
|
fz_print_stext_trailer_as_xhtml(fz_context *ctx, fz_output *out)
|
||
|
{
|
||
|
fz_write_string(ctx, out, "</body>\n");
|
||
|
fz_write_string(ctx, out, "</html>\n");
|
||
|
}
|
||
|
|
||
|
/* Detailed XML dump of the entire structured text data */
|
||
|
|
||
|
/*
|
||
|
Output a page to a file in XML format.
|
||
|
*/
|
||
|
void
|
||
|
fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id)
|
||
|
{
|
||
|
fz_stext_block *block;
|
||
|
fz_stext_line *line;
|
||
|
fz_stext_char *ch;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<page id=\"page%d\" width=\"%g\" height=\"%g\">\n", id,
|
||
|
page->mediabox.x1 - page->mediabox.x0,
|
||
|
page->mediabox.y1 - page->mediabox.y0);
|
||
|
|
||
|
for (block = page->first_block; block; block = block->next)
|
||
|
{
|
||
|
switch (block->type)
|
||
|
{
|
||
|
case FZ_STEXT_BLOCK_TEXT:
|
||
|
fz_write_printf(ctx, out, "<block bbox=\"%g %g %g %g\">\n",
|
||
|
block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
|
||
|
for (line = block->u.t.first_line; line; line = line->next)
|
||
|
{
|
||
|
fz_font *font = NULL;
|
||
|
float size = 0;
|
||
|
const char *name = NULL;
|
||
|
|
||
|
fz_write_printf(ctx, out, "<line bbox=\"%g %g %g %g\" wmode=\"%d\" dir=\"%g %g\">\n",
|
||
|
line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1,
|
||
|
line->wmode,
|
||
|
line->dir.x, line->dir.y);
|
||
|
|
||
|
for (ch = line->first_char; ch; ch = ch->next)
|
||
|
{
|
||
|
if (ch->font != font || ch->size != size)
|
||
|
{
|
||
|
if (font)
|
||
|
fz_write_string(ctx, out, "</font>\n");
|
||
|
font = ch->font;
|
||
|
size = ch->size;
|
||
|
name = font_full_name(ctx, font);
|
||
|
fz_write_printf(ctx, out, "<font name=\"%s\" size=\"%g\">\n", name, size);
|
||
|
}
|
||
|
fz_write_printf(ctx, out, "<char quad=\"%g %g %g %g %g %g %g %g\" x=\"%g\" y=\"%g\" color=\"#%06x\" c=\"",
|
||
|
ch->quad.ul.x, ch->quad.ul.y,
|
||
|
ch->quad.ur.x, ch->quad.ur.y,
|
||
|
ch->quad.ll.x, ch->quad.ll.y,
|
||
|
ch->quad.lr.x, ch->quad.lr.y,
|
||
|
ch->origin.x, ch->origin.y,
|
||
|
ch->color);
|
||
|
switch (ch->c)
|
||
|
{
|
||
|
case '<': fz_write_string(ctx, out, "<"); break;
|
||
|
case '>': fz_write_string(ctx, out, ">"); break;
|
||
|
case '&': fz_write_string(ctx, out, "&"); break;
|
||
|
case '"': fz_write_string(ctx, out, """); break;
|
||
|
case '\'': fz_write_string(ctx, out, "'"); break;
|
||
|
default:
|
||
|
if (ch->c >= 32 && ch->c <= 127)
|
||
|
fz_write_printf(ctx, out, "%c", ch->c);
|
||
|
else
|
||
|
fz_write_printf(ctx, out, "&#x%x;", ch->c);
|
||
|
break;
|
||
|
}
|
||
|
fz_write_string(ctx, out, "\"/>\n");
|
||
|
}
|
||
|
|
||
|
if (font)
|
||
|
fz_write_string(ctx, out, "</font>\n");
|
||
|
|
||
|
fz_write_string(ctx, out, "</line>\n");
|
||
|
}
|
||
|
fz_write_string(ctx, out, "</block>\n");
|
||
|
break;
|
||
|
|
||
|
case FZ_STEXT_BLOCK_IMAGE:
|
||
|
fz_write_printf(ctx, out, "<image bbox=\"%g %g %g %g\" />\n",
|
||
|
block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
fz_write_string(ctx, out, "</page>\n");
|
||
|
}
|
||
|
|
||
|
/* Plain text */
|
||
|
|
||
|
/*
|
||
|
Output a page to a file in UTF-8 format.
|
||
|
*/
|
||
|
void
|
||
|
fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page)
|
||
|
{
|
||
|
fz_stext_block *block;
|
||
|
fz_stext_line *line;
|
||
|
fz_stext_char *ch;
|
||
|
char utf[10];
|
||
|
int i, n;
|
||
|
|
||
|
for (block = page->first_block; block; block = block->next)
|
||
|
{
|
||
|
if (block->type == FZ_STEXT_BLOCK_TEXT)
|
||
|
{
|
||
|
for (line = block->u.t.first_line; line; line = line->next)
|
||
|
{
|
||
|
for (ch = line->first_char; ch; ch = ch->next)
|
||
|
{
|
||
|
n = fz_runetochar(utf, ch->c);
|
||
|
for (i = 0; i < n; i++)
|
||
|
fz_write_byte(ctx, out, utf[i]);
|
||
|
}
|
||
|
fz_write_string(ctx, out, "\n");
|
||
|
}
|
||
|
fz_write_string(ctx, out, "\n");
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Text output writer */
|
||
|
|
||
|
/* Output formats understood by the text document writer. */
enum
{
	FZ_FORMAT_TEXT = 0,	/* plain UTF-8 text */
	FZ_FORMAT_HTML = 1,	/* HTML with preserved layout */
	FZ_FORMAT_XHTML = 2,	/* semantic XHTML */
	FZ_FORMAT_STEXT = 3	/* XML dump of the structured text */
};
|
||
|
|
||
|
typedef struct fz_text_writer_s fz_text_writer;
|
||
|
|
||
|
struct fz_text_writer_s
|
||
|
{
|
||
|
fz_document_writer super;
|
||
|
int format;
|
||
|
int number;
|
||
|
fz_stext_options opts;
|
||
|
fz_stext_page *page;
|
||
|
fz_output *out;
|
||
|
};
|
||
|
|
||
|
static fz_device *
|
||
|
text_begin_page(fz_context *ctx, fz_document_writer *wri_, fz_rect mediabox)
|
||
|
{
|
||
|
fz_text_writer *wri = (fz_text_writer*)wri_;
|
||
|
|
||
|
if (wri->page)
|
||
|
{
|
||
|
fz_drop_stext_page(ctx, wri->page);
|
||
|
wri->page = NULL;
|
||
|
}
|
||
|
|
||
|
wri->number++;
|
||
|
|
||
|
wri->page = fz_new_stext_page(ctx, mediabox);
|
||
|
return fz_new_stext_device(ctx, wri->page, &wri->opts);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
text_end_page(fz_context *ctx, fz_document_writer *wri_, fz_device *dev)
|
||
|
{
|
||
|
fz_text_writer *wri = (fz_text_writer*)wri_;
|
||
|
|
||
|
fz_try(ctx)
|
||
|
{
|
||
|
fz_close_device(ctx, dev);
|
||
|
switch (wri->format)
|
||
|
{
|
||
|
default:
|
||
|
case FZ_FORMAT_TEXT:
|
||
|
fz_print_stext_page_as_text(ctx, wri->out, wri->page);
|
||
|
break;
|
||
|
case FZ_FORMAT_HTML:
|
||
|
fz_print_stext_page_as_html(ctx, wri->out, wri->page, wri->number);
|
||
|
break;
|
||
|
case FZ_FORMAT_XHTML:
|
||
|
fz_print_stext_page_as_xhtml(ctx, wri->out, wri->page, wri->number);
|
||
|
break;
|
||
|
case FZ_FORMAT_STEXT:
|
||
|
fz_print_stext_page_as_xml(ctx, wri->out, wri->page, wri->number);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
fz_always(ctx)
|
||
|
{
|
||
|
fz_drop_device(ctx, dev);
|
||
|
fz_drop_stext_page(ctx, wri->page);
|
||
|
wri->page = NULL;
|
||
|
}
|
||
|
fz_catch(ctx)
|
||
|
fz_rethrow(ctx);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
text_close_writer(fz_context *ctx, fz_document_writer *wri_)
|
||
|
{
|
||
|
fz_text_writer *wri = (fz_text_writer*)wri_;
|
||
|
switch (wri->format)
|
||
|
{
|
||
|
case FZ_FORMAT_HTML:
|
||
|
fz_print_stext_trailer_as_html(ctx, wri->out);
|
||
|
break;
|
||
|
case FZ_FORMAT_XHTML:
|
||
|
fz_print_stext_trailer_as_xhtml(ctx, wri->out);
|
||
|
break;
|
||
|
case FZ_FORMAT_STEXT:
|
||
|
fz_write_string(ctx, wri->out, "</document>\n");
|
||
|
break;
|
||
|
}
|
||
|
fz_close_output(ctx, wri->out);
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
text_drop_writer(fz_context *ctx, fz_document_writer *wri_)
|
||
|
{
|
||
|
fz_text_writer *wri = (fz_text_writer*)wri_;
|
||
|
fz_drop_stext_page(ctx, wri->page);
|
||
|
fz_drop_output(ctx, wri->out);
|
||
|
}
|
||
|
|
||
|
fz_document_writer *
|
||
|
fz_new_text_writer_with_output(fz_context *ctx, const char *format, fz_output *out, const char *options)
|
||
|
{
|
||
|
fz_text_writer *wri;
|
||
|
|
||
|
wri = fz_new_derived_document_writer(ctx, fz_text_writer, text_begin_page, text_end_page, text_close_writer, text_drop_writer);
|
||
|
fz_try(ctx)
|
||
|
{
|
||
|
fz_parse_stext_options(ctx, &wri->opts, options);
|
||
|
|
||
|
wri->format = FZ_FORMAT_TEXT;
|
||
|
if (!strcmp(format, "text"))
|
||
|
wri->format = FZ_FORMAT_TEXT;
|
||
|
else if (!strcmp(format, "html"))
|
||
|
wri->format = FZ_FORMAT_HTML;
|
||
|
else if (!strcmp(format, "xhtml"))
|
||
|
wri->format = FZ_FORMAT_XHTML;
|
||
|
else if (!strcmp(format, "stext"))
|
||
|
wri->format = FZ_FORMAT_STEXT;
|
||
|
|
||
|
wri->out = out;
|
||
|
|
||
|
switch (wri->format)
|
||
|
{
|
||
|
case FZ_FORMAT_HTML:
|
||
|
fz_print_stext_header_as_html(ctx, wri->out);
|
||
|
break;
|
||
|
case FZ_FORMAT_XHTML:
|
||
|
fz_print_stext_header_as_xhtml(ctx, wri->out);
|
||
|
break;
|
||
|
case FZ_FORMAT_STEXT:
|
||
|
fz_write_string(ctx, wri->out, "<?xml version=\"1.0\"?>\n");
|
||
|
fz_write_string(ctx, wri->out, "<document>\n");
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
fz_catch(ctx)
|
||
|
{
|
||
|
fz_free(ctx, wri);
|
||
|
fz_rethrow(ctx);
|
||
|
}
|
||
|
|
||
|
return (fz_document_writer*)wri;
|
||
|
}
|
||
|
|
||
|
fz_document_writer *
|
||
|
fz_new_text_writer(fz_context *ctx, const char *format, const char *path, const char *options)
|
||
|
{
|
||
|
fz_output *out = fz_new_output_with_path(ctx, path ? path : "out.txt", 0);
|
||
|
fz_document_writer *wri = NULL;
|
||
|
fz_try(ctx)
|
||
|
wri = fz_new_text_writer_with_output(ctx, format, out, options);
|
||
|
fz_catch(ctx)
|
||
|
{
|
||
|
fz_drop_output(ctx, out);
|
||
|
fz_rethrow(ctx);
|
||
|
}
|
||
|
return wri;
|
||
|
}
|