pdf_prim.c

Wed, 05 Mar 2003 20:39:50 +0000

author
eric
date
Wed, 05 Mar 2003 20:39:50 +0000
changeset 77
544fff830581
parent 74
12bc5088172e
child 82
abb03c7f4aab
permissions
-rw-r--r--

added count field to struct pdf_bookmark.

     1 /*
     2  * t2p: Create a PDF file from the contents of one or more TIFF
     3  *      bilevel image files.  The images in the resulting PDF file
     4  *      will be compressed using ITU-T T.6 (G4) fax encoding.
     5  *
     6  * PDF routines
     7  * $Id: pdf_prim.c,v 1.6 2003/03/04 17:58:36 eric Exp $
     8  * Copyright 2001, 2002, 2003 Eric Smith <eric@brouhaha.com>
     9  *
    10  * This program is free software; you can redistribute it and/or modify
    11  * it under the terms of the GNU General Public License version 2 as
    12  * published by the Free Software Foundation.  Note that permission is
    13  * not granted to redistribute this program under the terms of any
    14  * other version of the General Public License.
    15  *
    16  * This program is distributed in the hope that it will be useful,
    17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    19  * GNU General Public License for more details.
    20  *
    21  * You should have received a copy of the GNU General Public License
    22  * along with this program; if not, write to the Free Software
    23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111 USA
    24  */
    27 #include <stdarg.h>
    28 #include <stdbool.h>
    29 #include <stdint.h>
    30 #include <stdio.h>
    31 #include <stdlib.h>
    32 #include <string.h>
    34 #include "bitblt.h"
    35 #include "pdf.h"
    36 #include "pdf_util.h"
    37 #include "pdf_prim.h"
    38 #include "pdf_private.h"
    41 struct pdf_array_elem
    42 {
    43   struct pdf_array_elem *next;
    44   struct pdf_obj        *val;
    45 };
    48 struct pdf_array
    49 {
    50   struct pdf_array_elem *first;
    51   struct pdf_array_elem *last;
    52 };
    55 struct pdf_dict_entry
    56 {
    57   struct pdf_dict_entry *next;
    58   char                  *key;
    59   struct pdf_obj        *val;
    60 };
    63 struct pdf_dict
    64 {
    65   struct pdf_dict_entry *first;
    66 };
    69 #define STREAM_BUF_SIZE 4096
    71 struct pdf_stream
    72 {
    73   struct pdf_obj *stream_dict;
    74   struct pdf_obj *length;
    75   pdf_stream_write_callback callback;
    76   void *app_data;  /* arg to pass to callback */
    77   struct pdf_obj *filters;  /* name or array of names */
    78   struct pdf_obj *decode_parms;
    80   /* The following fields are used by pdf_stream_write_bits() and
    81      pdf_stream_flush_bits(). */
    82   uint32_t byte_idx;  /* index to next byte position in data buffer */
    83   uint32_t bit_idx;   /* index to next bit position in data buffer,
    84 			 0 = MSB, 7 = LSB */
    85   uint8_t data [STREAM_BUF_SIZE];
    86 };
    89 struct pdf_obj
    90 {
    91   /* these fields only apply to indirectly referenced objects */
    92   struct pdf_obj      *prev;
    93   struct pdf_obj      *next;
    94   unsigned long       obj_num;
    95   unsigned long       obj_gen;
    96   long int            file_offset;
    98   /* these fields apply to all objects */
    99   unsigned long       ref_count;
   100   pdf_obj_type        type;
   101   union {
   102     bool              boolean;
   103     char              *name;
   104     char              *string;
   105     long              integer;
   106     double            real;
   107     struct pdf_obj    *ind_ref;
   108     struct pdf_dict   dict;
   109     struct pdf_array  array;
   110     struct pdf_stream stream;
   111   } val;
   112 };
   115 struct pdf_obj *ref (struct pdf_obj *obj)
   116 {
   117   obj->ref_count++;
   118   return (obj);
   119 }
   122 void unref (struct pdf_obj *obj)
   123 {
   124   if ((--obj->ref_count) == 0)
   125     {
   126       /* $$$ free the object */
   127     }
   128 }
   131 struct pdf_obj *pdf_deref_ind_obj (struct pdf_obj *ind_obj)
   132 {
   133   pdf_assert (ind_obj->type == PT_IND_REF);
   134   return (ind_obj->val.ind_ref);
   135 }
   138 void pdf_set_dict_entry (struct pdf_obj *dict_obj, char *key, struct pdf_obj *val)
   139 {
   140   struct pdf_dict_entry *entry;
   142   if (dict_obj->type == PT_IND_REF)
   143     dict_obj = pdf_deref_ind_obj (dict_obj);
   145   pdf_assert (dict_obj->type == PT_DICTIONARY);
   147   /* replacing existing entry? */
   148   for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
   149     if (strcmp (entry->key, key) == 0)
   150       {
   151 	unref (entry->val);
   152 	entry->val = ref (val);
   153 	return;
   154       }
   156   /* new entry */
   157   entry = pdf_calloc (1, sizeof (struct pdf_dict_entry));
   159   entry->next = dict_obj->val.dict.first;
   160   dict_obj->val.dict.first = entry;
   162   entry->key = pdf_strdup (key);
   163   entry->val = ref (val);
   164 }
   167 struct pdf_obj *pdf_get_dict_entry (struct pdf_obj *dict_obj, char *key)
   168 {
   169   struct pdf_dict_entry *entry;
   171   if (dict_obj->type == PT_IND_REF)
   172     dict_obj = pdf_deref_ind_obj (dict_obj);
   174   pdf_assert (dict_obj->type == PT_DICTIONARY);
   176   for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
   177     if (strcmp (entry->key, key) == 0)
   178       return (entry->val);
   180   return (NULL);
   181 }
   184 void pdf_add_array_elem (struct pdf_obj *array_obj, struct pdf_obj *val)
   185 {
   186   struct pdf_array_elem *elem = pdf_calloc (1, sizeof (struct pdf_array_elem));
   188   if (array_obj->type == PT_IND_REF)
   189     array_obj = pdf_deref_ind_obj (array_obj);
   191   pdf_assert (array_obj->type == PT_ARRAY);
   193   elem->val = ref (val);
   195   if (! array_obj->val.array.first)
   196     array_obj->val.array.first = elem;
   197   else
   198     array_obj->val.array.last->next = elem;
   200   array_obj->val.array.last = elem;
   201 }
   204 struct pdf_obj *pdf_new_obj (pdf_obj_type type)
   205 {
   206   struct pdf_obj *obj = pdf_calloc (1, sizeof (struct pdf_obj));
   207   obj->type = type;
   208   return (obj);
   209 }
   212 struct pdf_obj *pdf_new_bool (bool val)
   213 {
   214   struct pdf_obj *obj = pdf_new_obj (PT_BOOL);
   215   obj->val.boolean = val;
   216   return (obj);
   217 }
   220 struct pdf_obj *pdf_new_name (char *name)
   221 {
   222   struct pdf_obj *obj = pdf_new_obj (PT_NAME);
   223   obj->val.name = pdf_strdup (name);
   224   return (obj);
   225 }
   228 struct pdf_obj *pdf_new_string (char *str)
   229 {
   230   struct pdf_obj *obj = pdf_new_obj (PT_STRING);
   231   obj->val.string = pdf_strdup (str);
   232   return (obj);
   233 }
   236 struct pdf_obj *pdf_new_integer (long val)
   237 {
   238   struct pdf_obj *obj = pdf_new_obj (PT_INTEGER);
   239   obj->val.integer = val;
   240   return (obj);
   241 }
   244 struct pdf_obj *pdf_new_real (double val)
   245 {
   246   struct pdf_obj *obj = pdf_new_obj (PT_REAL);
   247   obj->val.real = val;
   248   return (obj);
   249 }
   252 struct pdf_obj *pdf_new_stream (pdf_file_handle pdf_file,
   253 				struct pdf_obj *stream_dict,
   254 				pdf_stream_write_callback callback,
   255 				void *app_data)
   256 {
   257   struct pdf_obj *obj = pdf_new_obj (PT_STREAM);
   259   obj->val.stream.stream_dict = stream_dict;
   260   obj->val.stream.length = pdf_new_ind_ref (pdf_file, pdf_new_integer (0));
   261   pdf_set_dict_entry (obj->val.stream.stream_dict, "Length", obj->val.stream.length);
   263   obj->val.stream.callback = callback;
   264   obj->val.stream.app_data = app_data;
   265   return (obj);
   266 }
   269 /* $$$ currently limited to one filter per stream */
   270 void pdf_stream_add_filter (struct pdf_obj *stream,
   271 			    char *filter_name,
   272 			    struct pdf_obj *decode_parms)
   273 {
   274   if (stream->type == PT_IND_REF)
   275     stream = pdf_deref_ind_obj (stream);
   277   pdf_assert (stream->type == PT_STREAM);
   279   pdf_set_dict_entry (stream->val.stream.stream_dict, "Filter", pdf_new_name (filter_name));
   280   if (decode_parms)
   281     pdf_set_dict_entry (stream->val.stream.stream_dict, "DecodeParms", decode_parms);
   282 }
   285 struct pdf_obj *pdf_new_ind_ref (pdf_file_handle pdf_file, struct pdf_obj *obj)
   286 {
   287   struct pdf_obj *ind_obj;
   289   pdf_assert (obj->type != PT_IND_REF);
   291   ind_obj = pdf_new_obj (PT_IND_REF);
   293   ind_obj->type = PT_IND_REF;
   294   ind_obj->val.ind_ref = obj;
   296   /* is there already an indirect reference to this object? */
   297   if (! obj->obj_num)
   298     {
   299       /* no, assign object number/generation and add to linked list */
   300       if (! pdf_file->first_ind_obj)
   301 	{
   302 	  obj->obj_num = 1;
   303 	  pdf_file->first_ind_obj = pdf_file->last_ind_obj = obj;
   304 	}
   305       else
   306 	{
   307 	  obj->obj_num = pdf_file->last_ind_obj->obj_num + 1;
   308 	  pdf_file->last_ind_obj->next = obj;
   309 	  obj->prev = pdf_file->last_ind_obj;
   310 	  pdf_file->last_ind_obj = obj;
   311 	}
   312     }
   314   return (ind_obj);
   315 }
   318 long pdf_get_integer (struct pdf_obj *obj)
   319 {
   320   if (obj->type == PT_IND_REF)
   321     obj = pdf_deref_ind_obj (obj);
   323   pdf_assert (obj->type == PT_INTEGER);
   325   return (obj->val.integer);
   326 }
   328 void pdf_set_integer (struct pdf_obj *obj, long val)
   329 {
   330   if (obj->type == PT_IND_REF)
   331     obj = pdf_deref_ind_obj (obj);
   333   pdf_assert (obj->type == PT_INTEGER);
   335   obj->val.integer = val;
   336 }
   339 double pdf_get_real (struct pdf_obj *obj)
   340 {
   341   if (obj->type == PT_IND_REF)
   342     obj = pdf_deref_ind_obj (obj);
   344   pdf_assert (obj->type == PT_REAL);
   346   return (obj->val.real);
   347 }
   349 void pdf_set_real (struct pdf_obj *obj, double val)
   350 {
   351   if (obj->type == PT_IND_REF)
   352     obj = pdf_deref_ind_obj (obj);
   354   pdf_assert (obj->type == PT_REAL);
   356   obj->val.real = val;
   357 }
   360 static int name_char_needs_quoting (char c)
   361 {
   362   return ((c < '!')  || (c > '~')  || (c == '/') || (c == '\\') ||
   363 	  (c == '(') || (c == ')') || (c == '<') || (c == '>')  ||
   364 	  (c == '[') || (c == ']') || (c == '{') || (c == '}')  ||
   365 	  (c == '%'));
   366 }
   369 void pdf_write_name (pdf_file_handle pdf_file, char *s)
   370 {
   371   fprintf (pdf_file->f, "/");
   372   while (*s)
   373     if (name_char_needs_quoting (*s))
   374       fprintf (pdf_file->f, "#%02x", 0xff & *(s++));
   375     else
   376       fprintf (pdf_file->f, "%c", *(s++));
   377   fprintf (pdf_file->f, " ");
   378 }
   381 static int string_char_needs_quoting (char c)
   382 {
   383   return ((c < ' ')  || (c > '~')  || (c == '\\') ||
   384 	  (c == '(') || (c == ')'));
   385 }
   388 void pdf_write_string (pdf_file_handle pdf_file, char *s)
   389 {
   390   fprintf (pdf_file->f, "(");
   391   while (*s)
   392     if (string_char_needs_quoting (*s))
   393       fprintf (pdf_file->f, "\\%03o", 0xff & *(s++));
   394     else
   395       fprintf (pdf_file->f, "%c", *(s++));
   396   fprintf (pdf_file->f, ") ");
   397 }
   400 void pdf_write_real (pdf_file_handle pdf_file, double num)
   401 {
   402   /* $$$ not actually good enough, precision needs to be variable,
   403      and no exponent is allowed */
   404   fprintf (pdf_file->f, "%0f ", num);
   405 }
   408 void pdf_write_ind_ref (pdf_file_handle pdf_file, struct pdf_obj *ind_obj)
   409 {
   410   struct pdf_obj *obj = pdf_deref_ind_obj (ind_obj);
   411   fprintf (pdf_file->f, "%ld %ld R ", obj->obj_num, obj->obj_gen);
   412 }
   415 void pdf_write_array (pdf_file_handle pdf_file, struct pdf_obj *array_obj)
   416 {
   417   struct pdf_array_elem *elem;
   419   pdf_assert (array_obj->type == PT_ARRAY);
   421   fprintf (pdf_file->f, "[ ");
   422   for (elem = array_obj->val.array.first; elem; elem = elem->next)
   423     {
   424       pdf_write_obj (pdf_file, elem->val);
   425       fprintf (pdf_file->f, " ");
   426     }
   427   fprintf (pdf_file->f, "] ");
   428 }
   431 void pdf_write_dict (pdf_file_handle pdf_file, struct pdf_obj *dict_obj)
   432 {
   433   struct pdf_dict_entry *entry;
   435   pdf_assert (dict_obj->type == PT_DICTIONARY);
   437   fprintf (pdf_file->f, "<<\r\n");
   438   for (entry = dict_obj->val.dict.first; entry; entry = entry->next)
   439     {
   440       pdf_write_name (pdf_file, entry->key);
   441       fprintf (pdf_file->f, " ");
   442       pdf_write_obj (pdf_file, entry->val);
   443       fprintf (pdf_file->f, "\r\n");
   444     }
   445   fprintf (pdf_file->f, ">>\r\n");
   446 }
   449 void pdf_stream_write_data (pdf_file_handle pdf_file,
   450 			    struct pdf_obj *stream,
   451 			    char *data,
   452 			    unsigned long len)
   453 {
   454   while (len)
   455     {
   456       unsigned long l2 = fwrite (data, 1, len, pdf_file->f);
   457       data += l2;
   458       len -= l2;
   459       if (ferror (pdf_file->f))
   460 	pdf_fatal ("error writing stream data\n");
   461     }
   462 }
   465 void pdf_stream_flush_bits (pdf_file_handle pdf_file,
   466 			    struct pdf_obj *stream)
   467 {
   468   struct pdf_stream *s = & stream->val.stream;
   470   if (s->bit_idx)
   471     {
   472       /* zero remaining bits in last byte */
   473       s->data [s->byte_idx] &= ~ ((1 << (8 - s->bit_idx)) - 1);
   474       s->byte_idx++;
   475       s->bit_idx = 0;
   476     }
   477   pdf_stream_write_data (pdf_file, stream, 
   478 			 (char *) & s->data [0],
   479 			 s->byte_idx);
   480   s->byte_idx = 0;
   481 }
   484 static void pdf_stream_advance_byte (pdf_file_handle pdf_file,
   485 				     struct pdf_obj *stream)
   486 {
   487   struct pdf_stream *s = & stream->val.stream;
   489   s->byte_idx++;
   490   s->bit_idx = 0;
   491   if (s->byte_idx == STREAM_BUF_SIZE)
   492     pdf_stream_flush_bits (pdf_file, stream);
   493 }
   496 void pdf_stream_write_bits (pdf_file_handle pdf_file,
   497 			    struct pdf_obj *stream,
   498 			    uint32_t count,
   499 			    uint32_t bits)
   500 {
   501   struct pdf_stream *s = & stream->val.stream;
   503   uint32_t b2;  /* how many bits will fit in byte in data buffer */
   504   uint32_t c2;  /* how many bits to transfer on this iteration */
   505   uint32_t d2;  /* bits to transfer on this iteration */
   507   while (count)
   508     {
   509       b2 = 8 - s->bit_idx;
   510       if (b2 >= count)
   511 	c2 = count;
   512       else
   513 	c2 = b2;
   514       d2 = bits >> (count - c2);
   515       s->data [s->byte_idx] |= (d2 << (b2 + c2));
   516       s->bit_idx += c2;
   517       if (s->bit_idx > 7)
   518 	pdf_stream_advance_byte (pdf_file, stream);
   519       count -= c2;
   520     }
   521 }
   524 void pdf_stream_printf (pdf_file_handle pdf_file,
   525 			struct pdf_obj *stream,
   526 			char *fmt, ...)
   527 {
   528   va_list ap;
   530   va_start (ap, fmt);
   531   vfprintf (pdf_file->f, fmt, ap);
   532   va_end (ap);
   533 }
   536 void pdf_write_stream (pdf_file_handle pdf_file, struct pdf_obj *stream)
   537 {
   538   unsigned long begin_pos, end_pos;
   540   pdf_assert (stream->type == PT_STREAM);
   542   pdf_write_dict (pdf_file, stream->val.stream.stream_dict);
   543   fprintf (pdf_file->f, "stream\r\n");
   544   begin_pos = ftell (pdf_file->f);
   545   stream->val.stream.callback (pdf_file,
   546 			       stream,
   547 			       stream->val.stream.app_data);
   548   end_pos = ftell (pdf_file->f);
   549   fprintf (pdf_file->f, "\r\nendstream\r\n");
   551   pdf_set_integer (stream->val.stream.length, end_pos - begin_pos);
   552 }
   555 void pdf_write_obj (pdf_file_handle pdf_file, struct pdf_obj *obj)
   556 {
   557   switch (obj->type)
   558     {
   559     case PT_NULL:
   560       fprintf (pdf_file->f, "null ");
   561       break;
   562     case PT_BOOL:
   563       if (obj->val.boolean)
   564 	fprintf (pdf_file->f, "true ");
   565       else
   566 	fprintf (pdf_file->f, "false ");
   567       break;
   568     case PT_NAME:
   569       pdf_write_name (pdf_file, obj->val.name);
   570       break;
   571     case PT_STRING:
   572       pdf_write_string (pdf_file, obj->val.string);
   573       break;
   574     case PT_INTEGER:
   575       fprintf (pdf_file->f, "%ld ", obj->val.integer);
   576       break;
   577     case PT_REAL:
   578       pdf_write_real (pdf_file, obj->val.real);
   579       break;
   580     case PT_IND_REF:
   581       pdf_write_ind_ref (pdf_file, obj);
   582       break;
   583     case PT_DICTIONARY:
   584       pdf_write_dict (pdf_file, obj);
   585       break;
   586     case PT_ARRAY:
   587       pdf_write_array (pdf_file, obj);
   588       break;
   589     case PT_STREAM:
   590       pdf_write_stream (pdf_file, obj);
   591       break;
   592     default:
   593       pdf_fatal ("bad object type\n");
   594     }
   595 }
   598 void pdf_write_ind_obj (pdf_file_handle pdf_file, struct pdf_obj *ind_obj)
   599 {
   600   struct pdf_obj *obj;
   602   if (ind_obj->type == PT_IND_REF)
   603     obj = pdf_deref_ind_obj (ind_obj);
   604   else
   605     obj = ind_obj;
   607   obj->file_offset = ftell (pdf_file->f);
   608   fprintf (pdf_file->f, "%ld %ld obj\r\n", obj->obj_num, obj->obj_gen);
   609   pdf_write_obj (pdf_file, obj);
   610   fprintf (pdf_file->f, "endobj\r\n");
   611 }
   614 void pdf_write_all_ind_obj (pdf_file_handle pdf_file)
   615 {
   616   struct pdf_obj *ind_obj;
   617   for (ind_obj = pdf_file->first_ind_obj; ind_obj; ind_obj = ind_obj->next)
   618     if (! ind_obj->file_offset)
   619       pdf_write_ind_obj (pdf_file, ind_obj);
   620 }
   623 unsigned long pdf_write_xref (pdf_file_handle pdf_file)
   624 {
   625   struct pdf_obj *ind_obj;
   626   pdf_file->xref_offset = ftell (pdf_file->f);
   627   fprintf (pdf_file->f, "xref\r\n");
   628   fprintf (pdf_file->f, "0 %ld\r\n", pdf_file->last_ind_obj->obj_num + 1);
   629   fprintf (pdf_file->f, "0000000000 65535 f\r\n");
   630   for (ind_obj = pdf_file->first_ind_obj; ind_obj; ind_obj = ind_obj->next)
   631     fprintf (pdf_file->f, "%010ld 00000 n\r\n", ind_obj->file_offset);
   632   return (pdf_file->last_ind_obj->obj_num + 1);
   633 }