A file stream is a higher level interface on the primitive file descriptor facilities. A stream is linked to a file using an open operation. A stream is dissociated from a file using the close operation.

FILE structure is a complex structure that stores various data members needed for performing operations on a file.

struct _IO_FILE {
    int _flags;
    char *_IO_read_ptr;
    char *_IO_read_end;
    char *_IO_read_base;
    char *_IO_write_base;
    char *_IO_write_ptr;
    char *_IO_write_end;
    char *_IO_buf_base;
    char *_IO_buf_end;
    char *_IO_save_base;
    char *_IO_backup_base;
    char *_IO_save_end;
    _IO_marker *_markers;
    _IO_FILE *_chain;
    int _fileno;
    int _flags2;
    __off_t _old_offset;
    unsigned short _cur_column;
    signed char _vtable_offset;
    char _shortbuf[1];
    _IO_lock_t *_lock;
    __off64_t _offset;
    _IO_codecvt *_codecvt;
    _IO_wide_data *_wide_data;
    _IO_FILE *_freeres_list;
    void *_freeres_buf;
    size_t __pad5;
    int _mode;
    char _unused2[20];
}

typedef struct _IO_FILE FILE;

_flags is used to record the attribute of a stream such as read-only, read-write, etc. _IO_buf_base represents the start address of the buffer that is going to be used. _IO_buf_end is the buffer’s end address (reserve buffer). _IO_read_base and _IO_read_end are the start and end addresses of the read/input buffer respectively. _IO_write_base and _IO_write_end are the start and end addresses of the write/output buffer respectively. _IO_read_ptr and _IO_write_ptr point to the current position in their respective buffers. _fileno is the file descriptor of the open file which is returned by the open() system call internally used by fopen().

A fstream can be in at most one of put mode,get mode or putback mode. In a filebuf, there is only one current position , instead of two separate get and put pointers. In get mode, the current position is that of gptr() and in put mode, the current position is that of pptr(). The position in the buffer, that corresponds to the position in external file system is normally _IO_read_end, except in putback and append mode, where it is _IO_save_end.

Buffering of Streams

  • No buffering: When a stream is unbuffered, characters are intended to appear from the source or at the destination as soon as possible. Each I/O operation is written as soon as possible.

  • Line Buffering: On output, data is written when a newline character is inserted into the stream or when the buffer is full, whichever happens first. On input, the buffer is filled upto the occurence of a newline character.

  • Full Buffering: On output, data is written once the buffer is full. On input, the buffer is filled when an input operation is requested and the buffer is empty.

Usually, stdout is in line buffering mode, stderr in no buffering mode and


_IO_FILE_plus is an extension of the _IO_FILE struct with a vtable pointer.

struct _IO_FILE_plus {
    _IO_FILE file;
    const _IO_jump_t *vtable;
}

FILE structures form a linked list. _IO_list_all is the head of that linked list and _chain is the pointer to the next structure.

fopen

fopen() is used to open files. It allocates heap memory for the FILE structure by calling malloc.

*new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

After that, the vtable is initialized for the create FILE and _IO_file_init is further called to initialize the operation.

_IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
_IO_file_init (&new_f->fp);

After that, the FILE structure is linked to _IO_list_all by calling _IO_link_in. void

void _IO_link_in (struct _IO_FILE_plus *fp)
{
  if ((fp->file._flags & _IO_LINKED) == 0)
    {
      fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
      _IO_cleanup_region_start_noarg (flush_cleanup);
      _IO_lock_lock (list_all_lock);
      run_fp = (FILE *) fp;
      _IO_flockfile ((FILE *) fp);
#endif
      fp->file._chain = (FILE *) _IO_list_all;
      _IO_list_all = fp;
#ifdef _IO_MTSAFE_IO
      _IO_funlockfile ((FILE *) fp);
      run_fp = NULL;
      _IO_lock_unlock (list_all_lock);
      _IO_cleanup_region_end (0);
#endif
    }
}

After that, the _IO_file_fopen function is called which invokes the open() system call in order to open the target file according to the open mode provided by the user (such as read/read-write, etc.)

_IO_FILE *_IO_file_open (_IO_FILE *fp, const char *filename, int posix_mode, int prot,
           int read_write, int is32not64)
{
    
  int fdesc;

  ...
  #  Calling system functions open Open file   
  fdesc = open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
  ...
  #  Set the file descriptor to FILE The corresponding field of the structure _fileno in 
  fp->_fileno = fdesc;
  ...
  # Call again _IO_link_in
  _IO_link_in ((struct _IO_FILE_plus *) fp);
  return fp;
}
libc_hidden_def (_IO_file_open)

fclose

This function is closed for closing files.

int fclose(FILE *stream);

It flushes the stream and closes the underlying file descriptor. First of all, the FILE stream is unlinked from the linked list of file structures.

_IO_un_link ((struct _IO_FILE_plus *) fp);

After that, the _IO_file_close_it function is called.

int _IO_new_file_close_it (_IO_FILE *fp)
{
  int write_status;
  if (!_IO_file_is_open (fp))
    return EOF;

  if ((fp->_flags & _IO_NO_WRITES) == 0
      && (fp->_flags & _IO_CURRENTLY_PUTTING) != 0)
    write_status = _IO_do_flush (fp);
  else
    write_status = 0;

  _IO_unsave_markers (fp);

  int close_status = ((fp->_flags2 & _IO_FLAGS2_NOCLOSE) == 0
              ? _IO_SYSCLOSE (fp) : 0);

  /* Free buffer. */
#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
  if (fp->_mode > 0)
    {
      if (_IO_have_wbackup (fp))
    _IO_free_wbackup_area (fp);
      _IO_wsetb (fp, NULL, NULL, 0);
      _IO_wsetg (fp, NULL, NULL, NULL);
      _IO_wsetp (fp, NULL, NULL);
    }
#endif
  _IO_setb (fp, NULL, NULL, 0);
  _IO_setg (fp, NULL, NULL, NULL);
  _IO_setp (fp, NULL, NULL);

  _IO_un_link ((struct _IO_FILE_plus *) fp);
  fp->_flags = _IO_MAGIC|CLOSED_FILEBUF_FLAGS;
  fp->_fileno = -1;
  fp->_offset = _IO_pos_BAD;

  return close_status ? close_status : write_status;
}
libc_hidden_ver (_IO_new_file_close_it, _IO_file_close_it)

After that, _IO_file_finish is called, which corresponds to the _IO_file_finish function, which will call the free function to release the previously allocated FILE structure.

fread

This function is used to read data from a file stream.

size_t fread(void *restrict ptr, size_t size, size_t nmemb,FILE *restrict stream);
size_t _IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
  size_t bytes_requested = size * count;
  size_t bytes_read;
  CHECK_FILE (fp, 0);
  if (bytes_requested == 0)
    return 0;
  _IO_acquire_lock (fp);
  bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
  _IO_release_lock (fp);
  return bytes_requested == bytes_read ? count : bytes_read / size;
}

_IO_sgetn further calls _IO_XSGETN

_IO_size_t _IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
{
  /* FIXME handle putback buffer here! */
  return _IO_XSGETN (fp, data, n);
}

_IO_size_t _IO_file_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
{
  _IO_size_t want, have;
  _IO_ssize_t count;
  char *s = data;

  want = n;

  if (fp->_IO_buf_base == NULL)
    {
      /* Maybe we already have a push back pointer.  */
      if (fp->_IO_save_base != NULL)
    {
      free (fp->_IO_save_base);
      fp->_flags &= ~_IO_IN_BACKUP;
    }
      _IO_doallocbuf (fp);
    }

  while (want > 0)
    {
      have = fp->_IO_read_end - fp->_IO_read_ptr;
      if (want <= have)
    {
      memcpy (s, fp->_IO_read_ptr, want);
      fp->_IO_read_ptr += want;
      want = 0;
    }
      else
    {
      if (have > 0)
        {
#ifdef _LIBC
          s = __mempcpy (s, fp->_IO_read_ptr, have);
#else
          memcpy (s, fp->_IO_read_ptr, have);
          s += have;
#endif
          want -= have;
          fp->_IO_read_ptr += have;
        }

      /* Check for backup and repeat */
      if (_IO_in_backup (fp))
        {
          _IO_switch_to_main_get_area (fp);
          continue;
        }

      /* If we now want less than a buffer, underflow and repeat
         the copy.  Otherwise, _IO_SYSREAD directly to
         the user buffer. */
      if (fp->_IO_buf_base
          && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
        {
          if (__underflow (fp) == EOF)
        break;

          continue;
        }

      /* These must be set before the sysread as we might longjmp out
         waiting for input. */
      _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
      _IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);

      /* Try to maintain alignment: read a whole number of blocks.  */
      count = want;
      if (fp->_IO_buf_base)
        {
          _IO_size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
          if (block_size >= 128)
        count -= want % block_size;
        }

      count = _IO_SYSREAD (fp, s, count);
      if (count <= 0)
        {
          if (count == 0)
        fp->_flags |= _IO_EOF_SEEN;
          else
        fp->_flags |= _IO_ERR_SEEN;

          break;
        }

      s += count;
      want -= count;
      if (fp->_offset != _IO_pos_BAD)
        _IO_pos_adjust (fp->_offset, count);
    }
    }

  return n - want;
}

Conclusion: Whenever fread is called, a buffer is allocated if it is NULL. _IO_doallocbuf is called to allocate a new buffer for the file stream. After that, it executes the read system call to read data from the stream into the buffer. Finally, the data is copied from the stream buffer to the destination.

fwrite

size_t fwrite(const void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream);

This function is used to write data to a file stream.

size_t _IO_fwrite (const void *buf, size_t size, size_t count, FILE *fp)
{
  size_t request = size * count;
  size_t written = 0;
  CHECK_FILE (fp, 0);
  if (request == 0)
    return 0;
  _IO_acquire_lock (fp);
  if (_IO_vtable_offset (fp) != 0 || _IO_fwide (fp, -1) == -1)
    written = _IO_sputn (fp, (const char *) buf, request);
  _IO_release_lock (fp);
  /* We have written all of the input in case the return value indicates
     this or EOF is returned.  The latter is a special case where we
     simply did not manage to flush the buffer.  But the data is in the
     buffer and therefore written as far as fwrite is concerned.  */
  if (written == request || written == EOF)
    return count;
  else
    return written / size;
}

Further, the _IO_XSPUTN function is called.

#define _IO_sputn(__fp, __s, __n) _IO_XSPUTN (__fp, __s, __n)
#define _IO_new_file_xsputn _IO_file_xsputn

_IO_size_t _IO_new_file_xsputn (_IO_FILE *f, const void *data, _IO_size_t n)
{
  const char *s = (const char *) data;
  _IO_size_t to_do = n;
  int must_flush = 0;
  _IO_size_t count = 0;

  if (n <= 0)
    return 0;
  /* This is an optimized implementation.
     If the amount to be written straddles a block boundary
     (or the filebuf is unbuffered), use sys_write directly. */

  /* First figure out how much space is available in the buffer. */
  if ((f->_flags & _IO_LINE_BUF) && (f->_flags & _IO_CURRENTLY_PUTTING))
    {
      count = f->_IO_buf_end - f->_IO_write_ptr;
      if (count >= n)
    {
      const char *p;
      for (p = s + n; p > s; )
        {
          if (*--p == '\n')
        {
          count = p - s + 1;
          must_flush = 1;
          break;
        }
        }
    }
    }
  else if (f->_IO_write_end > f->_IO_write_ptr)
    count = f->_IO_write_end - f->_IO_write_ptr; /* Space available. */

  /* Then fill the buffer. */
  if (count > 0)
    {
      if (count > to_do)
    count = to_do;
#ifdef _LIBC
      f->_IO_write_ptr = __mempcpy (f->_IO_write_ptr, s, count);
#else
      memcpy (f->_IO_write_ptr, s, count);
      f->_IO_write_ptr += count;
#endif
      s += count;
      to_do -= count;
    }
  if (to_do + must_flush > 0)
    {
      _IO_size_t block_size, do_write;
      /* Next flush the (full) buffer. */
      if (_IO_OVERFLOW (f, EOF) == EOF)
    /* If nothing else has to be written we must not signal the
       caller that everything has been written.  */
    return to_do == 0 ? EOF : n - to_do;

      /* Try to maintain alignment: write a whole number of blocks.  */
      block_size = f->_IO_buf_end - f->_IO_buf_base;
      do_write = to_do - (block_size >= 128 ? to_do % block_size : 0);

      if (do_write)
    {
      count = new_do_write (f, s, do_write);
      to_do -= count;
      if (count < do_write)
        return n - to_do;
    }

      /* Now write out the remainder.  Normally, this will fit in the
     buffer, but it's somewhat messier for line-buffered files,
     so we let _IO_default_xsputn handle the general case. */
      if (to_do)
    to_do -= _IO_default_xsputn (f, s+do_write, to_do);
    }
  return n - to_do;
}

In the code above, we can see that _IO_OVERFLOW is called.

int
_IO_new_file_overflow (_IO_FILE *f, int ch)
{
  if (f->_flags & _IO_NO_WRITES) /* SET ERROR */
    {
      f->_flags |= _IO_ERR_SEEN;
      __set_errno (EBADF);
      return EOF;
    }
  /* If currently reading or no buffer allocated. */
  if ((f->_flags & _IO_CURRENTLY_PUTTING) == 0 || f->_IO_write_base == NULL)
    {
      /* Allocate a buffer if needed. */
      if (f->_IO_write_base == NULL)
    {
      _IO_doallocbuf (f);
      _IO_setg (f, f->_IO_buf_base, f->_IO_buf_base, f->_IO_buf_base);
    }
      /* Otherwise must be currently reading.
     If _IO_read_ptr (and hence also _IO_read_end) is at the buffer end,
     logically slide the buffer forwards one block (by setting the
     read pointers to all point at the beginning of the block).  This
     makes room for subsequent output.
     Otherwise, set the read pointers to _IO_read_end (leaving that
     alone, so it can continue to correspond to the external position). */
      if (__glibc_unlikely (_IO_in_backup (f)))
    {
      size_t nbackup = f->_IO_read_end - f->_IO_read_ptr;
      _IO_free_backup_area (f);
      f->_IO_read_base -= MIN (nbackup,
                   f->_IO_read_base - f->_IO_buf_base);
      f->_IO_read_ptr = f->_IO_read_base;
    }

      if (f->_IO_read_ptr == f->_IO_buf_end)
    f->_IO_read_end = f->_IO_read_ptr = f->_IO_buf_base;
      f->_IO_write_ptr = f->_IO_read_ptr;
      f->_IO_write_base = f->_IO_write_ptr;
      f->_IO_write_end = f->_IO_buf_end;
      f->_IO_read_base = f->_IO_read_ptr = f->_IO_read_end;

      f->_flags |= _IO_CURRENTLY_PUTTING;
      if (f->_mode <= 0 && f->_flags & (_IO_LINE_BUF | _IO_UNBUFFERED))
    f->_IO_write_end = f->_IO_write_ptr;
    }
  if (ch == EOF)
    return _IO_do_write (f, f->_IO_write_base,
             f->_IO_write_ptr - f->_IO_write_base);
  if (f->_IO_write_ptr == f->_IO_buf_end ) /* Buffer is really full */
    if (_IO_do_flush (f) == EOF)
      return EOF;
  *f->_IO_write_ptr++ = ch;
  if ((f->_flags & _IO_UNBUFFERED)
      || ((f->_flags & _IO_LINE_BUF) && ch == '\n'))
    if (_IO_do_write (f, f->_IO_write_base,
              f->_IO_write_ptr - f->_IO_write_base) == EOF)
      return EOF;
  return (unsigned char) ch;
}

Conclusion: The workflow of fwrite is similar to fread. It allocates a new buffer using _IO_doallocbuf if the stream buffer is NULL. Then, user data is copied to the stream buffer from the source and data is written to the file stream if the stream buffer is full. This is also known as flushing the stream.

vtables

In C++, for every class that contains virtual functions, the compiler constructs a virtual table known as vtable. It contains an entry for each virtual function accessible by the class and stores a pointer to its definition. Entries in the vtable can point to either functions declared in the class itself, or virtual functions inherited from a base class. There is a single vtable per class and is shared by all the instances of that class. There’s a pointer known as vptr that points to a vtable.

Earlier, we studied about the _IO_file_plus structure.

struct _IO_FILE_plus
{
  _IO_FILE file;
  const struct _IO_jump_t *vtable;
};

The vtable:

struct _IO_jump_t
{
    size_t __dummy;
    size_t __dummy2;
    _IO_finish_t __finish;
    _IO_overflow_t __overflow;
    _IO_underflow_t __underflow;
    _IO_underflow_t __uflow;
    _IO_pbackfail_t __pbackfail;
    _IO_xsputn_t __xsputn;
    _IO_xsgetn_t __xsgetn;
    _IO_seekoff_t __seekoff;
    _IO_seekpos_t __seekpos;
    _IO_setbuf_t __setbuf;
    _IO_sync_t __sync;
    _IO_doallocate_t __doallocate;
    _IO_read_t __read;
    _IO_write_t __write;
    _IO_seek_t __seek;
    _IO_close_t __close;
    _IO_stat_t __stat;
    _IO_showmanyc_t __showmanyc;
    _IO_imbue_t __imbue;
};

The types of most of these members are function pointers associated with FILE.

In the next part of my FSOP notes, we’ll see how to exploit the FILE structure.