fread源码解析

fread源码解析

周四 4月 09 2026
2567 字 · 20 分钟

前置知识

在分析fread函数时,应该先明确一下输入缓冲区是怎么来的。

首先fread函数会先将数据读到输入缓冲区中,然后从输入缓冲区中执行memcpy函数,拷贝一定字节的数据到我们指定的内存地址上。而这个输入缓冲区是从哪到哪呢?由两个指针分别声明了这片区域的开始和结束,他们分别叫做_IO_read_base和_IO_read_end,他们之间的区域就是输入缓冲区。这样看起来似乎还需要一个输出缓冲区,难道需要malloc申请两个堆块来分别表示输入缓冲区和输出缓冲区么?非也,其实malloc函数自始至终只申请了一个堆块这个堆块的区域也叫做reserve area,而_IO_buf_base和_IO_buf_end两个指针则分别声明了这个reserve area的始末。然后将 _IO_read_ptr; _IO_read_end; _IO_read_base; _IO_write_base; _IO_write_ptr; _IO_write_end;这六个指针全部初始化为了_IO_buf_base的值,现在的输入缓冲区和输出缓冲区还不存在(因为现在 _IO_read_end和 _IO_read_base的值相同),以输入缓冲区为例,读入数据时是执行了系统调用read,而此时的数据是在reserve area中,紧接着 _IO_read_end就会加上刚刚读入的数据的个数,那么此时 _IO_read_end和 _IO_read_base的值变的不同了。而现在这二者之间的区域就成为输入缓冲区

再提一下_IO_FILE结构体中的一些指针(如下)

char* _IO_read_ptr; /* Current read pointer */
char* _IO_read_end; /* End of get area. */
char* _IO_read_base; /* Start of putback+get area. */
char* _IO_write_base; /* Start of put area. */
char* _IO_write_ptr; /* Current put pointer. */
char* _IO_write_end; /* End of put area. */
char* _IO_buf_base; /* Start of reserve area. */
char* _IO_buf_end; /* End of reserve area. */

其中_IO_buf_base 和 _IO_buf_end两个指针的作用分别是标明reserve area的始末_IO_read_base 和 _IO_read_end两个指针的作用分别是标明输入缓冲区的始末(write那两个指针同理),现在假设有一个30字节的flag文件,然后我连续执行两次fread函数,每次从文件中只读10字节,那么第二次执行fread函数是从哪开始读呢,很明显并不是文件的开始来读取了,而是接着上回fread函数读到的位置,继续读10字节数据。但我们怎么去记录上回fread函数读到哪了呢,这就需要用到_IO_read_ptr指针了,它是来记录下一次数据应该从输入缓冲区的哪里开始读了。也就是说_IO_read_base 和 _IO_read_ptr 之间的区域是已经使用了的输入缓冲区,而 _IO_read_ptr 和 _IO_read_end之间的区域是输入缓冲区的剩余部分(也就是还未使用部分)

通过上面这两段文字,应该可以对刚学习IO的师傅对_IO_FILE结构体中表示缓冲区位置的指针有一些了解了。

同时这次还要提到vtable,它是_IO_FILE_plus结构体中的一个字段,也是一个虚表指针。它指向了_IO_jump_t结构体

struct _IO_FILE_plus
{
_IO_FILE file;
const struct _IO_jump_t *vtable;
};

对于_IO_jump_t结构体,我目前的理解它就是一个跳转表,这里放的都是函数指针。通过不同的偏移获取不同的函数指针,然后将其调用。

struct _IO_jump_t
{
JUMP_FIELD(size_t, __dummy);
JUMP_FIELD(size_t, __dummy2);
JUMP_FIELD(_IO_finish_t, __finish);
JUMP_FIELD(_IO_overflow_t, __overflow);
JUMP_FIELD(_IO_underflow_t, __underflow);
JUMP_FIELD(_IO_underflow_t, __uflow);
JUMP_FIELD(_IO_pbackfail_t, __pbackfail);
/* showmany */
JUMP_FIELD(_IO_xsputn_t, __xsputn);
JUMP_FIELD(_IO_xsgetn_t, __xsgetn);
JUMP_FIELD(_IO_seekoff_t, __seekoff);
JUMP_FIELD(_IO_seekpos_t, __seekpos);
JUMP_FIELD(_IO_setbuf_t, __setbuf);
JUMP_FIELD(_IO_sync_t, __sync);
JUMP_FIELD(_IO_doallocate_t, __doallocate);
JUMP_FIELD(_IO_read_t, __read);
JUMP_FIELD(_IO_write_t, __write);
JUMP_FIELD(_IO_seek_t, __seek);
JUMP_FIELD(_IO_close_t, __close);
JUMP_FIELD(_IO_stat_t, __stat);
JUMP_FIELD(_IO_showmanyc_t, __showmanyc);
JUMP_FIELD(_IO_imbue_t, __imbue);
#if 0
get_column;
set_column;
#endif
};

整体流程

Pasted image 20260409222325 Pasted image 20260409215903

Pasted image 20260409215924

源码解析

  1. 实际fread进入的是_IO_fread
#include "libioP.h"
size_t
_IO_fread (void *buf, size_t size, size_t count, FILE *fp)
{
size_t bytes_requested = size * count;
size_t bytes_read;
CHECK_FILE (fp, 0);
if (bytes_requested == 0)
return 0;
_IO_acquire_lock (fp);
bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
_IO_release_lock (fp);
return bytes_requested == bytes_read ? count : bytes_read / size;
}
libc_hidden_def (_IO_fread)
weak_alias (_IO_fread, fread)
# ifndef _IO_MTSAFE_IO
strong_alias (_IO_fread, __fread_unlocked)
libc_hidden_def (__fread_unlocked)
weak_alias (_IO_fread, fread_unlocked)
# endif

Pasted image 20260408104521

  1. 进入_IO_sgetn Pasted image 20260408104641
size_t
_IO_sgetn (FILE *fp, void *data, size_t n)
{
/* FIXME handle putback buffer here! */
return _IO_XSGETN (fp, data, n);
}
libc_hidden_def (_IO_sgetn)

IO_JUMPS_FUNC(THIS)返回的是_IO_jump_t地址,对应FILE结构体的vtable字段,由于JUMP2第一个参数就是__xsgetn,所以->FUNC访问的就是_IO_jump_t结构体中__xsgetn,采用的是vtable + offset的形式(借此可以伪造vtable,实现任意合法执行)

#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)
#define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)
# define _IO_JUMPS_FUNC(THIS) (IO_validate_vtable (_IO_JUMPS_FILE_plus (THIS)))
static inline const struct _IO_jump_t *
IO_validate_vtable (const struct _IO_jump_t *vtable)
{
uintptr_t ptr = (uintptr_t) vtable;
uintptr_t offset = ptr - (uintptr_t) &__io_vtables;
if (__glibc_unlikely (offset >= IO_VTABLES_LEN))
/* The vtable pointer is not in the expected section. Use the
slow path, which will terminate the process if necessary. */
_IO_vtable_check ();
return vtable;
}
void attribute_hidden
_IO_vtable_check (void)
{
#ifdef SHARED
/* Honor the compatibility flag. */
void (*flag) (void) = atomic_load_relaxed (&IO_accept_foreign_vtables);
PTR_DEMANGLE (flag);
if (flag == &_IO_vtable_check)
return;
/* In case this libc copy is in a non-default namespace, we always
need to accept foreign vtables because there is always a
possibility that FILE * objects are passed across the linking
boundary. */
{
Dl_info di;
struct link_map *l;
if (!rtld_active ()
|| (_dl_addr (_IO_vtable_check, &di, &l, NULL) != 0
&& l->l_ns != LM_ID_BASE))
return;
}
#else /* !SHARED */
/* We cannot perform vtable validation in the static dlopen case
because FILE * handles might be passed back and forth across the
boundary. Therefore, we disable checking in this case. */
if (__dlopen != NULL)
return;
#endif
__libc_fatal ("Fatal error: glibc detected an invalid stdio handle\n");
}
0x7ffff7e26661 <_IO_sgetn+33> jmp qword ptr [rax + 0x40] <__GI__IO_file_xsgetn>

Pasted image 20260408150628 Pasted image 20260408150513

  1. 进入__GI__IO_file_xsgetn Pasted image 20260408111344
size_t
_IO_file_xsgetn (FILE *fp, void *data, size_t n)
{
size_t want, have;
ssize_t count;
char *s = data;
want = n;
/*第一部分*/
//如果没有申请buf,_IO_doallocbuf内部malloc一个
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
_IO_free_backup_buf (fp, fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
while (want > 0)
{ /*第二部分*/
have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}
/*第三部分*/
/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}
/* If we now want less than a buffer, underflow and repeat
the copy. Otherwise, _IO_SYSREAD directly to
the user buffer. */
/*第四部分*/
if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
if (__underflow (fp) == EOF)
break;
continue;
}
/* These must be set before the sysread as we might longjmp out
waiting for input. */
_IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base);
_IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);
/* Try to maintain alignment: read a whole number of blocks. */
count = want;
if (fp->_IO_buf_base)
{
size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base;
if (block_size >= 128)
count -= want % block_size;
}
count = _IO_SYSREAD (fp, s, count);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN;
break;
}
s += count;
want -= count;
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
}
}
return n - want;
}
libc_hidden_def (_IO_file_xsgetn)

第一部分 3.1 进入_IO_doallocbuf Pasted image 20260409222749

void
_IO_doallocbuf (FILE *fp)
{
if (fp->_IO_buf_base)
return;
if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0)
if (_IO_DOALLOCATE (fp) != EOF)
return;
_IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);
}
libc_hidden_def (_IO_doallocbuf)

3.2通过vtable进入_IO_file_doallocate Pasted image 20260409223643 3.3进入vtable中的_IO_file_stat函数,也是通过vtable+offset Pasted image 20260408145106 3.4调用fstat64,这个系统调用是来获取文件状态,并且初始化st结构体的 Pasted image 20260408145224 可以看到此时的st_blksize为4096 Pasted image 20260408150041 3.5而这个st_blksize也就是接下来malloc申请的内存大小 Pasted image 20260408150213 3.6调用_IO_setb ,这个函数主要就是对_IO_buf_base和_IO_buf_end指针进行赋值

void
_IO_setb (FILE *f, char *b, char *eb, int a)
{
if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF))
free (f->_IO_buf_base);
f->_IO_buf_base = b;
f->_IO_buf_end = eb;
if (a)
f->_flags &= ~_IO_USER_BUF;
else
f->_flags |= _IO_USER_BUF;
}
libc_hidden_def (_IO_setb)

执行前 Pasted image 20260408151353 执行后 Pasted image 20260408155610 第二部分 have是剩余的缓冲区大小,want是需要读入的大小 如果want小于等于have直接memcpy 如果have还有剩余但是want大于have,把能读的先读完

have = fp->_IO_read_end - fp->_IO_read_ptr;
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
else
{
if (have > 0)
{
s = __mempcpy (s, fp->_IO_read_ptr, have);
want -= have;
fp->_IO_read_ptr += have;
}

第三部分

如果当前处于备用的缓冲区,切换为主缓冲区

/* Check for backup and repeat */
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
continue;
}
#define _IO_in_backup(fp) ((fp)->_flags & _IO_IN_BACKUP)
void
_IO_switch_to_main_get_area (FILE *fp)
{
char *tmp;
fp->_flags &= ~_IO_IN_BACKUP;
/* Swap _IO_read_end and _IO_save_end. */
tmp = fp->_IO_read_end;
fp->_IO_read_end = fp->_IO_save_end;
fp->_IO_save_end= tmp;
/* Swap _IO_read_base and _IO_save_base. */
tmp = fp->_IO_read_base;
fp->_IO_read_base = fp->_IO_save_base;
fp->_IO_save_base = tmp;
/* Set _IO_read_ptr. */
fp->_IO_read_ptr = fp->_IO_read_base;
}

第四部分 __underflow 它先是经过一些检查后,去调用了vtable中的_IO_file_underflow函数

int
__underflow (FILE *fp)
{
if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1)
return EOF;
if (fp->_mode == 0)
_IO_fwide (fp, -1);
if (_IO_in_put_mode (fp))
if (_IO_switch_to_get_mode (fp) == EOF)
return EOF;
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (_IO_in_backup (fp))
{
_IO_switch_to_main_get_area (fp);
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
}
if (_IO_have_markers (fp))
{
if (save_for_backup (fp, fp->_IO_read_end))
return EOF;
}
else if (_IO_have_backup (fp))
_IO_free_backup_area (fp);
return _IO_UNDERFLOW (fp);
}
libc_hidden_def (__underflow)

在最后_IO_UNDERFLOW,通过vtable调用了_IO_file_underflow函数 Pasted image 20260408211800

int
_IO_new_file_underflow (FILE *fp)
{
ssize_t count;
/* C99 requires EOF to be "sticky". */
if (fp->_flags & _IO_EOF_SEEN)
return EOF;
if (fp->_flags & _IO_NO_READS)
{
fp->_flags |= _IO_ERR_SEEN;
__set_errno (EBADF);
return EOF;
}
if (fp->_IO_read_ptr < fp->_IO_read_end)
return *(unsigned char *) fp->_IO_read_ptr;
if (fp->_IO_buf_base == NULL)
{
/* Maybe we already have a push back pointer. */
if (fp->_IO_save_base != NULL)
{
_IO_free_backup_buf (fp, fp->_IO_save_base);
fp->_flags &= ~_IO_IN_BACKUP;
}
_IO_doallocbuf (fp);
}
/* FIXME This can/should be moved to genops ?? */
if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED))
{
/* We used to flush all line-buffered stream. This really isn't
required by any standard. My recollection is that
traditional Unix systems did this for stdout. stderr better
not be line buffered. So we do just that here
explicitly. --drepper */
_IO_acquire_lock (stdout);
if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF))
== (_IO_LINKED | _IO_LINE_BUF))
_IO_OVERFLOW (stdout, EOF);
_IO_release_lock (stdout);
}
_IO_switch_to_get_mode (fp);
/* This is very tricky. We have to adjust those
pointers before we call _IO_SYSREAD () since
we may longjump () out while waiting for
input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;
count = _IO_SYSREAD (fp, fp->_IO_buf_base,
fp->_IO_buf_end - fp->_IO_buf_base);
if (count <= 0)
{
if (count == 0)
fp->_flags |= _IO_EOF_SEEN;
else
fp->_flags |= _IO_ERR_SEEN, count = 0;
}
fp->_IO_read_end += count;
if (count == 0)
{
/* If a stream is read to EOF, the calling application may switch active
handles. As a result, our offset cache would no longer be valid, so
unset it. */
fp->_offset = _IO_pos_BAD;
return EOF;
}
if (fp->_offset != _IO_pos_BAD)
_IO_pos_adjust (fp->_offset, count);
return *(unsigned char *) fp->_IO_read_ptr;
}
libc_hidden_ver (_IO_new_file_underflow, _IO_file_underflow)

上来就是很多检查,不过根据程序当前的状态,直接跳过了前面的检查。先去执行了_IO_switch_to_get_mode 函数 可以看到是给_IO_read_base赋值,其他也赋值了不过都是0

int
_IO_switch_to_get_mode (FILE *fp)
{
if (fp->_IO_write_ptr > fp->_IO_write_base)
if (_IO_OVERFLOW (fp, EOF) == EOF)
return EOF;
if (_IO_in_backup (fp))
fp->_IO_read_base = fp->_IO_backup_base;
else
{
fp->_IO_read_base = fp->_IO_buf_base;
if (fp->_IO_write_ptr > fp->_IO_read_end)
fp->_IO_read_end = fp->_IO_write_ptr;
}
fp->_IO_read_ptr = fp->_IO_write_ptr;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_read_ptr;
fp->_flags &= ~_IO_CURRENTLY_PUTTING;
return 0;
}
libc_hidden_def (_IO_switch_to_get_mode)

执行前 Pasted image 20260408213537 执行后 Pasted image 20260408214114

接下来就是赋值

fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base;
fp->_IO_read_end = fp->_IO_buf_base;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end
= fp->_IO_buf_base;

执行后 Pasted image 20260408214358 然后是执行,通过vtavle进入_IO_file_read Pasted image 20260408214540

count = _IO_SYSREAD (fp, fp->_IO_buf_base,

内部调用了read,读入_IO_read_ptr Pasted image 20260408214735 读入后,现在_IO_read_end没有变 Pasted image 20260408214909 执行到

fp->_IO_read_end += count;

的时候才被改变 Pasted image 20260408215248 之后再次循环,不断改变_IO_read_ptr的值 Pasted image 20260408215408 直到结束,退出


Thanks for reading!

fread源码解析

周四 4月 09 2026
2567 字 · 20 分钟