前置知识
在分析fread函数时,应该先明确一下输入缓冲区是怎么来的。
首先,fread函数会先把数据读到输入缓冲区中,然后用memcpy从输入缓冲区拷贝指定字节数的数据到我们给出的内存地址上。那么这个输入缓冲区是从哪到哪呢?它的起止位置由两个指针标明,分别叫做_IO_read_base和_IO_read_end,二者之间的区域就是输入缓冲区。这样看起来似乎还需要一个输出缓冲区,难道需要malloc申请两个堆块来分别表示输入缓冲区和输出缓冲区么?非也,其实malloc自始至终只申请了一个堆块,这个堆块所在的区域也叫做reserve area,而_IO_buf_base和_IO_buf_end两个指针分别标明了这个reserve area的始末。随后 _IO_read_ptr、_IO_read_end、_IO_read_base、_IO_write_base、_IO_write_ptr、_IO_write_end 这六个指针全部被初始化为_IO_buf_base的值,此时输入缓冲区和输出缓冲区都还不存在(因为此时 _IO_read_end 和 _IO_read_base 的值相同)。以输入缓冲区为例:读入数据时执行的是系统调用read,数据被读到reserve area中,紧接着 _IO_read_end 会加上刚刚读入的字节数,于是 _IO_read_end 和 _IO_read_base 的值不再相同,二者之间的区域就成为了输入缓冲区。
再提一下_IO_FILE结构体中的一些指针(如下)
char* _IO_read_ptr; /* Current read pointer */char* _IO_read_end; /* End of get area. */char* _IO_read_base; /* Start of putback+get area. */char* _IO_write_base; /* Start of put area. */char* _IO_write_ptr; /* Current put pointer. */char* _IO_write_end; /* End of put area. */char* _IO_buf_base; /* Start of reserve area. */char* _IO_buf_end; /* End of reserve area. */
其中_IO_buf_base 和 _IO_buf_end两个指针的作用分别是标明reserve area的始末。_IO_read_base 和 _IO_read_end两个指针的作用分别是标明输入缓冲区的始末(_IO_write_base 和 _IO_write_end 对输出缓冲区同理)。现在假设有一个30字节的flag文件,我连续执行两次fread函数,每次只从文件中读10字节,那么第二次执行fread函数是从哪开始读呢?很明显不是从文件开头重新读取,而是接着上一次fread函数读到的位置,继续读10字节数据。但我们怎么记录上一次fread函数读到哪了呢?这就需要用到_IO_read_ptr指针了,它记录的是下一次应该从输入缓冲区的哪个位置开始读。也就是说,_IO_read_base 和 _IO_read_ptr 之间的区域是已经使用了的输入缓冲区,而 _IO_read_ptr 和 _IO_read_end之间的区域是输入缓冲区的剩余部分(也就是还未使用的部分)。
通过上面这两段文字,刚开始学习IO的师傅应该可以对_IO_FILE结构体中表示缓冲区位置的指针有一些了解了。
同时这次还要提到vtable,它是_IO_FILE_plus结构体中的一个字段,也是一个虚表指针。它指向了_IO_jump_t结构体
struct _IO_FILE_plus{ _IO_FILE file; const struct _IO_jump_t *vtable;};对于_IO_jump_t结构体,我目前的理解它就是一个跳转表,这里放的都是函数指针。通过不同的偏移获取不同的函数指针,然后将其调用。
struct _IO_jump_t{ JUMP_FIELD(size_t, __dummy); JUMP_FIELD(size_t, __dummy2); JUMP_FIELD(_IO_finish_t, __finish); JUMP_FIELD(_IO_overflow_t, __overflow); JUMP_FIELD(_IO_underflow_t, __underflow); JUMP_FIELD(_IO_underflow_t, __uflow); JUMP_FIELD(_IO_pbackfail_t, __pbackfail); /* showmany */ JUMP_FIELD(_IO_xsputn_t, __xsputn); JUMP_FIELD(_IO_xsgetn_t, __xsgetn); JUMP_FIELD(_IO_seekoff_t, __seekoff); JUMP_FIELD(_IO_seekpos_t, __seekpos); JUMP_FIELD(_IO_setbuf_t, __setbuf); JUMP_FIELD(_IO_sync_t, __sync); JUMP_FIELD(_IO_doallocate_t, __doallocate); JUMP_FIELD(_IO_read_t, __read); JUMP_FIELD(_IO_write_t, __write); JUMP_FIELD(_IO_seek_t, __seek); JUMP_FIELD(_IO_close_t, __close); JUMP_FIELD(_IO_stat_t, __stat); JUMP_FIELD(_IO_showmanyc_t, __showmanyc); JUMP_FIELD(_IO_imbue_t, __imbue);#if 0 get_column; set_column;#endif};整体流程


源码解析
- 实际fread进入的是_IO_fread
#include "libioP.h"
size_t_IO_fread (void *buf, size_t size, size_t count, FILE *fp){ size_t bytes_requested = size * count; size_t bytes_read; CHECK_FILE (fp, 0); if (bytes_requested == 0) return 0; _IO_acquire_lock (fp); bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested); _IO_release_lock (fp); return bytes_requested == bytes_read ? count : bytes_read / size;}libc_hidden_def (_IO_fread)weak_alias (_IO_fread, fread)
# ifndef _IO_MTSAFE_IOstrong_alias (_IO_fread, __fread_unlocked)libc_hidden_def (__fread_unlocked)weak_alias (_IO_fread, fread_unlocked)# endif
- 进入_IO_sgetn

size_t_IO_sgetn (FILE *fp, void *data, size_t n){ /* FIXME handle putback buffer here! */ return _IO_XSGETN (fp, data, n);}libc_hidden_def (_IO_sgetn)
_IO_JUMPS_FUNC(THIS)返回的是_IO_jump_t结构体的地址,对应_IO_FILE_plus结构体的vtable字段。由于JUMP2的第一个参数就是__xsgetn,所以->FUNC访问的就是_IO_jump_t结构体中的__xsgetn,采用的是vtable + offset的形式(借此可以伪造vtable来劫持控制流;但要注意下面的IO_validate_vtable会检查vtable指针是否位于__io_vtables区间内,不在区间内会进入_IO_vtable_check,可能直接终止进程)。
#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)#define JUMP2(FUNC, THIS, X1, X2) (_IO_JUMPS_FUNC(THIS)->FUNC) (THIS, X1, X2)# define _IO_JUMPS_FUNC(THIS) (IO_validate_vtable (_IO_JUMPS_FILE_plus (THIS)))static inline const struct _IO_jump_t *IO_validate_vtable (const struct _IO_jump_t *vtable){ uintptr_t ptr = (uintptr_t) vtable; uintptr_t offset = ptr - (uintptr_t) &__io_vtables; if (__glibc_unlikely (offset >= IO_VTABLES_LEN)) /* The vtable pointer is not in the expected section. Use the slow path, which will terminate the process if necessary. */ _IO_vtable_check (); return vtable;}void attribute_hidden_IO_vtable_check (void){#ifdef SHARED /* Honor the compatibility flag. */ void (*flag) (void) = atomic_load_relaxed (&IO_accept_foreign_vtables); PTR_DEMANGLE (flag); if (flag == &_IO_vtable_check) return;
/* In case this libc copy is in a non-default namespace, we alwaysneed to accept foreign vtables because there is always apossibility that FILE * objects are passed across the linkingboundary. */ { Dl_info di; struct link_map *l; if (!rtld_active () || (_dl_addr (_IO_vtable_check, &di, &l, NULL) != 0 && l->l_ns != LM_ID_BASE)) return; }
#else /* !SHARED */ /* We cannot perform vtable validation in the static dlopen case because FILE * handles might be passed back and forth across the boundary. Therefore, we disable checking in this case. */ if (__dlopen != NULL) return;#endif
__libc_fatal ("Fatal error: glibc detected an invalid stdio handle\n");
}0x7ffff7e26661 <_IO_sgetn+33> jmp qword ptr [rax + 0x40] <__GI__IO_file_xsgetn>

- 进入__GI__IO_file_xsgetn

size_t_IO_file_xsgetn (FILE *fp, void *data, size_t n){ size_t want, have; ssize_t count; char *s = data; want = n;
/*第一部分*/ //如果没有申请buf,_IO_doallocbuf内部malloc一个 if (fp->_IO_buf_base == NULL) { /* Maybe we already have a push back pointer. */ if (fp->_IO_save_base != NULL) { _IO_free_backup_buf (fp, fp->_IO_save_base); fp->_flags &= ~_IO_IN_BACKUP; } _IO_doallocbuf (fp); }
while (want > 0) { /*第二部分*/ have = fp->_IO_read_end - fp->_IO_read_ptr; if (want <= have) { memcpy (s, fp->_IO_read_ptr, want); fp->_IO_read_ptr += want; want = 0; } else { if (have > 0) { s = __mempcpy (s, fp->_IO_read_ptr, have); want -= have; fp->_IO_read_ptr += have; }
/*第三部分*/ /* Check for backup and repeat */ if (_IO_in_backup (fp)) { _IO_switch_to_main_get_area (fp); continue; }
/* If we now want less than a buffer, underflow and repeat the copy. Otherwise, _IO_SYSREAD directly to the user buffer. */ /*第四部分*/ if (fp->_IO_buf_base && want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base)) { if (__underflow (fp) == EOF) break; continue; }
/* These must be set before the sysread as we might longjmp out waiting for input. */ _IO_setg (fp, fp->_IO_buf_base, fp->_IO_buf_base, fp->_IO_buf_base); _IO_setp (fp, fp->_IO_buf_base, fp->_IO_buf_base);
/* Try to maintain alignment: read a whole number of blocks. */ count = want; if (fp->_IO_buf_base) { size_t block_size = fp->_IO_buf_end - fp->_IO_buf_base; if (block_size >= 128) count -= want % block_size; }
count = _IO_SYSREAD (fp, s, count); if (count <= 0) { if (count == 0) fp->_flags |= _IO_EOF_SEEN; else fp->_flags |= _IO_ERR_SEEN; break; }
s += count; want -= count; if (fp->_offset != _IO_pos_BAD) _IO_pos_adjust (fp->_offset, count); } }
return n - want;}libc_hidden_def (_IO_file_xsgetn)第一部分 3.1 进入_IO_doallocbuf 
void_IO_doallocbuf (FILE *fp){ if (fp->_IO_buf_base) return; if (!(fp->_flags & _IO_UNBUFFERED) || fp->_mode > 0) if (_IO_DOALLOCATE (fp) != EOF) return; _IO_setb (fp, fp->_shortbuf, fp->_shortbuf+1, 0);}libc_hidden_def (_IO_doallocbuf)3.2通过vtable进入_IO_file_doallocate
3.3进入vtable中的_IO_file_stat函数,也是通过vtable+offset
3.4调用fstat64,这个系统调用用来获取文件状态,并填充st结构体
可以看到此时的st_blksize为4096
3.5而这个st_blksize也就是接下来malloc申请的内存大小
3.6调用_IO_setb ,这个函数主要就是对_IO_buf_base和_IO_buf_end指针进行赋值
void_IO_setb (FILE *f, char *b, char *eb, int a){ if (f->_IO_buf_base && !(f->_flags & _IO_USER_BUF)) free (f->_IO_buf_base); f->_IO_buf_base = b; f->_IO_buf_end = eb; if (a) f->_flags &= ~_IO_USER_BUF; else f->_flags |= _IO_USER_BUF;}libc_hidden_def (_IO_setb)执行前
执行后
第二部分 have是输入缓冲区中尚未读取的数据大小(_IO_read_end - _IO_read_ptr),want是还需要读入的大小。如果want小于等于have,直接memcpy;如果have还有剩余但是want大于have,就先把缓冲区里能读的读完。
have = fp->_IO_read_end - fp->_IO_read_ptr; if (want <= have) { memcpy (s, fp->_IO_read_ptr, want); fp->_IO_read_ptr += want; want = 0; } else { if (have > 0) { s = __mempcpy (s, fp->_IO_read_ptr, have); want -= have; fp->_IO_read_ptr += have; }第三部分
如果当前处于备用的缓冲区,切换为主缓冲区
/* Check for backup and repeat */ if (_IO_in_backup (fp)) { _IO_switch_to_main_get_area (fp); continue; }#define _IO_in_backup(fp) ((fp)->_flags & _IO_IN_BACKUP)void_IO_switch_to_main_get_area (FILE *fp){ char *tmp; fp->_flags &= ~_IO_IN_BACKUP; /* Swap _IO_read_end and _IO_save_end. */ tmp = fp->_IO_read_end; fp->_IO_read_end = fp->_IO_save_end; fp->_IO_save_end= tmp; /* Swap _IO_read_base and _IO_save_base. */ tmp = fp->_IO_read_base; fp->_IO_read_base = fp->_IO_save_base; fp->_IO_save_base = tmp; /* Set _IO_read_ptr. */ fp->_IO_read_ptr = fp->_IO_read_base;}第四部分 __underflow 它先是经过一些检查后,去调用了vtable中的_IO_file_underflow函数
int__underflow (FILE *fp){ if (_IO_vtable_offset (fp) == 0 && _IO_fwide (fp, -1) != -1) return EOF; if (fp->_mode == 0) _IO_fwide (fp, -1); if (_IO_in_put_mode (fp)) if (_IO_switch_to_get_mode (fp) == EOF) return EOF; if (fp->_IO_read_ptr < fp->_IO_read_end) return *(unsigned char *) fp->_IO_read_ptr; if (_IO_in_backup (fp)) { _IO_switch_to_main_get_area (fp); if (fp->_IO_read_ptr < fp->_IO_read_end) return *(unsigned char *) fp->_IO_read_ptr; } if (_IO_have_markers (fp)) { if (save_for_backup (fp, fp->_IO_read_end)) return EOF; } else if (_IO_have_backup (fp)) _IO_free_backup_area (fp); return _IO_UNDERFLOW (fp);}libc_hidden_def (__underflow)在最后_IO_UNDERFLOW,通过vtable调用了_IO_file_underflow函数 
int_IO_new_file_underflow (FILE *fp){ ssize_t count; /* C99 requires EOF to be "sticky". */ if (fp->_flags & _IO_EOF_SEEN) return EOF; if (fp->_flags & _IO_NO_READS) { fp->_flags |= _IO_ERR_SEEN; __set_errno (EBADF); return EOF; } if (fp->_IO_read_ptr < fp->_IO_read_end) return *(unsigned char *) fp->_IO_read_ptr; if (fp->_IO_buf_base == NULL) { /* Maybe we already have a push back pointer. */ if (fp->_IO_save_base != NULL) { _IO_free_backup_buf (fp, fp->_IO_save_base); fp->_flags &= ~_IO_IN_BACKUP; } _IO_doallocbuf (fp); } /* FIXME This can/should be moved to genops ?? */ if (fp->_flags & (_IO_LINE_BUF|_IO_UNBUFFERED)) {
/* We used to flush all line-buffered stream. This really isn't required by any standard. My recollection is that traditional Unix systems did this for stdout. stderr better not be line buffered. So we do just that here explicitly. --drepper */
_IO_acquire_lock (stdout); if ((stdout->_flags & (_IO_LINKED | _IO_NO_WRITES | _IO_LINE_BUF)) == (_IO_LINKED | _IO_LINE_BUF)) _IO_OVERFLOW (stdout, EOF); _IO_release_lock (stdout); }
_IO_switch_to_get_mode (fp);
/* This is very tricky. We have to adjust those pointers before we call _IO_SYSREAD () since we may longjump () out while waiting for input. Those pointers may be screwed up. H.J. */
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base; fp->_IO_read_end = fp->_IO_buf_base; fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_buf_base;
count = _IO_SYSREAD (fp, fp->_IO_buf_base, fp->_IO_buf_end - fp->_IO_buf_base); if (count <= 0) { if (count == 0) fp->_flags |= _IO_EOF_SEEN; else fp->_flags |= _IO_ERR_SEEN, count = 0; } fp->_IO_read_end += count; if (count == 0) {
/* If a stream is read to EOF, the calling application may switch active handles. As a result, our offset cache would no longer be valid, so unset it. */
fp->_offset = _IO_pos_BAD; return EOF; } if (fp->_offset != _IO_pos_BAD) _IO_pos_adjust (fp->_offset, count); return *(unsigned char *) fp->_IO_read_ptr;}
libc_hidden_ver (_IO_new_file_underflow, _IO_file_underflow)
函数一上来就是很多检查,不过根据程序当前的状态,前面的检查都直接跳过了。先去执行了_IO_switch_to_get_mode 函数,可以看到它给_IO_read_base赋了值,其他指针也被赋值了,不过值都是0。
int_IO_switch_to_get_mode (FILE *fp){ if (fp->_IO_write_ptr > fp->_IO_write_base) if (_IO_OVERFLOW (fp, EOF) == EOF) return EOF; if (_IO_in_backup (fp)) fp->_IO_read_base = fp->_IO_backup_base; else { fp->_IO_read_base = fp->_IO_buf_base; if (fp->_IO_write_ptr > fp->_IO_read_end) fp->_IO_read_end = fp->_IO_write_ptr; } fp->_IO_read_ptr = fp->_IO_write_ptr;
fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_read_ptr;
fp->_flags &= ~_IO_CURRENTLY_PUTTING; return 0;}
libc_hidden_def (_IO_switch_to_get_mode)执行前
执行后 
接下来就是赋值
fp->_IO_read_base = fp->_IO_read_ptr = fp->_IO_buf_base; fp->_IO_read_end = fp->_IO_buf_base; fp->_IO_write_base = fp->_IO_write_ptr = fp->_IO_write_end = fp->_IO_buf_base;执行后
然后是执行_IO_SYSREAD,通过vtable进入_IO_file_read 
count = _IO_SYSREAD (fp, fp->_IO_buf_base, fp->_IO_buf_end - fp->_IO_buf_base);
内部调用了read,把数据读入_IO_buf_base指向的缓冲区(此时_IO_read_ptr也等于_IO_buf_base)
读入后,现在_IO_read_end没有变
执行到
fp->_IO_read_end += count;的时候才被改变
之后再次循环,不断改变_IO_read_ptr的值
直到结束,退出
