2

The performance comparison of output in RTL monitor

summary

最近在写RTL monitor, 发现如果频繁用$fdisplay写数据出来,性能会成为瓶颈。所以就研究用DPI-C把数据送出来,然后在C侧看看有什么优化手段。有几种优化方法, 一种是写raw data到C侧,C侧直接把raw data格式化输出。 另一种是把raw data存成文件后就返回到RTL侧。然后线下用一个进程把raw data进行格式化输出。线下的进程可以和输出进程同时跑,一旦有数据出来它就进行数据处理并输出。
为了研究这个问题,我直接写个纯C的producer-consumer模型,代码见下面。

conclusion

fwrite在绝大部分场景下比mmap快, 当写入量为4GB左右时, fwrite用时10.5 sec, mmap用时29.6 sec.
用fread读入4GB binary数据, 用fwrite写出8.9GB数据时需要103.4 sec
用mmap读入4GB binary数据, 用fwrite写出8.9GB数据时需要88.1 sec
producer比较快, consumer比较慢, producer 2秒可以产生1GB数据,consumer需要20 sec左右才可以处理完。当两者并行跑时,收益并不高。

注意点

open mode

用fopen以只写模式("w")打开文件后,再用mmap映射该文件时,会因为文件描述符是以O_WRONLY方式打开的(MAP_SHARED映射要求描述符可读)而无法map成功。
也就是下面的代码虽然在编译时通过,但在运行时会报如下错误
不管如何设置PROT (PROT_WRITE, PROT_WRITE|PROT_READ, PROT_READ|PROT_EXEC), 或者如何设置flags(MAP_SHARED, MAP_PRIVATE), 都会在运行时报错:Permission denied

FILE* f = fopen(argv[1], "w"); // whatever use "w" or "wb"
int *map=(int*)mmap(0, totalbytes, PROT_READ|PROT_WRITE, MAP_SHARED, fileno(f), 0);

解决方法是使用"w+"打开文件, 原因如下:

fopen() mode    open() flags
r               O_RDONLY
w               O_WRONLY | O_CREAT | O_TRUNC
a               O_WRONLY | O_CREAT | O_APPEND
r+              O_RDWR
w+              O_RDWR | O_CREAT | O_TRUNC
a+              O_RDWR | O_CREAT | O_APPEND

write

在用mmap写之前需要先把文件设置成相应的大小。可以使用unistd.h中的ftruncate

size_t totalbytes = 4 + iter*20 + 20 ;
ftruncate(fw, totalbytes);

原型

主要涉及mmap, fopen, open, fread, fwrite,ftruncate几个函数。

#include <sys/mman.h>
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);

#include <stdio.h>
FILE *fopen(const char *pathname, const char *mode);
FILE *fdopen(int fd, const char *mode);
FILE *freopen(const char *pathname, const char *mode, FILE *stream);

#include <stdio.h>
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream);
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
//The function fread() reads nmemb items of data, each size bytes long, from the stream pointed to by stream, storing them at the location given by ptr.

实现

fwrite

#include <cstdio>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <chrono>
#include <thread>
#include "timediff.hpp"

// One fixed-size binary record of the data file: five consecutive 32-bit words.
// The reader treats a record whose five fields are all zero as the
// end-of-stream marker.
struct info {
    uint32_t pc_l;    // low 32 bits of the 64-bit PC
    uint32_t pc_h;    // high 32 bits of the 64-bit PC
    uint32_t instr;   // 32-bit value printed after the PC (presumably the instruction word)
    uint32_t time_l;  // low 32 bits of the 64-bit timestamp
    uint32_t time_h;  // high 32 bits of the 64-bit timestamp
};

int main(int argc, char** argv) {
    if(argc<2) {
        return 1;
    }
    TimerClock tc;

    FILE *f = std::fopen(argv[1], "wb");
    printf("size of info : %d\n", sizeof(info));
    uint32_t i=1;
    tc.start();
    int prec=-9;
    std::fwrite(&prec, sizeof(int), 1, f);
    while(i++<50000000) {
        info val = {(uint32_t)i, (uint32_t)i, (uint32_t)i, (uint32_t)i, (uint32_t)0};
        size_t len = std::fwrite(&val, sizeof(info), 1, f);
        if (len != 1) {
            std::cout << "the len is: " << len << "\n";
            info val = {0,0,0,0,0};
            size_t len = std::fwrite(&val, sizeof(info), 1, f);
            std::fclose(f);
            exit(1);
        } else {
            if ( i%200 == 0) {
                fflush(f);
            }
        }
        //std::this_thread::sleep_for(std::chrono::microseconds(1));
    }
    info val;
    std::memset(&val, 0, sizeof(info));
    size_t len = std::fwrite(&val, sizeof(info), 1, f);
    if (len != 1 ) {
        std::cout << "end error\n";
    }
    std::cout << "writer elapsed time: " << tc.getTimerMicroSec() << "us\n";
    std::fclose(f);
    std::cout << "writer done!\n";
    return 0;
}

fread

#include <cstdio>
#include <cstdint>
#include <iostream>
#include <chrono>
#include <thread>
#include <sys/stat.h>
#include "timediff.hpp"

// One fixed-size binary record of the data file: five consecutive 32-bit words.
// A record whose five fields are all zero marks the end of the stream.
struct info {
    uint32_t pc_l;    // low 32 bits of the 64-bit PC
    uint32_t pc_h;    // high 32 bits of the 64-bit PC
    uint32_t instr;   // 32-bit value printed after the PC (presumably the instruction word)
    uint32_t time_l;  // low 32 bits of the 64-bit timestamp
    uint32_t time_h;  // high 32 bits of the 64-bit timestamp
};

// Reports whether `file` names an existing regular file.
// Returns false when stat() fails or when the path is not a regular file
// (for example a directory).
inline bool is_file_exist(const char* file) {
    struct stat st;
    const bool stat_ok = (stat(file, &st) == 0);
    return stat_ok && S_ISREG(st.st_mode);
}

// Formats one trace line into `fchar` (at most `size` bytes, NUL-terminated
// by snprintf):
//
//     "<time><pad> <unit> PC=0x<16 hex digits>, 0x<8 hex digits> \n"
//
// `prec` is a power-of-ten time precision in the range [-15, 0]. Every three
// steps select the next unit (fs, ps, ns, us, ms, s); the remainder appends
// zero, one or two '0' characters to the timestamp so that the printed number
// is expressed in that unit.
//
// Returns 0 on success. For a `prec` outside [-15, 0] it prints a diagnostic
// to stderr, leaves `fchar` untouched and returns 1.
int form_line(uint64_t time, int prec, uint64_t pc, uint32_t instr, char * fchar, size_t size) {
    static const char* const kUnits[] = {"fs", "ps", "ns", "us", "ms", "s"};
    static const char* const kPadding[] = {"", "0", "00"};

    if (prec < -15 || prec > 0) {
        std::fprintf(stderr, "unknown precision %d\n", prec);
        return 1;
    }

    const int step = prec + 15;  // 0 (fs) .. 15 (s)
    // uint64_t is not `unsigned long` on every platform, so the values are
    // converted to unsigned long long and printed with the matching %ll
    // specifiers.
    std::snprintf(fchar, size, "%llu%s %s PC=0x%016llx, 0x%08x \n",
                  static_cast<unsigned long long>(time), kPadding[step % 3],
                  kUnits[step / 3], static_cast<unsigned long long>(pc),
                  static_cast<unsigned int>(instr));
    return 0;
}


int main(int argc, char**argv) {
    if(argc<3) return 1;
    TimerClock tc;
    int timeout=0;
    while ( (!is_file_exist(argv[1])) && (timeout< 600000)) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        timeout++;
    }
    std::cout << "start ...\n";

    FILE *r = std::fopen(argv[1], "rb");
    if (r==NULL) {
        std::cout<< "can't open " << argv[1] << "\n";
        exit(1);
    }
    FILE *w = std::fopen(argv[2], "w");
    if (w==NULL) {
        std::cout<< "can't open " << argv[2] << "\n";
        exit(1);
    }
    tc.start();
    long pos;
    timeout = 0;
    int prec;
    fread(&prec, sizeof(int), 1, r);
    while(1) {
        info val;
        pos = ftell(r);   
        int len = fread(&val, sizeof(info), 1, r);
        if (len!= 1) {
            if(ferror(r)) {
                perror("fread error\n");
                goto FINISHED;
            }
            if(timeout < 10000000) {
                fseek(r, pos, SEEK_SET);
                std::this_thread::sleep_for(std::chrono::microseconds(1));
                timeout++;
                continue;
            } else {
                goto FINISHED;
            }
        } else {
            if ((val.pc_h == 0)&&(val.pc_l==0) && (val.instr==0) && (val.time_h==0) && (val.time_l==0)) {
                break;
            }
        }
        char line[100];
        uint64_t pc = ((uint64_t)val.pc_h << 32) + val.pc_l;
        uint64_t time = ((uint64_t)val.time_h << 32) + val.time_l;
        int r = form_line(time, prec, pc, val.instr, line, sizeof(line));
        if (r==1) {
            return 1;
        }
        std::fputs(line, w);
    }
FINISHED:
    std::cout << "timeout: " << timeout << "\n";
    std::cout << "reader done\n";
    std::cout << "reader elapsed time: " << tc.getTimerMicroSec() << "us\n";
    std::fclose(r);
    std::fclose(w);
    return 0;
}

mmap write

#include <cstdio>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <chrono>
#include <thread>
#include <unistd.h>
#include <fcntl.h>
#include "sys/mman.h"
#include "timediff.hpp"

// One fixed-size binary record of the data file: five consecutive 32-bit words.
// The writer below stores the same five words directly through the mapped
// integer array; an all-zero record terminates the stream.
struct info {
    uint32_t pc_l;    // low 32 bits of the 64-bit PC
    uint32_t pc_h;    // high 32 bits of the 64-bit PC
    uint32_t instr;   // 32-bit value printed after the PC (presumably the instruction word)
    uint32_t time_l;  // low 32 bits of the 64-bit timestamp
    uint32_t time_h;  // high 32 bits of the 64-bit timestamp
};

// Number of data records the mmap writer emits (the all-zero terminator
// record is written in addition to these).
const int iter = 50000000;

int main(int argc, char** argv) {
    if(argc<2) {
        return 1;
    }
    TimerClock tc;

    int f = open(argv[1], O_RDWR|O_CREAT|O_TRUNC, 0x0777);
    std::cout << "start to write bin file " << argv[1] << "\n";
    size_t totalbytes = 4+iter*20+20;
    ftruncate(f, totalbytes);
    tc.start();
    int prec=-9;
    int *map=(int*)mmap(0, totalbytes, PROT_WRITE, MAP_SHARED, f, 0);
    if (map == MAP_FAILED) {
        close(f);
        perror("error mapping");
        exit(1);
    }
    size_t wptr=0;
    map[wptr++] = prec;
    uint32_t i=0;
    while(i++<iter) {
        map[wptr++] = i;
        map[wptr++] = i;
        map[wptr++] = i;
        map[wptr++] = i;
        map[wptr++] = 0;
        //std::this_thread::sleep_for(std::chrono::microseconds(1));
    }
    map[wptr++] = 0;
    map[wptr++] = 0;
    map[wptr++] = 0;
    map[wptr++] = 0;
    map[wptr++] = 0;
    if(msync(map, totalbytes, MS_SYNC) == -1) {
        perror("could not sync the file to disk");
    }
    if(munmap(map, totalbytes) == -1) {
        close(f);
        perror("error unmap the file");
        exit(1);
    }
    close(f);
    std::cout << "writer elapsed time: " << tc.getTimerMicroSec() << "us\n";
    close(f);
    std::cout << "writer done!\n";
    return 0;
}

mmap read

#include <cstdio>
#include <cstdint>
#include <iostream>
#include <vector>
#include <unistd.h>
#include <fcntl.h>
#include <chrono>
#include <thread>
#include <sys/stat.h>
#include "timediff.hpp"
#include <sys/mman.h>

// One fixed-size binary record of the data file: five consecutive 32-bit words.
// A record whose five fields are all zero marks the end of the stream.
struct info {
    uint32_t pc_l;    // low 32 bits of the 64-bit PC
    uint32_t pc_h;    // high 32 bits of the 64-bit PC
    uint32_t instr;   // 32-bit value printed after the PC (presumably the instruction word)
    uint32_t time_l;  // low 32 bits of the 64-bit timestamp
    uint32_t time_h;  // high 32 bits of the 64-bit timestamp
};

// Tells whether `file` is the path of an existing regular file.
// A failing stat() or a non-regular file (directory, device, ...) gives false.
inline bool is_file_exist(const char* file) {
    struct stat attributes;
    if (stat(file, &attributes) != 0) {
        return false;
    }
    return S_ISREG(attributes.st_mode) ? true : false;
}

// Formats one trace line into `fchar` (at most `size` bytes, NUL-terminated
// by snprintf):
//
//     "<time><pad> <unit> PC=0x<16 hex digits>, 0x<8 hex digits> \n"
//
// `prec` is a power-of-ten time precision in the range [-15, 0]. Every three
// steps select the next unit (fs, ps, ns, us, ms, s); the remainder appends
// zero, one or two '0' characters to the timestamp so that the printed number
// is expressed in that unit.
//
// Returns 0 on success. For a `prec` outside [-15, 0] it prints a diagnostic
// to stderr, leaves `fchar` untouched and returns 1.
int form_line(uint64_t time, int prec, uint64_t pc, uint32_t instr, char * fchar, size_t size) {
    static const char* const kUnits[] = {"fs", "ps", "ns", "us", "ms", "s"};
    static const char* const kPadding[] = {"", "0", "00"};

    if (prec < -15 || prec > 0) {
        std::fprintf(stderr, "unknown precision %d\n", prec);
        return 1;
    }

    const int step = prec + 15;  // 0 (fs) .. 15 (s)
    // uint64_t is not `unsigned long` on every platform, so the values are
    // converted to unsigned long long and printed with the matching %ll
    // specifiers.
    std::snprintf(fchar, size, "%llu%s %s PC=0x%016llx, 0x%08x \n",
                  static_cast<unsigned long long>(time), kPadding[step % 3],
                  kUnits[step / 3], static_cast<unsigned long long>(pc),
                  static_cast<unsigned int>(instr));
    return 0;
}


int main(int argc, char**argv) {
    if(argc<3) return 1;
    TimerClock tc;
    int timeout=0;
    while ( (!is_file_exist(argv[1])) && (timeout< 600000)) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        timeout++;
    }
    std::cout << "start ...\n";

    int r = open(argv[1], O_RDONLY, 0x0600);
    if (r==-1) {
        std::cout<< "can't open " << argv[1] << "\n";
        exit(1);
    }
    FILE *w = std::fopen(argv[2], "w");
    if (w==nullptr) {
        std::cout<< "can't open " << argv[2] << "\n";
        close(r);
        exit(1);
    }
    struct stat rfileInfo = {0};
    struct stat wfileInfo = {0};
    if(fstat(r, &rfileInfo) == -1) {
        exit(1);
    }
    if(fstat(fileno(w), &wfileInfo) == -1) {
        exit(1);
    }
    std::vector<int> segment;
    size_t cursize;
    size_t lastpos;
    size_t pagesize = sysconf(_SC_PAGE_SIZE);
    size_t readto;
    int prec;
    off_t pa_offset;

    timeout =0;
    while((rfileInfo.st_size == 0)&&(timeout<600000)) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
        timeout++;
    }
    if (timeout >= 600000) {
        goto FINISHED;
    }
    printf("File size is %ji\n", (intmax_t)rfileInfo.st_size);

    cursize  = rfileInfo.st_size;
    lastpos = 0;
    if (cursize<24) {
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }
    readto = cursize - (cursize-4) % 20;

    tc.start();

    while(1) {

        pa_offset = lastpos & ~(pagesize-1);
        size_t size = readto-pa_offset;
        size_t pass_byte = lastpos-pa_offset;   
        std::cout << "size = " << size << "\n";
        std::cout << "lastpos =" << lastpos << "\n";
        char *rmap = (char*)mmap(0, size, PROT_READ, MAP_SHARED, r, pa_offset);
        if(rmap == MAP_FAILED) {
            perror("error mapping");
            goto FINISHED;
        }
        size_t rindex=0;
        if (lastpos==0) {
            prec = rmap[rindex++];
        }
        lastpos = readto;
        int * rptr = (int*)(rmap+pass_byte);
        int total = (size-pass_byte)/4;

        while(rindex<total) {
            info val;
            val.pc_l=rptr[rindex++];
            val.pc_h=rptr[rindex++];
            val.instr=rptr[rindex++];
            val.time_l=rptr[rindex++];
            val.time_h=rptr[rindex++];
            if ((val.pc_h == 0)&&(val.pc_l==0) && (val.instr==0) && (val.time_h==0) && (val.time_l==0)) {
                munmap(rmap, size);
                std::cout << "reach end\n";
                goto FINISHED;
            }
            char line[100];
            uint64_t pc = ((uint64_t)val.pc_h << 32) + val.pc_l;
            uint64_t time = ((uint64_t)val.time_h << 32) + val.time_l;
            int rlt = form_line(time, prec, pc, val.instr, line, sizeof(line));
            if (rlt==1) {
                goto FINISHED;
            }
            std::fputs(line, w);
        }
        munmap(rmap, size);
        if(fstat(r, &rfileInfo) == -1) {
            goto FINISHED;
        }
        cursize = rfileInfo.st_size;
        int watchdog =0;
        while ((cursize<(lastpos+20))||(watchdog++<300)) {
            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
            if(fstat(r, &rfileInfo) == -1) {
                std::cout<< "can't get file size\n";
                goto FINISHED;
            }
            cursize = rfileInfo.st_size;
        }
        if (watchdog >= 30000) {
            goto FINISHED;
        }
        readto = cursize - (cursize-4) % 20;
        segment.push_back(readto);
    }
FINISHED:
    std::cout << "timeout: " << timeout << "\n";
    std::cout << "reader done\n";
    std::cout << "reader elapsed time: " << tc.getTimerMicroSec() << "us\n";
    close(r);
    std::fclose(w);
    std::cout << "the segment is :\n";
    for(auto it: segment) {
        std::cout <<  it << "\n";
    }
    return 0;
}

timer

#ifndef _TIMEDIFF_HPP_
#define _TIMEDIFF_HPP_

#include <iostream>
#include <chrono>

// Minimal stopwatch used by the benchmarks.
//
// The reference point is set when the object is constructed and again on
// every start(); getTimerMicroSec() reports the time elapsed since then.
class TimerClock
{
public:
    // Starts timing immediately so the getter is valid without an explicit
    // start() call.
    TimerClock()
    {
        start();
    }

    // Resets the reference point to "now".
    void start()
    {
        _start = std::chrono::steady_clock::now();
    }

    // Returns the whole microseconds elapsed since construction or the last
    // start(). steady_clock is monotonic, so the value never goes backwards.
    long long getTimerMicroSec() const
    {
        return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - _start).count();
    }
private:
    std::chrono::time_point<std::chrono::steady_clock> _start;
};

#endif

compile

# Build and run helpers for the reader/writer benchmark programs.
# NOTE(review): recipe lines must start with a TAB in a real Makefile; the
# leading spaces below look like a copy/paste artifact - confirm before use.

# Debug info, C++11.
CXXFLAGS = -g -std=c++11
# Default target: build both programs.
gen: reader writer

# Generated files removed by `clean`.
FILES = test.bin test.log test1.log test2.log

reader: reader.cpp
    g++ ${CXXFLAGS} -o $@ $^ -I./

writer: writer.cpp
    g++ ${CXXFLAGS} -o $@ $^ -I./

writer2: writer2.cpp
    g++ ${CXXFLAGS} -o $@ $^ -I./

# Starts the writer in the background, waits one second, then starts the reader.
# NOTE(review): no file arguments are passed here; the programs shown in this
# article return 1 when their file arguments are missing - confirm the
# intended paths.
run: clean
    (./writer &) && (sleep 1) && (./reader &)

# Times the writer alone (run in the background).
runw:
    rm -rf /tmp/test.txt
    time ./writer &

# Times the reader alone (run in the background).
runr:
    rm -rf /tmp/out.txt /tmp/test.txt
    time ./reader &

# The leading '-' makes make ignore a failing rm.
clean:
    - rm -rf ${FILES} reader writer reader2 writer2 writer3

# Generates two binary trace files with the writer.
genbin:
    ./writer ../../tarmac.core0.log.bin
    ./writer ../../tarmac.core1.log.bin


# Compares the tail of out.txt with golden.txt.
diff:
    tail out.txt | diff golden.txt -

# NOTE(review): run, runw, runr, genbin and diff are also non-file targets and
# could be listed here as well.
.PHONY: gen clean

harriszh
338 声望131 粉丝

做些有趣的事,留些有用的存在


引用和评论

0 条评论