Redis – epoll

Redis 对 epoll 的封装其实也是类似的,使用 epoll_create 创建 epoll 中使用的 epfd

static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *state = zmalloc(sizeof(aeApiState));

    if (!state) return -1;
    state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
    if (!state->events) {
        zfree(state);
        return -1;
    }
    state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
    if (state->epfd == -1) {
        zfree(state->events);
        zfree(state);
        return -1;
    }
    eventLoop->apidata = state;
    return 0;
}

在 aeApiAddEvent 中使用 epoll_ctl 向 epfd 中添加需要监控的 FD 以及监听的事件:

static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    struct epoll_event ee = {0}; /* avoid valgrind warning */
    /* If the fd was already monitored for some event, we need a MOD
     * operation. Otherwise we need an ADD operation. */
    int op = eventLoop->events[fd].mask == AE_NONE ?
            EPOLL_CTL_ADD : EPOLL_CTL_MOD;

    ee.events = 0;
    mask |= eventLoop->events[fd].mask; /* Merge old events */
    if (mask & AE_READABLE) ee.events |= EPOLLIN;
    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
    ee.data.fd = fd;
    if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
    return 0;
}

由于 epoll 相比 select 机制略有不同,在 epoll_wait 函数返回时并不需要遍历所有的 FD 查看读写情况;在 epoll_wait 函数返回时会提供一个 epoll_event 数组:

typedef union epoll_data {
    void    *ptr;
    int      fd; /* 文件描述符 */
    uint32_t u32;
    uint64_t u64;
} epoll_data_t;

struct epoll_event {
    uint32_t     events; /* Epoll 事件 */
    epoll_data_t data;
};

aeApiPoll 函数只需要将 epoll_event 数组中存储的信息加 入  eventLoop  的  fired 数组中,将信息传递给上层模块:

static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, numevents = 0;

    retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
            tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
    if (retval > 0) {
        int j;

        numevents = retval;
        for (j = 0; j < numevents; j++) {
            int mask = 0;
            struct epoll_event *e = state->events+j;

            if (e->events & EPOLLIN) mask |= AE_READABLE;
            if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
            if (e->events & EPOLLERR) mask |= AE_WRITABLE;
            if (e->events & EPOLLHUP) mask |= AE_WRITABLE;
            eventLoop->fired[j].fd = e->data.fd;
            eventLoop->fired[j].mask = mask;
        }
    }
    return numevents;
}

Select

int fd = /* file descriptor */

fd_set rfds;
FD_ZERO(&rfds);
FD_SET(fd, &rfds)

for ( ; ; ) {
    select(fd+1, &rfds, NULL, NULL, NULL);
    if (FD_ISSET(fd, &rfds)) {
        /* file descriptor `fd` becomes readable */
    }
}
  1. 初始化一个可读的 fd_set 集合,保存需要监控可读性的 FD;
  2. 使用 FD_SET 将 fd 加入 rfds
  3. 调用 select 方法监控 rfds 中的 FD 是否可读;
  4. 当 select 返回时,检查 FD 的状态并完成对应的操作。

在 Redis 的 ae_select 文件中代码的组织顺序也是差不多的,首先在 aeApiCreate 函数中初始化 rfds 和 wfds

static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *state = zmalloc(sizeof(aeApiState));
    if (!state) return -1;
    FD_ZERO(&state->rfds);
    FD_ZERO(&state->wfds);
    eventLoop->apidata = state;
    return 0;
}

aeApiAddEvent 和 aeApiDelEvent 会通过 FD_SET 和 FD_CLR 修改 fd_set 中对应 FD 的标志位:

static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    if (mask & AE_READABLE) FD_SET(fd,&state->rfds);
    if (mask & AE_WRITABLE) FD_SET(fd,&state->wfds);
    return 0;
}

整个 ae_select 子模块中最重要的函数就是 aeApiPoll,它是实际调用 select 函数的部分,其作用就是在 I/O 多路复用函数返回时,将对应的 FD 加入  aeEventLoop 的  fired 数组中,并返回事件的个数:

static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, j, numevents = 0;

    memcpy(&state->_rfds,&state->rfds,sizeof(fd_set));
    memcpy(&state->_wfds,&state->wfds,sizeof(fd_set));

    retval = select(eventLoop->maxfd+1,
                &state->_rfds,&state->_wfds,NULL,tvp);
    if (retval > 0) {
        for (j = 0; j <= eventLoop->maxfd; j++) {
            int mask = 0;
            aeFileEvent *fe = &eventLoop->events[j];

            if (fe->mask == AE_NONE) continue;
            if (fe->mask & AE_READABLE && FD_ISSET(j,&state->_rfds))
                mask |= AE_READABLE;
            if (fe->mask & AE_WRITABLE && FD_ISSET(j,&state->_wfds))
                mask |= AE_WRITABLE;
            eventLoop->fired[numevents].fd = j;
            eventLoop->fired[numevents].mask = mask;
            numevents++;
        }
    }
    return numevents;
}

未知疯狂

昨天考完了DS。

这学期还剩一门DP就结束了,但是今天坐在已经没什么人的图书馆里却没有什么复习的心思。

正逢国内的校招季,身边的朋友们也都开始了自己新的旅程。有大四的朋友毕业去找了工作,有的选择了去国外继续深造。研究生的朋友们要不选择了直博,要不就要加入求职大军。

仔细想想,其实我的机会还是蛮多的,但就是怕不能把握住。希望假期里可以想明白这个事情,然后朝着想去的方向奋斗吧。

Miscellaneous

过了周五,这个学期就要告以段落了。

然后接下来就是一周的疯狂复习了,加上考试了。

这段时间一直没有更新blog, 感觉还是应该偶尔记一点流水账的。今天早上起来巨冷,于是坐了Tram去学校,打算好好学几个小时习来着,但是被昨天下载的游戏耽误了好多时间> – <

在Tram上无意看到了昨天在超市买厨房打火机被多收了很多钱,所以下了Tram之后就直奔Caltron的ANZ去问了一下,结果人家并不怎么care这件事,说是直接去找Merchant就好。接着去修车行取了昨天拜托修的自行车,貌似除了链条还有很多问题,估计得我自己订零件修了(车行修车太贵了)

晚上和Angle去吃了晚饭,看了中华剧社的话剧《疯子,你好》,感觉故事很不错,最后的结尾挺让人感动的。把Angle送回家之后,我去找昨天的超市撕逼,结果人家的退款速度简直一气呵成,我也就只能All good了。

下周估计又是一场硬仗了(毕竟感觉这个学期没有好好学习。。),希望考完试可以有一个愉快的新旅程LoL

Musician

Let’s say there are two persons, Composer and Performer.

The Composer randomly selects three different Pitch which constructed by two part, Note and Octave. Note in the range of “A” to “G”, Octave in the range of “1” to “3”.

For example, here is a typical Pitch combined by ‘A’ and ‘1’, in which ‘A’ is the Note, and ‘1’ is the Octave.

Once Composer selected a Combination, Performer needs to guess it as quick as possible. After each guess, Performer get a feedback which indicates that:

  • how many pitches in the guess are included in the target (correct pitches)
  • how many pitches have the right note but the wrong octave (correct notes)
  • how many pitches have the right octave but the wrong note (correct octaves)

Now, the question is how to get the corrected answer as less times as possible.

The core idea is that when we get a feedback from the previous guess, we’d check out combinations that have identical feedback in the remind possible set, in which the problem scale can be reduced effectively.

Now, the question is how to get the corrected answer as less times as possible.

The core idea is that when we get a feedback from the previous guess, we are able to check out combinations that have identical feedback in the remaining possible set, by which the problem scale can be pruned effectively.

possible set, in which the problem scale can be reduced effectively.

--  Subject  : UniMelb 2019 SM1 COMP90048 Declarative Programming
--  File     : Proj1.hs
--  Author   : Mingzhe Du
--  Origin   : Mon Apr 8 2019 
--  Purpose  : This program for guessing a target chord. In each round of the game,
--             the program will generate a chord from a possible set, and then a feedback 
--             against the guess will be given. Depanding on these feedbacks, the aim of this
--             program is get the correct chord with as less as possible guess times.


module Proj1 (Pitch, GameState, toPitch, feedback, initialGuess, nextGuess) where

-- Pitch structure
data Pitch = Pitch { note :: Char, 
                     octave :: Char   
                   } deriving (Eq)
instance Show Pitch where
    show (Pitch note octave) = [note, octave]

-- Game State
data GameState = GameState { times :: Int,                  -- Guess times
                             cCombinations :: [[[Char]]]    -- Possible set
                           } deriving (Eq, Show)

-- Converting String to Pitch                          
toPitch :: String -> Maybe Pitch
toPitch (a:b:t)
    | not (null t) = Nothing
    | (elem note' ['A'..'G']) && (elem octave' ['1'..'3']) = Just Pitch {note = note', octave = octave'}
    | otherwise = Nothing
    where note' = a
          octave' = b

-- Comparing target chord and guessed chord
feedback :: [Pitch] -> [Pitch] -> (Int,Int,Int)
feedback pitch_a pitch_b
    | (length pitch_a == 3) && (length pitch_b == 3) = (c_p, c_n - c_p, c_o - c_p)
    | otherwise = (0,0,0)
    where get_key key = foldr (\x acc -> key x:acc) []
          c_p = getCounter pitch_a pitch_b 0                              -- Correct pitches
          c_n = getCounter (map note pitch_a) (map note pitch_b) 0        -- Correct notes
          c_o = getCounter (map octave pitch_a) (map octave pitch_b) 0    -- Correct Octaves

-- Initial guess
initialGuess :: ([Pitch], GameState)
initialGuess = (currentGuess, gameState)
    where currentGuess = combinationToPitch  (cGuess)
          gameState = GameState 0 all_combinations 
          all_items = getCombination "ABCDEFG" "123"
          cGuess:all_combinations = subsequencesOfSize 3 all_items        -- New guess and new possible set
          getCombination p_note p_octave = foldr (\x acc -> (map (\y -> y:[x]) p_note) ++ acc) [] p_octave
          combinationToPitch combinations = map (\(Just x) -> x) $ map toPitch combinations     -- Converting String to Pitch

-- Get the next guess
nextGuess :: ([Pitch], GameState) -> (Int,Int,Int) -> ([Pitch],GameState)
nextGuess (pGuess, pGameState) pFeedback = (cGuess, cGameState)
    where cGuess':cCombs = getNewCombination pGuess pCombinations pFeedback
          pCombinations = cCombinations pGameState
          cGuess = toChord cGuess'
          cGameState = GameState ((times pGameState) + 1) cCombs 
          toChord = map (\x -> Pitch (x !! 0) (x !! 1))

-- Help Functions 

-- remove an item from a list
removeItem :: (Eq a) => a -> [a] -> [a]
removeItem _ [] = []
removeItem x (y:ys) 
    | x == y = ys
    | otherwise = y : removeItem x ys 

-- get the number of same elements in two lists
getCounter :: (Eq a) => [a] -> [a] -> Int -> Int
getCounter [] y c = c
getCounter  (x:xs) y c
    | elem x y = getCounter xs (removeItem x y) (c+1)
    | otherwise = getCounter xs y c
 
-- Generate combinations by a specifc size    
subsequencesOfSize :: Int -> [a] -> [[a]]
subsequencesOfSize n xs = let l = length xs in if n>l then [] else subsequencesBySize xs !! (l-n)
    where subsequencesBySize [] = [[[]]]
          subsequencesBySize (x:xs) = let next = subsequencesBySize xs in zipWith (++) ([]:next) (map (map (x:)) next ++ [[]])

-- Converting a string list to pitch list
toChord :: [[Char]] -> [Pitch]
toChord a = map (\x -> Pitch (x !! 0) (x !! 1)) a 

-- retrive a new guess
getNewCombination :: [Pitch] -> [[[Char]]] -> (Int, Int, Int) -> [[[Char]]]
getNewCombination guess allCombinations pFeedback = foldr (\x acc -> if checkFeedback (toChord x) then  x:acc else acc) [] allCombinations
    where checkFeedback nChord = if pFeedback == (feedback guess nChord) then True else False

Haskell moment

Here are some Haskell code chunks, which both are simple recursion algorithms.

Quicksort is a sort of poster child for Haskell because everyone does it to showcase how elegant Haskell is.

quicksort :: (Ord a) => [a] -> [a]
quicksort [] = []
quicksort (x:xs) =
let smallerSorted = quicksort [a | a <- xs, a <= x]
    biggerSoted = quicksort [a | a <- xs, a > x]
in smallerSorted ++ [x] ++ biggerSoted
maximum' :: (Ord a) => [a] -> a
maximum' [] = error "List is empty!"
maximum' [x] = x
maximum' (x:xs) = (max x (maximum' xs))

replicate' :: Int  -> b -> [b]
replicate' n x
    | n <= 0 = []
    | otherwise = (x:(replicate' (n-1) x))

take' :: Int -> [a] -> [a]
take' n (x:xs)
    | n <= 0 = []
    | otherwise = x:(take' (n-1) xs)

reserve' :: [a] -> [a]
reserve' [] = []
reserve' (x:xs) = (reserve' xs) ++ [x]

BERT based sentence scenario detector

前两天用简单的多层感知器搭建了一个Word-level的detector模型。在模型的最后一次是用来Softmax,将Output Layer进行了分类。

对于场景识别这个问题,我目前先规定了可选的类别(比如Forest/ Ocean/ River/ College/ Suburb/ etc.)。这样一方面来说,可以简化detector的工作流程,另外也比较适应我们组目前的资源情况(识别场景之后需要提取事先准备好的Background,如果提取出了新的element也是无法获取到background resource的)。

上周我的想法是先使用Word embedding将Sentence转化为Sequence,然后使用Bi-LSTM或者直接使用Linear CRF对Sequence进行Sequence Tagging,以提取Sentence中涉及场景的Word。最后通过Word-level detector分析所选的Word,得到Sentence-level Scenario。

不过经过实验我发现,由于我手上只有不到500个短篇的儿童故事,还是没有标注的那种。就算我全部拿来进行标注,也只能生成不到5000个Phases。因为Labeler的资源比较紧张,我先用第一版的词表Detector模型生成了Labeling data,丢到CRF里面之后发现出了Person-entities,其他的类别基本无法有效识别出来。

于是这种方法暂时宣告失败。

周五晚上在公司发呆,突然觉得可以试一试力大砖飞的方法,直接使用Sentence-level embedding来作为Input。在这个模型里加入了CNN Layer,但其实单靠Dense Full connect Layer就已经可以在这个数据集上达到同样的效果了。

# 模型构建
model = Sequential([
Conv1D(filters=5, kernel_size=5, strides=1, padding='valid', input_shape=(768, 1), name="Convolution_Layer_1"),
AveragePooling1D(pool_size=5, strides=1, padding="valid", name="Pooling_Layer_1"),

Conv1D(filters=5, kernel_size=5, strides=1, padding='valid', name="Convolution_Layer_2"),
AveragePooling1D(pool_size=5, strides=1, padding="valid", name="Pooling_Layer_2"),

Flatten(name="Flatten_Layer"),

Dense(256, input_dim=3760, name="Dense_Layer_1"),
Activation('relu'),
Dropout(0.1),

Dense(32, input_dim=256, name="Dense_Layer_2"),
Activation('relu'),
Dropout(0.1),

Dense(11, input_dim=32, name="Dense_Layer_3"),
Activation('softmax'),
])