笔划输入法查找算法示例(Lua实现)

来源:互联网 发布:淘宝主图要求规范 编辑:程序博客网 时间:2024/06/11 22:29


公司同事最近利用谷歌拼音输入法实现了自己的拼音输入法。经过了解,其中最核心的就是一个trie(词典树)的构造和检索(这里不多介绍trie树了,google一搜一大把),于是今天就想着自己实现一个笔划输入法。大概的想法是:

  1. 找一个所有汉字或者一、二级汉字的笔顺数据库
  2. 用Lua将数据库读进来,构造一个trie树
    1. 每一个节点存一个笔划
    2. 每一个节点带一个子节点集合
    3. 每一个节点带一个汉字集合,表示到这一级时所有笔划组成的完整汉字
  3. 检索时根据用户输入的笔划,检索到一个节点,然后按笔划顺序遍历子树
    1. 遍历子树可以给出所有以这些笔划开始的所有汉字,但是总不能一下显示出来吧,所以需要一个迭代器,每调用一次给出一个可能的值,这个迭代器用C实现比较复杂,但是用Lua实现简直就是小意思,直接将遍历子树的函数封装到一个coroutine中,每找到一个汉字就 yield(汉字) 即可

笔顺数据库

CSDN上可以下载到 http://download.csdn.net/detail/yyjlan/3766691

下载的mdb格式,我不太喜欢,Lua也不太喜欢。由于luasql支持odbc,所以可以将mdb文件加入到odbc数据源,然后载入后转成sqlite3的格式,方便以后使用,转换代码如下

-- One-off conversion script: copies every row of the `hzbs` table from an
-- ODBC data source (the Access .mdb file) into a new SQLite3 database file
-- named "hzbs.sqlite3.db".
--
-- Prerequisite: the Access file must be registered as a user DSN named
-- "hzbs" (Control Panel -> Administrative Tools -> Data Sources).
require "luasql.odbc"
require "luasql.sqlite3"

--- Render a value as an SQL string literal.
-- Embedded single quotes are doubled so that data containing `'` cannot
-- break (or inject into) the generated statement; nil becomes SQL NULL.
-- @param value any value read from the source row (may be nil)
-- @return string SQL fragment
local function sql_text(value)
  if value == nil then
    return "NULL"
  end
  local escaped = tostring(value):gsub("'", "''")
  return "'" .. escaped .. "'"
end

--- Render a value as an SQL numeric literal.
-- nil becomes SQL NULL; anything that is not convertible to a number is
-- rejected instead of being spliced into the statement verbatim.
-- @param value number, numeric string, or nil
-- @return string SQL fragment
local function sql_number(value)
  if value == nil then
    return "NULL"
  end
  local number = tonumber(value)
  if number == nil then
    error("expected a numeric column value, got: " .. tostring(value))
  end
  return tostring(number)
end

local odbc_env = assert(luasql.odbc())
local odbc_conn = assert(odbc_env:connect("hzbs"))
local odbc_cur = assert(odbc_conn:execute("SELECT * FROM hzbs;"))

local sqlite_env = assert(luasql.sqlite3())
local sqlite_conn = assert(sqlite_env:connect("hzbs.sqlite3.db"))
assert(sqlite_conn:execute(
  "CREATE TABLE hzbs (id INTEGER primary key, hanzi TEXT, stroke_number INTEGER, stroke_order TEXT, unicode TEXT, gbk TEXT);"))

-- Insert all rows inside a single transaction: far faster than autocommit,
-- and a failure part-way through leaves no partially filled table behind.
sqlite_conn:setautocommit(false)

local ok, err = pcall(function()
  local row = {}
  -- "n" mode: columns are returned by position, in SELECT * order.
  while odbc_cur:fetch(row, "n") do
    local insert_sql = string.format(
      "INSERT INTO hzbs(id, hanzi, stroke_number, stroke_order, unicode, gbk) VALUES(%s,%s,%s,%s,%s,%s);",
      sql_number(row[1]), -- id
      sql_text(row[2]),   -- hanzi
      sql_number(row[3]), -- stroke_number
      sql_text(row[4]),   -- stroke_order
      sql_text(row[5]),   -- unicode
      sql_text(row[6]))   -- gbk
    assert(sqlite_conn:execute(insert_sql))
  end
end)

if ok then
  sqlite_conn:commit()
else
  sqlite_conn:rollback()
end

-- Release every handle, including the SQLite environment.
sqlite_conn:close()
sqlite_env:close()
odbc_cur:close()
odbc_conn:close()
odbc_env:close()

if not ok then
  error(err, 0)
end

构造子树与检索

多的不说,直接看代码吧。代码写得有点乱,不过凑合看是没什么问题的。要运行代码必须要先安装 LuaForWindows

-- Stroke-based Chinese input method demo.
--
-- Loads (hanzi, stroke_order) pairs from a SQLite database into a trie keyed
-- by stroke, then shows a small wxLua dialog: the user clicks stroke buttons
-- and the first few hanzi whose stroke order starts with that prefix are
-- offered as candidates.
require "luasql.sqlite3"
require "wx"

--- Text wrapper for UI strings; currently the identity function.
function _T(s)
  return s
end

-- enum stroke_t {
local stroke_root = 0 -- for trie root, not a valid stroke
local stroke_heng = 1
local stroke_shu = 2
local stroke_pie = 3
local stroke_na = 4
local stroke_zhe = 5
local stroke_max = 5
local stroke_text = {_T"一", _T"丨", _T"丿", _T"丶", _T"乛"}
-- }

--- Create an empty trie node for the given stroke.
-- @param stroke stroke code stored in the node (see the stroke constants)
-- @return table node with fields `stroke`, `subnodes` and `hanzis`
function new_node(stroke)
  return {
    stroke = stroke, -- see stroke definition
    subnodes = {},   -- next strokes, keyed by stroke code
    hanzis = {},     -- two or more hanzi could have the same stroke order
  }
end

--- Create an empty trie (a root node carrying `stroke_root`).
function new_trie()
  return new_node(stroke_root)
end

--- Insert a hanzi into the trie, creating any missing nodes on its path.
-- @param node trie root to insert under
-- @param stroke_order string of single-digit stroke codes, e.g. "1234"
-- @param hanzi the character stored at the end of the path
-- Raises an error if `stroke_order` contains a character that is not a digit.
function insert_hanzi(node, stroke_order, hanzi)
  stroke_order = tostring(stroke_order)

  -- Parse and validate the whole sequence first, so that a malformed entry
  -- fails with a clear message and never leaves a half-built path behind.
  local strokes = {}
  for i = 1, #stroke_order do
    local stroke = tonumber(stroke_order:sub(i, i))
    if stroke == nil then
      error(string.format("invalid stroke order %q for hanzi %q",
        stroke_order, tostring(hanzi)), 2)
    end
    strokes[i] = stroke
  end

  for _, stroke in ipairs(strokes) do
    local child = node.subnodes[stroke]
    if child == nil then
      child = new_node(stroke)
      node.subnodes[stroke] = child
    end
    node = child
  end
  table.insert(node.hanzis, hanzi)
end

--- Look up the node reached by following `strokes` from `root`.
-- @param root trie root
-- @param strokes array of stroke codes
-- @return the node, or nil when `strokes` is empty or the path does not exist
function find_node(root, strokes)
  if #strokes < 1 then
    return nil
  end
  local node = root
  for _, stroke in ipairs(strokes) do
    node = node.subnodes[stroke]
    if not node then
      return nil
    end
  end
  return node
end

--- Build a trie from the `hzbs` table of a SQLite database.
-- @param db_name path of the SQLite database file
-- @return trie root
-- Raises an error if the database cannot be opened or queried.
function db_to_trie(db_name)
  local env = assert(luasql.sqlite3())
  local conn = assert(env:connect(db_name))
  local cur = assert(conn:execute("SELECT hanzi,stroke_order FROM hzbs;"))
  local trie = new_trie()
  local record = {} -- reused row buffer; local so it does not leak into _G
  while cur:fetch(record, "a") do
    -- Rows with a NULL column cannot be indexed; skip them.
    if record.stroke_order ~= nil and record.hanzi ~= nil then
      insert_hanzi(trie, record.stroke_order, record.hanzi)
    end
  end
  cur:close()
  conn:close()
  env:close()
  return trie
end

--- Create an iterator over every hanzi stored in the subtree of `root`.
-- Hanzi on a node are produced before those of its children, and children
-- are visited in stroke order (1 .. stroke_max). The traversal runs inside a
-- coroutine so each call yields exactly one more hanzi.
-- @param root node whose subtree is enumerated
-- @return function returning the next hanzi, or nil once exhausted
function get_hanzi_enumerator(root)
  local function traverse(node)
    for i = 1, #node.hanzis do
      coroutine.yield(node.hanzis[i])
    end
    for stroke = 1, stroke_max do
      local child = node.subnodes[stroke]
      if child then
        traverse(child)
      end
    end
  end

  local co = coroutine.create(function () traverse(root) end)
  return function ()
    local ok, hanzi = coroutine.resume(co)
    if not ok then
      -- The coroutine has already finished (or failed): nothing more to give.
      return nil
    end
    -- On the final resume the traversal returns without yielding, so hanzi
    -- is nil, which is also the end-of-iteration marker.
    return hanzi
  end
end

---------------------------------------------------------------
-- GUI

-- Generator of fresh, consecutive window ids above wx.wxID_HIGHEST.
local new_id = (function ()
  local id = wx.wxID_HIGHEST
  return function ()
    id = id + 1
    return id
  end
end)()

dialog = wx.wxDialog(wx.NULL, wx.wxID_ANY, _T"Lua笔划输入法演示",
  wx.wxDefaultPosition, wx.wxDefaultSize)
panel = wx.wxPanel(dialog, wx.wxID_ANY)
local main_sizer = wx.wxBoxSizer(wx.wxVERTICAL)

-- Stroke buttons (heng, shu, pie, na, zhe). Each button's window id is the
-- stroke code itself, which is how the click handler recognises them.
local stroke_label = wx.wxStaticText(panel, new_id(), _T"可选笔划")
local button_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
button_sizer:Add(stroke_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
for stroke = stroke_heng, stroke_max do
  local stroke_button = wx.wxButton(panel, stroke, stroke_text[stroke])
  button_sizer:Add(stroke_button, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
end
main_sizer:Add(button_sizer, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)

-- Row showing the strokes entered so far, with backspace / clear buttons.
local input_label = wx.wxStaticText(panel, new_id(), _T"输入笔划")
local input_textctrl = wx.wxTextCtrl(panel, new_id(), "",
  wx.wxDefaultPosition, wx.wxDefaultSize, wx.wxTE_READONLY)
local input_backspace_button = wx.wxButton(panel, new_id(), _T"退格")
local input_clear_button = wx.wxButton(panel, new_id(), _T"清除")
local input_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
input_sizer:Add(input_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
input_sizer:Add(input_textctrl, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
input_sizer:Add(input_backspace_button, 0, wx.wxALL, 5)
input_sizer:Add(input_clear_button, 0, wx.wxALL, 5)
main_sizer:Add(input_sizer, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)

-- Candidate hanzi row.
local candidate_label = wx.wxStaticText(panel, new_id(), _T"备选汉字")
local candidate_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
candidate_sizer:Add(candidate_label, 0, wx.wxALIGN_LEFT+wx.wxALL, 5)
local candidate_number = 5

--- Create `num` candidate buttons and add them to the candidate row.
-- @param num number of buttons to create (at least 1)
-- @return array of buttons, plus `start_id` / `end_id`: the inclusive range
--   of their window ids (consecutive because they come from new_id()).
function create_candidate_btn(num)
  local textctrls = {}
  for i = 1, num do
    textctrls[i] = wx.wxButton(panel, new_id(), "")
    candidate_sizer:Add(textctrls[i], 1, wx.wxALIGN_LEFT+wx.wxALL+wx.wxEXPAND, 5)
  end
  textctrls.start_id = textctrls[1]:GetId()
  -- Derive the range from `num`, not from the outer candidate_number, so the
  -- function is correct for whatever count the caller passes.
  textctrls.end_id = textctrls.start_id + num - 1
  return textctrls
end

local candidate_textctrls = create_candidate_btn(candidate_number)
main_sizer:Add(candidate_sizer, 1, wx.wxALIGN_LEFT+wx.wxALL+wx.wxEXPAND, 5)

-- Output area collecting the hanzi the user has picked.
local output_textctrl = wx.wxTextCtrl(panel, new_id(), "", wx.wxDefaultPosition,
  wx.wxSize(0, 100), wx.wxTE_MULTILINE)
local output_sizer = wx.wxBoxSizer(wx.wxHORIZONTAL)
output_sizer:Add(output_textctrl, 1, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 5)
main_sizer:Add(output_sizer, 0, wx.wxALIGN_LEFT+wx.wxEXPAND+wx.wxALL, 0)

main_sizer:SetSizeHints(dialog)
dialog:SetSizer(main_sizer)

-- Required: without destroying the dialog on close the program does not
-- exit properly.
dialog:Connect(wx.wxEVT_CLOSE_WINDOW, function (event)
  dialog:Destroy()
  event:Skip()
end)

-- Load the stroke database.
local trie = db_to_trie("hzbs.sqlite3.db")

-- Strokes entered so far (array of stroke codes).
input_strokes = {}
-- Enumerator over the candidates for the current prefix; nil when there is
-- no valid prefix.
get_next_candidate = nil

--- Refresh the candidate buttons from the current enumerator.
-- Pulls one hanzi per button; buttons beyond the available hanzi, or all of
-- them when there is no enumerator, are blanked.
function update_candidate()
  for _, textctrl in ipairs(candidate_textctrls) do
    local hanzi = get_next_candidate and get_next_candidate() or nil
    textctrl:SetLabel(hanzi or "")
  end
end

--- Show the entered strokes, space separated, in the read-only input box.
function update_input()
  local text = {}
  for i, stroke in ipairs(input_strokes) do
    text[i] = stroke_text[stroke]
  end
  input_textctrl:SetValue(table.concat(text, " "))
end

--- Append a stroke to the input; the stroke is rejected (and nothing is
-- refreshed) when no hanzi starts with the resulting prefix.
-- @param stroke stroke code
function insert_stroke(stroke)
  table.insert(input_strokes, stroke)
  local node = find_node(trie, input_strokes)
  if node == nil then
    table.remove(input_strokes) -- drop the invalid stroke again
    -- BEEP
    return
  end
  get_next_candidate = get_hanzi_enumerator(node)
  update_input()
  update_candidate()
end

--- Remove the last entered stroke and refresh the display.
function remove_stroke()
  table.remove(input_strokes)
  local node = find_node(trie, input_strokes)
  if node == nil then
    get_next_candidate = nil
  else
    get_next_candidate = get_hanzi_enumerator(node)
  end
  update_input()
  update_candidate()
end

--- Discard all entered strokes and blank the candidates.
function clear_stroke()
  input_strokes = {}
  get_next_candidate = nil
  update_input()
  update_candidate()
end

-- Single handler for every button in the dialog, dispatching on window id.
dialog:Connect(wx.wxID_ANY, wx.wxEVT_COMMAND_BUTTON_CLICKED, function (event)
  local id = event:GetId()
  if id >= stroke_heng and id <= stroke_max then
    -- Stroke buttons use their stroke code as id. The lower bound keeps
    -- negative or zero ids from being inserted as bogus strokes.
    insert_stroke(id)
  elseif id >= candidate_textctrls.start_id and id <= candidate_textctrls.end_id then
    local label = candidate_textctrls[id - candidate_textctrls.start_id + 1]:GetLabel()
    -- An empty candidate slot is not a selection: ignore the click rather
    -- than wiping the strokes the user has typed.
    if label ~= "" then
      output_textctrl:AppendText(label)
      clear_stroke()
    end
  elseif id == input_backspace_button:GetId() then
    remove_stroke()
  elseif id == input_clear_button:GetId() then
    clear_stroke()
  end
end)

dialog:Centre()
dialog:Show(true)
wx.wxGetApp():MainLoop()

打包下载

转好的数据库文件和源代码文件,可以去我的资源上下载(CSDN的上传太垃圾,我已经搬到cnblogs上了,文件上传到那里了,链接http://files.cnblogs.com/windtail/ime-win32.rar,再一看,居然CSDN上传文件延迟了一天才上来,悲剧http://download.csdn.net/detail/windtailljj/4476402),先来张截图~~




原创粉丝点击