预先说明:以项目名为 x 作为示例
第一步:在GitHub找个汉字拼音字典(随便找了一个输入法项目的字典)
https://github.com/MobtgZhang/LingJian-Pinyin/blob/main/data/pinyin_dict.txt
第二步:把字典下载下来放在项目里,例如:data/pinyin_dict.txt
第三步:创建 com/pinyin_han.go 文件,把代码复制进去
x/com/pinyin_han.go
go
package com
import (
	"bufio"
	"os"
	"strings"
	"sync"
	"unicode/utf8"
)
// PinyinHan is an in-memory pinyin-to-hanzi lookup table filled from a
// whitespace-separated dictionary file. The same entries are indexed three
// ways (exact key, abbreviation, key without tone digits) and all three
// indexes are guarded by one read/write mutex.
type PinyinHan struct {
	pyToHan     map[string][]string // exact pinyin key as written in the dictionary
	abbrToHan   map[string][]string // abbreviation: first character of each syllable
	noToneToHan map[string][]string // pinyin key with tone digits removed
	mutex       sync.RWMutex        // guards the three maps above
}

// NewPinyinHan returns an empty PinyinHan ready to be filled by
// LoadDictionary.
func NewPinyinHan() *PinyinHan {
	return &PinyinHan{
		pyToHan:     make(map[string][]string),
		abbrToHan:   make(map[string][]string),
		noToneToHan: make(map[string][]string),
	}
}

// initIndexes allocates any index map that is still nil, so that a PinyinHan
// that was not built by NewPinyinHan can be loaded without panicking.
// The caller must hold the write lock.
func (p *PinyinHan) initIndexes() {
	if p.pyToHan == nil {
		p.pyToHan = make(map[string][]string)
	}
	if p.abbrToHan == nil {
		p.abbrToHan = make(map[string][]string)
	}
	if p.noToneToHan == nil {
		p.noToneToHan = make(map[string][]string)
	}
}

// LoadDictionary reads filename line by line and merges its entries into the
// indexes. A line contributes an entry when it has at least two
// whitespace-separated columns: the first is the pinyin key, the second the
// hanzi text. Further columns are ignored; blank and single-column lines are
// skipped.
//
// Loading is idempotent: a (key, hanzi) pair that an index already holds is
// not appended again, so loading the same file repeatedly (Run does this on
// every call) does not duplicate results. Entries keep first-seen order and
// entries from different files are merged.
//
// It returns the error from opening the file or from the scanner. When
// scanning fails part-way, the entries read before the failure stay loaded.
// The write lock is held while the file is scanned.
func (p *PinyinHan) LoadDictionary(filename string) error {
	file, err := os.Open(filename)
	if err != nil {
		return err
	}
	// The file is only read, so a Close error is deliberately ignored.
	defer func() { _ = file.Close() }()

	p.mutex.Lock()
	defer p.mutex.Unlock()
	p.initIndexes()

	type pair struct{ key, hanzi string }
	// newAdder returns a function that appends hanzi to index[key] unless the
	// index already holds that exact pair. The set of known pairs is seeded
	// from the current index contents so that re-loading is a no-op.
	newAdder := func(index map[string][]string) func(key, hanzi string) {
		seen := make(map[pair]struct{})
		for key, list := range index {
			for _, hanzi := range list {
				seen[pair{key, hanzi}] = struct{}{}
			}
		}
		return func(key, hanzi string) {
			entry := pair{key, hanzi}
			if _, dup := seen[entry]; dup {
				return
			}
			seen[entry] = struct{}{}
			index[key] = append(index[key], hanzi)
		}
	}
	addExact := newAdder(p.pyToHan)
	addAbbr := newAdder(p.abbrToHan)
	addNoTone := newAdder(p.noToneToHan)

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		fields := strings.Fields(scanner.Text())
		if len(fields) < 2 {
			continue // blank line, or a key with no hanzi column
		}
		pinyin, hanzi := fields[0], fields[1]
		addExact(pinyin, hanzi)
		addAbbr(p.getAbbreviation(pinyin), hanzi)
		addNoTone(p.removeTone(pinyin), hanzi)
	}
	return scanner.Err()
}

// getAbbreviation returns the first character of every whitespace-separated
// syllable in pinyin, concatenated.
//
// NOTE: LoadDictionary passes the first column of a line, which never
// contains whitespace, so the abbreviation of a loaded key is always just its
// first character.
func (p *PinyinHan) getAbbreviation(pinyin string) string {
	var abbr strings.Builder
	for _, syl := range strings.Fields(pinyin) {
		// Copy the whole first character rather than the first byte: a
		// syllable that starts with a tone-marked vowel such as "ā" is
		// multi-byte in UTF-8 and a lone leading byte would be invalid text.
		_, size := utf8.DecodeRuneInString(syl)
		abbr.WriteString(syl[:size])
	}
	return abbr.String()
}

// removeTone returns pinyin with the tone digits 1-5 removed (5 being the
// usual notation for the neutral tone). Tone diacritics are left untouched.
func (p *PinyinHan) removeTone(pinyin string) string {
	var b strings.Builder
	b.Grow(len(pinyin))
	// Filtering bytes is safe here: ASCII digits never occur inside a
	// multi-byte UTF-8 sequence.
	for i := 0; i < len(pinyin); i++ {
		if c := pinyin[i]; c < '1' || c > '5' {
			b.WriteByte(c)
		}
	}
	return b.String()
}

// lookupLocked returns the index slice matching pinyin, or nil when nothing
// matches. Priority: exact key, then key without tone digits, then
// abbreviation. The caller must hold at least the read lock and must not
// modify or retain the returned slice.
func (p *PinyinHan) lookupLocked(pinyin string) []string {
	if hit, ok := p.pyToHan[pinyin]; ok {
		return hit
	}
	if hit, ok := p.noToneToHan[p.removeTone(pinyin)]; ok {
		return hit
	}
	return p.abbrToHan[pinyin]
}

// Convert returns the hanzi candidates for pinyin, or nil when nothing
// matches. Priority: exact key, then key without tone digits, then
// abbreviation. The result is a copy, so callers may modify it freely without
// touching the dictionary.
func (p *PinyinHan) Convert(pinyin string) []string {
	p.mutex.RLock()
	defer p.mutex.RUnlock()
	return append([]string(nil), p.lookupLocked(pinyin)...)
}

// Run loads the dictionary at dictPath and converts every element of pinyin,
// returning all candidates concatenated in input order. Spaces inside an
// element are removed before lookup. On a load error it returns nil and the
// error; otherwise the result is non-nil even when nothing matched.
func (p *PinyinHan) Run(dictPath string, pinyin []string) ([]string, error) {
	if err := p.LoadDictionary(dictPath); err != nil {
		return nil, err
	}

	out := make([]string, 0, len(pinyin))
	p.mutex.RLock()
	defer p.mutex.RUnlock()
	for _, v := range pinyin {
		out = append(out, p.lookupLocked(strings.ReplaceAll(v, " ", ""))...)
	}
	return out, nil
}
第四步:直接调用(以Test为例)
x/test/pinyin_han_test.go
go
package test
import (
"testing"
"x/com"
)
// TestPinyinHan loads the sample dictionary through Run and logs the hanzi
// candidates returned for a few pinyin inputs. It only fails when the
// dictionary cannot be loaded; the candidates themselves are not asserted.
func TestPinyinHan(t *testing.T) {
	inputs := []string{"guangdong", "hainan", "fujian", "taiwan", "a"}
	converter := com.NewPinyinHan()

	han, err := converter.Run("../data/pinyin_dict.txt", inputs)
	if err != nil {
		t.Fatal(err)
	}
	t.Log(han)
}
输出结果:

说句题外话,其实字典可以根据业务再做一些轻量化定制。例如在我的项目里,只有检索功能会用到它,所以我会根据文本内容,借助分词器和拼音转换器生成自定义数据,再写进字典里。
评论
发表评论