TODO: wordIndexUpperLower 原因: 边界

This commit is contained in:
huangsimin 2019-08-19 19:03:58 +08:00
parent b4829ba058
commit 5a7a4f2c92
3 changed files with 251 additions and 41 deletions

View File

@ -1,15 +1,5 @@
package tried
// TriedString is a string key whose bytes can be mapped to trie slot indexes.
type TriedString string

// Size reports the length of the string in bytes.
func (ts TriedString) Size() uint {
	n := len(ts)
	return uint(n)
}

// WordIndex returns the slot index of the byte at position idx, measured as
// its distance from 'a'. Indexing panics when idx is past the end of the
// string; bytes below 'a' wrap around because the subtraction is unsigned.
func (ts TriedString) WordIndex(idx uint) uint {
	ch := ts[idx]
	return uint(ch) - 'a'
}
// func (ts TriedString) WordIndex(idx uint) uint {
// w := ts[idx]
// if w >= 'a' && w <= 'z' {
@ -21,14 +11,9 @@ func (ts TriedString) WordIndex(idx uint) uint {
// }
// }
// ObjectIndex describes a key that can be walked position by position.
// TriedString in this file provides both methods.
type ObjectIndex interface {
// WordIndex returns the slot index for the element at position idx.
WordIndex(idx uint) uint
// Size returns the number of positions in the key.
Size() uint
}
type Tried struct {
root *Node
datasize uint
root *Node
wiStore *wordIndexStore
}
type Node struct {
@ -36,25 +21,34 @@ type Node struct {
value interface{}
}
// New 默认 WordIndexLower 意味着只支持小写
func New() *Tried {
tried := &Tried{}
tried.root = new(Node)
tried.datasize = 62
tried.wiStore = WordIndexDict[WordIndexLower]
return tried
}
func (tried *Tried) wordIndex(w byte) uint {
return uint(w) - 'a'
// NewWithWordType creates an empty Tried whose byte/index mapping is the
// WordIndexDict entry registered for t; for example WordIndexLower means only
// lowercase words are supported.
func NewWithWordType(t WordIndexType) *Tried {
	return &Tried{
		root:    new(Node),
		wiStore: WordIndexDict[t],
	}
}
func (tried *Tried) Put(words string, values ...interface{}) {
cur := tried.root
var n *Node
for i := 0; i < len(words); i++ {
w := tried.wordIndex(words[i])
w := tried.wiStore.Byte2Index(words[i])
if cur.data == nil {
cur.data = make([]*Node, tried.datasize)
cur.data = make([]*Node, tried.wiStore.DataSize)
}
if n = cur.data[w]; n == nil {
@ -81,8 +75,9 @@ func (tried *Tried) Put(words string, values ...interface{}) {
func (tried *Tried) Get(words string) interface{} {
cur := tried.root
var n *Node
for i := 0; i < len(words); i++ {
w := tried.wordIndex(words[i]) //TODO: 升级Index 函数
w := tried.wiStore.Byte2Index(words[i]) //TODO: 升级Index 函数
if n = cur.data[w]; n == nil {
return nil
}

152
tree/tried/tried_index.go Normal file
View File

@ -0,0 +1,152 @@
package tried
// WordIndexDict maps each WordIndexType to the store holding its
// byte-to-index and index-to-byte functions; it is filled in by init.
var WordIndexDict map[WordIndexType]*wordIndexStore
func init() {
WordIndexDict = make(map[WordIndexType]*wordIndexStore)
WordIndexDict[WordIndexLower] = &wordIndexStore{WordIndexLower, wordIndexLower, indexWordLower, 26}
WordIndexDict[WordIndexUpper] = &wordIndexStore{WordIndexUpper, wordIndexUpper, indexWordUpper, 26}
WordIndexDict[WordIndexDigital] = &wordIndexStore{WordIndexDigital, wordIndexDigital, indexWordDigital, 10}
WordIndexDict[WordIndexUpperLower] = &wordIndexStore{WordIndexUpperLower, wordIndexUpperLower, indexWordUpperLower, 52}
WordIndexDict[WordIndexLowerDigital] = &wordIndexStore{WordIndexLowerDigital, wordIndexLowerDigital, indexWordLowerDigital, 36}
WordIndexDict[WordIndexUpperDigital] = &wordIndexStore{WordIndexUpperDigital, wordIndexUpperDigital, indexWordUpperDigital, 36}
WordIndexDict[WordIndexUpperLowerDigital] = &wordIndexStore{WordIndexUpperLowerDigital, wordIndexUpperLowerDigital, indexWordUpperLowerDigital, 62}
WordIndexDict[WordIndex256] = &wordIndexStore{WordIndex256, wordIndex256, indexWord256, 256}
WordIndexDict[WordIndex32to126] = &wordIndexStore{WordIndex32to126, wordIndex32to126, indexWord32to126, ('~' - ' ' + 1)}
}
// WordIndexType selects which set of bytes a trie accepts, e.g. WordIndexLower
// means words passed to Put may contain lowercase letters only.
type WordIndexType int
// Supported WordIndexType values. Each one is registered in WordIndexDict by init.
const (
// The zero value is skipped, so the first named constant is 1.
_ WordIndexType = iota
// WordIndexLower is registered with 26 slots (wordIndexLower / indexWordLower).
WordIndexLower
// WordIndexUpper is registered with 26 slots (wordIndexUpper / indexWordUpper).
WordIndexUpper
// WordIndexDigital is registered with 10 slots (wordIndexDigital / indexWordDigital).
WordIndexDigital
// WordIndexUpperLower is registered with 52 slots.
WordIndexUpperLower
// WordIndexLowerDigital is registered with 36 slots.
WordIndexLowerDigital
// WordIndexUpperDigital is registered with 36 slots.
WordIndexUpperDigital
// WordIndexUpperLowerDigital is registered with 62 slots.
WordIndexUpperLowerDigital
// WordIndex256 is registered with 256 slots, one per byte value.
WordIndex256
// WordIndex32to126 is registered with one slot per byte from ' ' through '~'.
WordIndex32to126
)
// wordIndexStore bundles the two mapping functions and the slot count that
// belong to one WordIndexType.
type wordIndexStore struct {
// Type is the WordIndexType this store is registered under.
Type WordIndexType
// Byte2Index converts a byte of a word into its slot index.
Byte2Index func(byte) uint
// Index2Byte converts a slot index back into the byte it stands for.
Index2Byte func(uint) byte
// DataSize is the number of child slots allocated per node.
DataSize uint
}
// wordIndexLower maps a lowercase letter to its offset from 'a' ('a' -> 0,
// 'z' -> 25). Bytes below 'a' wrap around because the arithmetic is unsigned.
func wordIndexLower(w byte) uint {
	const base = 'a'
	idx := uint(w)
	idx -= base
	return idx
}
// indexWordLower is the inverse of wordIndexLower: it turns a slot index back
// into a lowercase letter (0 -> 'a', 25 -> 'z').
func indexWordLower(w uint) byte {
	const base = 'a'
	offset := byte(w)
	return base + offset
}
//
// wordIndexUpper maps an uppercase letter to its offset from 'A' ('A' -> 0,
// 'Z' -> 25). Bytes below 'A' wrap around because the arithmetic is unsigned.
func wordIndexUpper(w byte) uint {
	const base = 'A'
	idx := uint(w)
	idx -= base
	return idx
}
// indexWordUpper is the inverse of wordIndexUpper: it turns a slot index back
// into an uppercase letter (0 -> 'A', 25 -> 'Z').
func indexWordUpper(w uint) byte {
	const base = 'A'
	offset := byte(w)
	return base + offset
}
//
// wordIndexDigital maps a decimal digit to its numeric value ('0' -> 0,
// '9' -> 9). Bytes below '0' wrap around because the arithmetic is unsigned.
func wordIndexDigital(w byte) uint {
	const base = '0'
	idx := uint(w)
	idx -= base
	return idx
}
// indexWordDigital is the inverse of wordIndexDigital: it turns a slot index
// back into a digit character (0 -> '0', 9 -> '9').
func indexWordDigital(w uint) byte {
	const base = '0'
	offset := byte(w)
	return base + offset
}
//
// wordIndexUpperLower maps a letter to a slot in a 52-entry table:
// 'a'-'z' occupy 0-25 and 'A'-'Z' occupy 26-51.
//
// The lowercase test must be inclusive (>=): with a strict comparison 'a'
// itself fell through to the uppercase branch and produced 58, which is
// outside the table. Bytes that are not ASCII letters are not validated and
// yield an out-of-range index.
func wordIndexUpperLower(w byte) uint {
	iw := uint(w)
	if iw >= 'a' {
		return iw - 'a'
	}
	return iw - 'A' + 26
}
// indexWordUpperLower is the inverse of wordIndexUpperLower: slots 0-25 map
// back to 'a'-'z' and slots 26-51 map back to 'A'-'Z'.
//
// The 26-slot offset of the uppercase range has to be removed before adding
// 'A'; without that, slot 26 decoded to '[' instead of 'A'.
func indexWordUpperLower(w uint) byte {
	if w >= 26 {
		return byte(w-26) + 'A'
	}
	return byte(w) + 'a'
}
//
// wordIndexLowerDigital maps a byte to a slot in a 36-entry table:
// 'a'-'z' occupy 0-25 and '0'-'9' occupy 26-35.
//
// The letter test must be inclusive (>=): with a strict comparison 'a' fell
// through to the digit branch and produced 75, which is outside the table.
// Other bytes are not validated and yield an out-of-range index.
func wordIndexLowerDigital(w byte) uint {
	iw := uint(w)
	if iw >= 'a' {
		return iw - 'a'
	}
	return iw - '0' + 26
}
// indexWordLowerDigital is the inverse of wordIndexLowerDigital: slots 0-25
// map back to 'a'-'z' and slots 26-35 map back to '0'-'9'.
//
// The 26-slot offset of the digit range has to be removed before adding '0';
// without that, slot 26 decoded to 'J' instead of '0'.
func indexWordLowerDigital(w uint) byte {
	if w >= 26 {
		return byte(w-26) + '0'
	}
	return byte(w) + 'a'
}
//
// wordIndexUpperDigital maps a byte to a slot in a 36-entry table:
// 'A'-'Z' occupy 0-25 and '0'-'9' occupy 26-35.
//
// The letter test must be inclusive (>=): with a strict comparison 'A' fell
// through to the digit branch and produced 43, which is outside the table.
// Other bytes are not validated and yield an out-of-range index.
func wordIndexUpperDigital(w byte) uint {
	iw := uint(w)
	if iw >= 'A' {
		return iw - 'A'
	}
	return iw - '0' + 26
}
// indexWordUpperDigital is the inverse of wordIndexUpperDigital: slots 0-25
// map back to 'A'-'Z' and slots 26-35 map back to '0'-'9'.
//
// Two defects are corrected here: the digit branch now removes the 26-slot
// offset before adding '0', and the letter branch adds 'A' (it previously
// added 'a', decoding to lowercase letters this table never stores).
func indexWordUpperDigital(w uint) byte {
	if w >= 26 {
		return byte(w-26) + '0'
	}
	return byte(w) + 'A'
}
//
// wordIndexUpperLowerDigital maps a byte to a slot in a 62-entry table:
// 'a'-'z' occupy 0-25, 'A'-'Z' occupy 26-51 and '0'-'9' occupy 52-61.
//
// Both range tests must be inclusive (>=): with strict comparisons 'a' was
// treated as uppercase (58, colliding with '6') and 'A' as a digit (69, out
// of range). Other bytes are not validated and yield an out-of-range index.
func wordIndexUpperLowerDigital(w byte) uint {
	switch iw := uint(w); {
	case iw >= 'a':
		return iw - 'a'
	case iw >= 'A':
		return iw - 'A' + 26
	default:
		return iw - '0' + 52
	}
}
// indexWordUpperLowerDigital is the inverse of wordIndexUpperLowerDigital:
// slots 0-25 map back to 'a'-'z', 26-51 to 'A'-'Z' and 52-61 to '0'-'9'.
//
// Each range's starting offset has to be removed before adding its base
// character; without that, slot 26 decoded to '[' and slot 52 to 'd'.
func indexWordUpperLowerDigital(w uint) byte {
	switch {
	case w >= 52:
		return byte(w-52) + '0'
	case w >= 26:
		return byte(w-26) + 'A'
	default:
		return byte(w) + 'a'
	}
}
// wordIndex256 covers every byte value: the slot index is the byte itself.
func wordIndex256(w byte) uint {
	idx := uint(w)
	return idx
}
// indexWord256 is the inverse of wordIndex256: the slot index is converted
// straight back to a byte (values above 255 are truncated by the conversion).
func indexWord256(w uint) byte {
	b := byte(w)
	return b
}
// wordIndex32to126 covers the bytes from space through '~' (digits, letters,
// punctuation and so on): the slot index is the offset from ' '.
// Bytes below ' ' wrap around because the arithmetic is unsigned.
func wordIndex32to126(w byte) uint {
	const base = ' '
	idx := uint(w)
	idx -= base
	return idx
}
// indexWord32to126 is the inverse of wordIndex32to126: it adds the slot index
// back onto ' ' (0 -> ' ', 94 -> '~').
func indexWord32to126(w uint) byte {
	const base = ' '
	offset := byte(w)
	return base + offset
}

View File

@ -1,11 +1,44 @@
package tried
import (
"bytes"
"encoding/gob"
"os"
"testing"
"github.com/Pallinder/go-randomdata"
)
func TestTried_NewWith(t *testing.T) {
tried := NewWithWordType(WordIndex32to126)
words := "~ 23fd "
tried.Put(words)
if tried.Get(words) == nil {
t.Error("should be not nil")
}
tried = NewWithWordType(WordIndexLower)
words = "az"
tried.Put(words)
if tried.Get(words) == nil {
t.Error("should be not nil")
}
tried = NewWithWordType(WordIndexUpper)
words = "AZ"
tried.Put(words)
if tried.Get(words) == nil {
t.Error("should be not nil")
}
tried = NewWithWordType(WordIndexUpperLower)
words = "AZazsdfsd"
tried.Put(words)
if tried.Get(words) == nil {
t.Error("should be not nil")
}
}
func TestTried_PutAndGet1(t *testing.T) {
tried := New()
@ -72,20 +105,49 @@ func TestTried_Traversal(t *testing.T) {
}
}
// TesStoreData generates N random words, gob-encodes the list and writes it
// to "tried.log" in the working directory, where Load reads it back.
//
// NOTE(review): the name lacks the "Test" prefix, so `go test` does not run
// it automatically; presumably that is deliberate because it is slow and
// rewrites the fixture file. Confirm before renaming.
//
// Encoding and file errors now fail the test instead of being dropped, and
// the file is closed so the data is flushed and the handle is not leaked.
func TesStoreData(t *testing.T) {
	const N = 1000000

	l := make([]string, 0, N)
	for i := 0; i < N; i++ {
		var content []rune
		// The upper bound is re-drawn on every iteration, as in the original,
		// so the length distribution of the generated words is unchanged.
		for c := 0; c < randomdata.Number(5, 15); c++ {
			char := randomdata.Number(0, 26) + 'a'
			content = append(content, rune(byte(char)))
		}
		l = append(l, string(content))
	}

	var result bytes.Buffer
	if err := gob.NewEncoder(&result).Encode(l); err != nil {
		t.Fatalf("encoding word list: %v", err)
	}

	f, err := os.OpenFile("tried.log", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0666)
	if err != nil {
		t.Fatalf("opening tried.log: %v", err)
	}
	if _, err := f.Write(result.Bytes()); err != nil {
		f.Close() // best effort; the write error is the one worth reporting
		t.Fatalf("writing tried.log: %v", err)
	}
	if err := f.Close(); err != nil {
		t.Fatalf("closing tried.log: %v", err)
	}
}
// Load reads the gob-encoded word list from "tried.log" in the working
// directory and returns it. It returns nil when the file cannot be opened or
// its contents cannot be decoded.
//
// The file is now closed after reading, and a failed Open is handled
// explicitly instead of passing a nil *os.File to the decoder.
func Load() []string {
	f, err := os.Open("tried.log")
	if err != nil {
		return nil
	}
	defer f.Close()

	var result []string
	if err := gob.NewDecoder(f).Decode(&result); err != nil {
		return nil
	}
	return result
}
func BenchmarkTried_Put(b *testing.B) {
var data []string
b.N = 1000000
count := 10
for i := 0; i < b.N; i++ {
var content []rune
for c := 0; c < randomdata.Number(5, 15); c++ {
char := randomdata.Number(0, 26) + 'a'
content = append(content, rune(byte(char)))
}
data = append(data, (string(content)))
}
// for i := 0; i < b.N; i++ {
// var content []rune
// for c := 0; c < randomdata.Number(5, 15); c++ {
// char := randomdata.Number(0, 26) + 'a'
// content = append(content, rune(byte(char)))
// }
// data = append(data, (string(content)))
// }
data = Load()
b.ResetTimer()
b.N = b.N * count
@ -98,19 +160,20 @@ func BenchmarkTried_Put(b *testing.B) {
}
func BenchmarkTried_Get(b *testing.B) {
b.StopTimer()
var data []string
b.N = 1000000
count := 10
for i := 0; i < b.N; i++ {
var content []rune
for c := 0; c < randomdata.Number(5, 15); c++ {
char := randomdata.Number(0, 26) + 'a'
content = append(content, rune(byte(char)))
}
data = append(data, string(content))
}
// for i := 0; i < b.N; i++ {
// var content []rune
// for c := 0; c < randomdata.Number(5, 15); c++ {
// char := randomdata.Number(0, 26) + 'a'
// content = append(content, rune(byte(char)))
// }
// data = append(data, string(content))
// }
data = Load()
b.N = b.N * count
@ -119,7 +182,7 @@ func BenchmarkTried_Get(b *testing.B) {
tried.Put(v)
}
b.ResetTimer()
b.StartTimer()
for c := 0; c < count; c++ {
for _, v := range data {
tried.Get(v)