-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathtokenize_test.js
More file actions
33 lines (27 loc) · 915 Bytes
/
tokenize_test.js
File metadata and controls
33 lines (27 loc) · 915 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
// Load the segmentation module.
var Segment = require('segment');
// Create a segmenter instance.
var segment = new Segment();
// Use the default recognition modules and dictionaries. Loading the dictionary
// files takes about 1 second; it only needs to run once, at initialization.
segment.useDefault();
// Dynamic load. NOTE(review): require.context is presumably webpack's bundler
// API (plain Node's require has no such method) — confirm the build setup.
// The arguments ask for entries under ./dict whose path ends in "txt"; the
// `false` flag presumably disables subdirectory search — verify against webpack.
var dicts = require.context('./dict',false,/.*txt$/);
// Disabled: would pass the './jieba.txt' entry of the context above to loadDict.
// segment.loadDict(dicts('./jieba.txt'));
// Sample inputs; examples[0] is segmented at the bottom of this script.
var examples = require('./examples');
// TODO port back
/**
 * Report whether every UTF-16 code unit of `str` is an ASCII digit or an
 * ASCII letter.
 *
 * The scan covers `str.length` code units, so an empty string yields `true`.
 *
 * @param {string} str - Text to inspect.
 * @returns {boolean} `false` as soon as a code unit outside 0-9, A-Z, a-z is
 *   found; `true` otherwise.
 */
function isAlphaNumeric(str) {
  var total = str.length;
  var pos = 0;
  while (pos < total) {
    var unit = str.charCodeAt(pos);
    var isDigit = 47 < unit && unit < 58;   // '0'..'9'
    var isUpper = 64 < unit && unit < 91;   // 'A'..'Z'
    var isLower = 96 < unit && unit < 123;  // 'a'..'z'
    if (!isDigit && !isUpper && !isLower) {
      return false;
    }
    pos += 1;
  }
  return true;
}
// Start segmenting: print the loaded examples, then the segmentation result
// for the first example and for a fixed sample sentence.
console.log(examples);
console.log(segment.doSegment(examples[0]));
console.log(segment.doSegment('这是一个基于Node.js的中文分词模块。'));