You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
121 lines
3.9 KiB
JavaScript
121 lines
3.9 KiB
JavaScript
/*!
|
|
* Lunr languages, `Japanese` language
|
|
* https://github.com/MihaiValentin/lunr-languages
|
|
*
|
|
* Copyright 2014, Chad Liu
|
|
* http://www.mozilla.org/MPL/
|
|
*/
|
|
/*!
|
|
* based on
|
|
* Snowball JavaScript Library v0.3
|
|
* http://code.google.com/p/urim/
|
|
* http://snowball.tartarus.org/
|
|
*
|
|
* Copyright 2010, Oleg Mazko
|
|
* http://www.mozilla.org/MPL/
|
|
*/
|
|
|
|
/**
|
|
* export the module via AMD, CommonJS or as a browser global
|
|
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
|
|
*/
|
|
;
|
|
(function(root, factory) {
|
|
// Probe the host environment for a module system. `typeof` is used so that
// testing an undeclared identifier does not throw.
var amdAvailable = typeof define === 'function' && define.amd;
var commonJsLike = typeof exports === 'object';

if (amdAvailable) {
  // AMD. Register the factory as an anonymous module.
  define(factory);
} else if (commonJsLike) {
  /**
   * Node. Does not work with strict CommonJS, but only with CommonJS-like
   * environments that support module.exports, like Node.
   */
  module.exports = factory();
} else {
  // Browser globals (root is window): build the plugin and apply it to the
  // global lunr object straight away.
  factory()(root.lunr);
}
|
|
}(this, function() {
|
|
/**
 * The factory returns the plugin function itself. The caller (or the
 * browser-globals branch of the wrapper above) invokes it with the lunr
 * object to register Japanese language support on it.
 */
|
|
return function(lunr) {
|
|
/* Throw an Error carrying `message` when a required dependency is undefined. */
function assertLoaded(dependency, message) {
  if (typeof dependency === 'undefined') {
    throw new Error(message);
  }
}

/* lunr itself has to be loaded before this script */
assertLoaded(lunr, 'Lunr is not present. Please include / require Lunr before this script.');

/* lunr stemmer support has to be loaded before this script as well */
assertLoaded(lunr.stemmerSupport, 'Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.');
|
|
|
|
/**
 * Japanese locale function.
 *
 * Must be called with `this` bound to an object exposing a `pipeline` that
 * has `reset()` and `add()`. It resets that pipeline, adds the Japanese stop
 * word filter followed by the Japanese stemmer, and installs the Japanese
 * tokenizer.
 */
lunr.jp = function() {
  var pipeline = this.pipeline;

  pipeline.reset();
  pipeline.add(lunr.jp.stopWordFilter, lunr.jp.stemmer);

  // change the tokenizer for the japanese one; note that this assigns to
  // lunr.tokenizer itself, not to a property of `this`
  lunr.tokenizer = lunr.jp.tokenizer;
};
|
|
/* throw error if TinySegmenter is not yet included */
if ('undefined' === typeof TinySegmenter) {
  throw new Error('TinySegmenter is not present. Please include / require TinySegmenter before this script.');
}

// create the segmenter instance once; it is shared by every tokenizer call
var segmenter = new TinySegmenter();

/**
 * Japanese tokenizer.
 *
 * @param {*} obj - value to tokenize. `null`, `undefined` or a missing
 *   argument yields an empty array. An array is treated as already tokenized
 *   and each element is only lower-cased. Anything else is converted with
 *   `toString()`, stripped of leading and trailing whitespace, and split
 *   into words by TinySegmenter.
 * @returns {Array} lower-cased, non-empty tokens.
 */
lunr.jp.tokenizer = function (obj) {
  // `== null` matches both null and undefined, which also covers the
  // "called without arguments" case.
  if (obj == null) return [];

  if (Array.isArray(obj)) {
    return obj.map(function (t) {
      return t.toLowerCase();
    });
  }

  var str = obj.toString().replace(/^\s+/, '');

  // cut everything after the last non-whitespace character
  for (var i = str.length - 1; i >= 0; i--) {
    if (/\S/.test(str.charAt(i))) {
      str = str.substring(0, i + 1);
      break;
    }
  }

  // segment() returns an array of words
  var segs = segmenter.segment(str);

  return segs
    .filter(function (token) {
      return !!token;
    })
    .map(function (token) {
      // Lower-case after segmentation so the word boundaries are unchanged
      // and string input is case-folded the same way as array input above.
      return token.toLowerCase();
    });
};
|
|
|
|
/**
 * lunr stemmer function
 *
 * TODO japanese stemmer: no stemming is implemented yet, so every word is
 * returned unchanged.
 */
lunr.jp.stemmer = function (word) {
  return word;
};

lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp');
|
|
|
|
/**
 * stop word filter function
 *
 * Returns `token` unchanged unless `lunr.jp.stopWordFilter.stopWords`
 * contains it, in which case nothing (undefined) is returned.
 */
lunr.jp.stopWordFilter = function(token) {
  var stopWords = lunr.jp.stopWordFilter.stopWords;

  if (stopWords.indexOf(token) !== -1) {
    return;
  }

  return token;
};
|
|
|
|
lunr.jp.stopWordFilter.stopWords = new lunr.SortedSet();

// The space at the beginning is crucial: It marks the empty string
// as a stop word. lunr.js crashes during search when documents
// processed by the pipeline still contain the empty string.
// stopword for japanese is from http://www.ranks.nl/stopwords/japanese
//
// The list is written in a human-friendly order, so it is sorted before
// being stored: lunr.SortedSet lookups expect `elements` to be in ascending
// order, and Array#sort's default ordering (UTF-16 code units) matches the
// `<` / `>` string comparisons. Without sorting, some stop words can be
// missed by indexOf().
lunr.jp.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' ').sort();

// derive the length from the list instead of hard-coding it, so the two
// cannot drift apart when the list is edited
lunr.jp.stopWordFilter.stopWords.length = lunr.jp.stopWordFilter.stopWords.elements.length;

lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp');
|
|
};
|
|
}))
|