mirror of
https://github.com/welpo/tabi.git
synced 2026-02-15 23:57:19 +01:00
🐛 fix: support Japanese search (#620)
This commit is contained in:
parent
e04ed249d6
commit
e9a8fa86da
7 changed files with 1632 additions and 121 deletions
134
static/js/lunr/lunr.ja.js
Normal file
134
static/js/lunr/lunr.ja.js
Normal file
|
|
@ -0,0 +1,134 @@
|
||||||
|
/*!
|
||||||
|
* Lunr languages, `Japanese` language
|
||||||
|
* https://github.com/MihaiValentin/lunr-languages
|
||||||
|
*
|
||||||
|
* Copyright 2014, Chad Liu
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*/
|
||||||
|
/*!
|
||||||
|
* based on
|
||||||
|
* Snowball JavaScript Library v0.3
|
||||||
|
* http://code.google.com/p/urim/
|
||||||
|
* http://snowball.tartarus.org/
|
||||||
|
*
|
||||||
|
* Copyright 2010, Oleg Mazko
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* export the module via AMD, CommonJS or as a browser global
|
||||||
|
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
|
||||||
|
*/
|
||||||
|
;
|
||||||
|
(function(root, factory) {
  if (typeof define === 'function' && define.amd) {
    // AMD. Register as an anonymous module.
    define(factory);
  } else if (typeof exports === 'object') {
    /**
     * Node. Does not work with strict CommonJS, but
     * only CommonJS-like environments that support module.exports,
     * like Node.
     */
    module.exports = factory();
  } else {
    // Browser globals (root is window)
    factory()(root.lunr);
  }
}(this, function() {
  /**
   * The module export is a plugin function: call it with the lunr object
   * to register Japanese support on it as `lunr.ja` (also aliased as
   * `lunr.jp` for backward compatibility).
   */
  return function(lunr) {
    /* throw error if lunr is not yet included */
    if ('undefined' === typeof lunr) {
      throw new Error('Lunr is not present. Please include / require Lunr before this script.');
    }

    /* throw error if lunr stemmer support is not yet included */
    if ('undefined' === typeof lunr.stemmerSupport) {
      throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.');
    }

    /* throw error if TinySegmenter (needed by the tokenizer) is not yet included */
    if ('undefined' === typeof TinySegmenter) {
      throw new Error('TinySegmenter is not present. Please include / require TinySegmenter before this script.');
    }

    /**
     * Locale function. Invoked with an index as `this`: replaces the index
     * pipeline with the Japanese trimmer, stop word filter and stemmer, and
     * swaps lunr's global tokenizer for the Japanese one.
     */
    lunr.ja = function() {
      this.pipeline.reset();
      this.pipeline.add(
        lunr.ja.trimmer,
        lunr.ja.stopWordFilter,
        lunr.ja.stemmer
      );
      // change the tokenizer for japanese one
      lunr.tokenizer = lunr.ja.tokenizer;
    };

    // Single segmenter instance shared by every tokenizer call.
    var segmenter = new TinySegmenter();

    /**
     * Japanese tokenizer.
     *
     * @param {*} obj - value to tokenize.
     * @returns {string[]} `[]` when called without arguments or with
     *   null/undefined; the lowercased elements when given an array;
     *   otherwise the non-empty TinySegmenter segments of the lowercased,
     *   whitespace-trimmed string form of `obj`.
     */
    lunr.ja.tokenizer = function(obj) {
      // `== null` matches both null and undefined.
      if (!arguments.length || obj == null) return [];

      if (Array.isArray(obj)) {
        return obj.map(function(t) {
          return t.toLowerCase();
        });
      }

      // Lowercase string input as well, so that Latin text is normalised
      // the same way as in the array branch above.
      var str = obj.toString().toLowerCase().trim();

      // segment() returns an array of words; drop empty segments.
      return segmenter.segment(str).filter(function(token) {
        return !!token;
      });
    };

    /* lunr stemmer function */
    lunr.ja.stemmer = (function() {
      /* TODO japanese stemmer */
      // Placeholder: returns every word unchanged.
      return function(word) {
        return word;
      };
    })();

    lunr.Pipeline.registerFunction(lunr.ja.stemmer, 'stemmer-ja');

    /* lunr trimmer function */
    lunr.ja.wordCharacters = "一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9";
    lunr.ja.trimmer = lunr.trimmerSupport.generateTrimmer(lunr.ja.wordCharacters);
    lunr.Pipeline.registerFunction(lunr.ja.trimmer, 'trimmer-ja');

    /**
     * Stop word filter: returns the token unless it is a stop word, in
     * which case it returns undefined.
     */
    lunr.ja.stopWordFilter = function(token) {
      if (lunr.ja.stopWordFilter.stopWords.indexOf(token) === -1) {
        return token;
      }
    };

    lunr.ja.stopWordFilter.stopWords = new lunr.SortedSet();

    // The space at the beginning is crucial: It marks the empty string
    // as a stop word. lunr.js crashes during search when documents
    // processed by the pipeline still contain the empty string.
    // stopword for japanese is from http://www.ranks.nl/stopwords/japanese
    //
    // The list is sorted after splitting: the elements are assigned directly
    // into a SortedSet, whose indexOf (a binary search in lunr 0.x) expects
    // them in ascending order; unsorted, many stop words are never matched.
    lunr.ja.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' ').sort();
    // Derive the length from the list instead of hard-coding it.
    lunr.ja.stopWordFilter.stopWords.length = lunr.ja.stopWordFilter.stopWords.elements.length;
    lunr.Pipeline.registerFunction(lunr.ja.stopWordFilter, 'stopWordFilter-ja');

    // alias ja => jp for backward-compatibility.
    // jp is the country code, while ja is the language code
    // a new lunr.ja.js has been created, but in order to
    // keep the backward compatibility, we'll leave the lunr.jp.js
    // here for a while, and just make it use the new lunr.ja.js
    lunr.jp = lunr.ja;
    lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp');
    lunr.Pipeline.registerFunction(lunr.jp.trimmer, 'trimmer-jp');
    lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp');
  };
}))
|
||||||
1
static/js/lunr/lunr.ja.min.js
vendored
Normal file
1
static/js/lunr/lunr.ja.min.js
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
/*! Minified build of lunr.ja.js: Japanese support for Lunr. Lunr, Lunr stemmer support and TinySegmenter must be loaded first. The tokenizer lowercases string input and the stop word list is sorted for SortedSet lookups. */
!function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,(function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ja=function(){this.pipeline.reset(),this.pipeline.add(e.ja.trimmer,e.ja.stopWordFilter,e.ja.stemmer),e.tokenizer=e.ja.tokenizer};var r=new TinySegmenter;e.ja.tokenizer=function(e){if(!arguments.length||null==e||null==e)return[];if(Array.isArray(e))return e.map((function(e){return e.toLowerCase()}));for(var t=e.toString().toLowerCase().replace(/^\s+/,""),i=t.length-1;i>=0;i--)if(/\S/.test(t.charAt(i))){t=t.substring(0,i+1);break}return r.segment(t).filter((function(e){return!!e})).map((function(e){return e}))},e.ja.stemmer=function(e){return e},e.Pipeline.registerFunction(e.ja.stemmer,"stemmer-ja"),e.ja.wordCharacters="一二三四五六七八九十百千万億兆一-龠々〆ヵヶぁ-んァ-ヴーア-ン゙a-zA-Za-zA-Z0-90-9",e.ja.trimmer=e.trimmerSupport.generateTrimmer(e.ja.wordCharacters),e.Pipeline.registerFunction(e.ja.trimmer,"trimmer-ja"),e.ja.stopWordFilter=function(r){if(-1===e.ja.stopWordFilter.stopWords.indexOf(r))return r},e.ja.stopWordFilter.stopWords=new e.SortedSet,e.ja.stopWordFilter.stopWords.length=45,e.ja.stopWordFilter.stopWords.elements=" これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし".split(" ").sort(),e.Pipeline.registerFunction(e.ja.stopWordFilter,"stopWordFilter-ja"),e.jp=e.ja,e.Pipeline.registerFunction(e.jp.stemmer,"stemmer-jp"),e.Pipeline.registerFunction(e.jp.trimmer,"trimmer-jp"),e.Pipeline.registerFunction(e.jp.stopWordFilter,"stopWordFilter-jp")}}));
|
||||||
|
|
@ -1,120 +0,0 @@
|
||||||
/*!
|
|
||||||
* Lunr languages, `Japanese` language
|
|
||||||
* https://github.com/MihaiValentin/lunr-languages
|
|
||||||
*
|
|
||||||
* Copyright 2014, Chad Liu
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*/
|
|
||||||
/*!
|
|
||||||
* based on
|
|
||||||
* Snowball JavaScript Library v0.3
|
|
||||||
* http://code.google.com/p/urim/
|
|
||||||
* http://snowball.tartarus.org/
|
|
||||||
*
|
|
||||||
* Copyright 2010, Oleg Mazko
|
|
||||||
* http://www.mozilla.org/MPL/
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* export the module via AMD, CommonJS or as a browser global
|
|
||||||
* Export code from https://github.com/umdjs/umd/blob/master/returnExports.js
|
|
||||||
*/
|
|
||||||
;
|
|
||||||
(function(root, factory) {
  if (typeof define === 'function' && define.amd) {
    // AMD. Register as an anonymous module.
    define(factory);
  } else if (typeof exports === 'object') {
    /**
     * Node. Does not work with strict CommonJS, but
     * only CommonJS-like environments that support module.exports,
     * like Node.
     */
    module.exports = factory();
  } else {
    // Browser globals (root is window)
    factory()(root.lunr);
  }
}(this, function() {
  /**
   * The module export is a plugin function: call it with the lunr object
   * to register Japanese support on it as `lunr.jp`.
   */
  return function(lunr) {
    /* throw error if lunr is not yet included */
    if ('undefined' === typeof lunr) {
      throw new Error('Lunr is not present. Please include / require Lunr before this script.');
    }

    /* throw error if lunr stemmer support is not yet included */
    if ('undefined' === typeof lunr.stemmerSupport) {
      throw new Error('Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.');
    }

    /* throw error if TinySegmenter (needed by the tokenizer) is not yet included */
    if ('undefined' === typeof TinySegmenter) {
      throw new Error('TinySegmenter is not present. Please include / require TinySegmenter before this script.');
    }

    /**
     * Locale function. Invoked with an index as `this`: replaces the index
     * pipeline with the Japanese stop word filter and stemmer, and swaps
     * lunr's global tokenizer for the Japanese one.
     */
    lunr.jp = function() {
      this.pipeline.reset();
      this.pipeline.add(
        lunr.jp.stopWordFilter,
        lunr.jp.stemmer
      );
      // change the tokenizer for japanese one
      lunr.tokenizer = lunr.jp.tokenizer;
    };

    // Single segmenter instance shared by every tokenizer call.
    var segmenter = new TinySegmenter();

    /**
     * Japanese tokenizer.
     *
     * @param {*} obj - value to tokenize.
     * @returns {string[]} `[]` when called without arguments or with
     *   null/undefined; the lowercased elements when given an array;
     *   otherwise the non-empty TinySegmenter segments of the lowercased,
     *   whitespace-trimmed string form of `obj`.
     */
    lunr.jp.tokenizer = function(obj) {
      // `== null` matches both null and undefined.
      if (!arguments.length || obj == null) return [];

      if (Array.isArray(obj)) {
        return obj.map(function(t) {
          return t.toLowerCase();
        });
      }

      // Lowercase string input as well, so that Latin text is normalised
      // the same way as in the array branch above.
      var str = obj.toString().toLowerCase().trim();

      // segment() returns an array of words; drop empty segments.
      return segmenter.segment(str).filter(function(token) {
        return !!token;
      });
    };

    /* lunr stemmer function */
    lunr.jp.stemmer = (function() {
      /* TODO japanese stemmer */
      // Placeholder: returns every word unchanged.
      return function(word) {
        return word;
      };
    })();

    lunr.Pipeline.registerFunction(lunr.jp.stemmer, 'stemmer-jp');

    /**
     * Stop word filter: returns the token unless it is a stop word, in
     * which case it returns undefined.
     */
    lunr.jp.stopWordFilter = function(token) {
      if (lunr.jp.stopWordFilter.stopWords.indexOf(token) === -1) {
        return token;
      }
    };

    lunr.jp.stopWordFilter.stopWords = new lunr.SortedSet();

    // The space at the beginning is crucial: It marks the empty string
    // as a stop word. lunr.js crashes during search when documents
    // processed by the pipeline still contain the empty string.
    // stopword for japanese is from http://www.ranks.nl/stopwords/japanese
    //
    // The list is sorted after splitting: the elements are assigned directly
    // into a SortedSet, whose indexOf (a binary search in lunr 0.x) expects
    // them in ascending order; unsorted, many stop words are never matched.
    lunr.jp.stopWordFilter.stopWords.elements = ' これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし'.split(' ').sort();
    // Derive the length from the list instead of hard-coding it.
    lunr.jp.stopWordFilter.stopWords.length = lunr.jp.stopWordFilter.stopWords.elements.length;
    lunr.Pipeline.registerFunction(lunr.jp.stopWordFilter, 'stopWordFilter-jp');
  };
}))
|
|
||||||
1
static/js/lunr/lunr.jp.min.js
vendored
1
static/js/lunr/lunr.jp.min.js
vendored
|
|
@ -1 +0,0 @@
|
||||||
/*! Minified build of lunr.jp.js: Japanese support for Lunr. Lunr, Lunr stemmer support and TinySegmenter must be loaded first. The tokenizer lowercases string input and the stop word list is sorted for SortedSet lookups. */
!function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(r){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");r.jp=function(){this.pipeline.reset(),this.pipeline.add(r.jp.stopWordFilter,r.jp.stemmer),r.tokenizer=r.jp.tokenizer};var n=new TinySegmenter;r.jp.tokenizer=function(e){if(!arguments.length||null==e)return[];if(Array.isArray(e))return e.map(function(e){return e.toLowerCase()});for(var r=e.toString().toLowerCase().replace(/^\s+/,""),t=r.length-1;0<=t;t--)if(/\S/.test(r.charAt(t))){r=r.substring(0,t+1);break}return n.segment(r).filter(function(e){return!!e}).map(function(e){return e})},r.jp.stemmer=function(e){return e},r.Pipeline.registerFunction(r.jp.stemmer,"stemmer-jp"),r.jp.stopWordFilter=function(e){if(-1===r.jp.stopWordFilter.stopWords.indexOf(e))return e},r.jp.stopWordFilter.stopWords=new r.SortedSet,r.jp.stopWordFilter.stopWords.length=45,r.jp.stopWordFilter.stopWords.elements=" これ それ あれ この その あの ここ そこ あそこ こちら どこ だれ なに なん 何 私 貴方 貴方方 我々 私達 あの人 あのかた 彼女 彼 です あります おります います は が の に を で え から まで より も どの と し それで しかし".split(" ").sort(),r.Pipeline.registerFunction(r.jp.stopWordFilter,"stopWordFilter-jp")}});
|
|
||||||
1493
static/js/lunr/tinyseg.js
Normal file
1493
static/js/lunr/tinyseg.js
Normal file
File diff suppressed because it is too large
Load diff
1
static/js/lunr/tinyseg.min.js
vendored
Normal file
1
static/js/lunr/tinyseg.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
|
|
@ -173,6 +173,9 @@
|
||||||
{# Support correct stemming and stop word filtering in non-English search #}
|
{# Support correct stemming and stop word filtering in non-English search #}
|
||||||
{%- if lang != "en" -%}
|
{%- if lang != "en" -%}
|
||||||
<script defer src="{{ get_url(path='js/lunr/lunrStemmerSupport.min.js') | safe }}"></script>
|
<script defer src="{{ get_url(path='js/lunr/lunrStemmerSupport.min.js') | safe }}"></script>
|
||||||
|
{%- if lang == "ja" -%}
|
||||||
|
<script defer src="{{ get_url(path='js/lunr/tinyseg.min.js') | safe }}"></script>
|
||||||
|
{%- endif -%}
|
||||||
<script defer src="{{ get_url(path='js/lunr/lunr.' ~ lang ~ '.min.js') | safe }}"></script>
|
<script defer src="{{ get_url(path='js/lunr/lunr.' ~ lang ~ '.min.js') | safe }}"></script>
|
||||||
{%- endif -%}
|
{%- endif -%}
|
||||||
{%- endif -%}
|
{%- endif -%}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue