-
-
Save breezewish/807218232c3b8f3dd012bd3205692139 to your computer and use it in GitHub Desktop.
| const assert = require("assert").strict; | |
| const child_process = require("child_process"); | |
| const path = require("path"); | |
| const crypto = require("crypto"); | |
| const glob = require("glob"); | |
| const fs = require("fs-extra"); | |
| const _ = require("lodash"); | |
| const sort = require("alphanum-sort"); | |
| const Kuroshiro = require("kuroshiro"); | |
| const KuromojiAnalyzer = require("kuroshiro-analyzer-kuromoji"); | |
| const log = require("signale"); | |
| const chalk = require("chalk"); | |
| const xml2js = require("xml2js"); | |
| const kuroshiro = new Kuroshiro(); | |
| String.prototype.replaceAll = function (search, replacement) { | |
| var target = this; | |
| return target.split(search).join(replacement); | |
| }; | |
| let extractAudio = false; | |
| // Word counter is global to share the same note type | |
| let counter = 0; | |
| /** | |
| * Decrypt the resource data provided by the book app. | |
| */ | |
| function decryptDataV1(path, key) { | |
| const encData = fs.readFileSync(path); | |
| const iv = Buffer.alloc(128 / 8); | |
| const decipher = crypto.createDecipheriv("aes-128-cbc", key, iv); | |
| const data = []; | |
| data.push(decipher.update(encData)); | |
| data.push(decipher.final()); | |
| return Buffer.concat(data).toString("utf8"); | |
| } | |
| /** | |
| * Pad by filling leading zeros. Default pad to 2 digits. | |
| */ | |
| function pad(str, n = 2) { | |
| return _.padStart(str, n, "0"); | |
| } | |
| /** | |
| * Normalize symbols in the word. | |
| */ | |
| function normalizeWord(word) { | |
| return word | |
| .replace(/(/g, "(") | |
| .replace(/)/g, ")") | |
| .replace(///g, "/") | |
| .replace(/~/g, "~") | |
| .replace(/ /g, "") | |
| .replace(/\s+\(\s+/g, "(") | |
| .replace(/\s+\)\s+/g, ")") | |
| .replace(/\s+\/\s+/g, "/") | |
| .replace(/\s+~\s+/g, "~") | |
| .trim(); | |
| } | |
| /** | |
| * Build an Anki style Furigana by giving `{ prefix, kana, kanji, suffix }`. | |
| */ | |
| function buildAnkiFurigana(r) { | |
| const p = []; | |
| if (r.prefix.length > 0) { | |
| p.push(r.prefix); | |
| } | |
| if (r.kana || r.kanji) { | |
| assert.ok(r.kana.length > 0); | |
| assert.ok(r.kanji.length > 0); | |
| p.push(` ${r.kanji}[${r.kana}] `); | |
| } | |
| if (r.suffix.length > 0) { | |
| p.push(r.suffix); | |
| } | |
| return p.join(""); | |
| } | |
| /** | |
| * Find common prefix and suffix for the kana and kanji. | |
| * | |
| * Kana: こし, Kanji: 少し => Kana: こ, Kanji: 少, Suffix: し | |
| * @param {*} kana | |
| * @param {*} kanji | |
| */ | |
| function findKanaKanjiCommonString(kana, kanji) { | |
| let kanaChars = Array.from(kana); | |
| let kanjiChars = Array.from(kanji); | |
| const prefix = _.takeWhile(kanaChars, (ch, idx) => kanjiChars[idx] === ch); | |
| kanaChars = _.drop(kanaChars, prefix.length); | |
| kanjiChars = _.drop(kanjiChars, prefix.length); | |
| const suffix = _.takeRightWhile(kanaChars, (ch, idx) => { | |
| const rIdx = kanaChars.length - idx; | |
| return _.nth(kanjiChars, -rIdx) === ch; | |
| }); | |
| kanaChars = _.dropRight(kanaChars, suffix.length); | |
| kanjiChars = _.dropRight(kanjiChars, suffix.length); | |
| return { | |
| prefix: prefix.join(""), | |
| kana: kanaChars.join(""), | |
| kanji: kanjiChars.join(""), | |
| suffix: suffix.join(""), | |
| }; | |
| } | |
| (function __testFindKanaKanjiCommonString() { | |
| for (const [ | |
| inputKana, | |
| inputKanji, | |
| expectPrefix, | |
| expectKana, | |
| expectKanji, | |
| expectSuffix, | |
| ] of [ | |
| ["うち", "うち", "うち", "", "", ""], | |
| ["こし", "少し", "", "こ", "少", "し"], | |
| ["おねがいします", "お願いします", "お", "ねが", "願", "いします"], | |
| [ | |
| "お/ご~もうしあげる", | |
| "お/ご~申し上げる", | |
| "お/ご~", | |
| "もうしあ", | |
| "申し上", | |
| "げる", | |
| ], | |
| ]) { | |
| const result = findKanaKanjiCommonString(inputKana, inputKanji); | |
| assert.equal(result.prefix, expectPrefix); | |
| assert.equal(result.kana, expectKana); | |
| assert.equal(result.kanji, expectKanji); | |
| assert.equal(result.suffix, expectSuffix); | |
| } | |
| })(); | |
| /** | |
| * Transform a single furigana item in bracket format into Anki format. | |
| * Example: こし(少し) => 少[こ] し | |
| * See test cases below for all supported formats. | |
| */ | |
| async function transformFuriganaAsync(normalizedSingleWord) { | |
| // If there is no bracket, return as it is. | |
| // Sample: うち | |
| if (normalizedSingleWord.indexOf("(") === -1) { | |
| return normalizedSingleWord; | |
| } | |
| // Sample: こし(少し) => kana: こし, kanji: 少し | |
| const kana = normalizedSingleWord.replace(/\([^\)]+\)/g, "").trim(); | |
| const kanji = normalizedSingleWord.match(/\(([^\)]+)\)/)[1].trim(); | |
| // Always call `inferMissingKanaSimple` since it will extract common prefix / suffix even if there is no `~`. | |
| const r1 = inferMissingKanaSimple(kana, kanji); | |
| if (r1.indexOf("~") !== -1) { | |
| const r2 = await inferMissingKanaAdvancedAsync(kana, kanji); | |
| if (r2 !== false) { | |
| return r2; | |
| } | |
| } | |
| return r1; | |
| } | |
| async function __testTransformFuriganaAsync() { | |
| for (const [input, expect] of [ | |
| ["うち", "うち"], | |
| ["こし(少し)", "少[こ] し"], | |
| [ | |
| "お/ご~もうしあげる(お/ご~申し上げる)", | |
| "お/ご~ 申し上[もうしあ] げる", | |
| ], | |
| ["れんしゅうします(練習~)", "練習[れんしゅう] します"], | |
| ["さいようする(採用~)", "採用[さいよう] する"], | |
| ["イギリスじん(~人)", "イギリス 人[じん]"], | |
| ["じどうドア(自動~)", "自動[じどう] ドア"], | |
| ["おじゃまします(お邪魔~)", "お 邪魔[じゃま] します"], | |
| ["シャンハイパール(上海~)", "上海[シャンハイ] パール"], | |
| [ | |
| "よろしくおつたえください(~お伝えください)", | |
| "よろしくお 伝[つた] えください", | |
| ], | |
| [ | |
| "さどトキほごセンター(佐渡~保護~)", | |
| "佐渡[さど] トキ 保護[ほご] センター", | |
| ], | |
| ]) { | |
| const result = await transformFuriganaAsync(normalizeWord(input)); | |
| assert.equal(result, expect); | |
| } | |
| } | |
| function inferMissingKanaSimple(kana, kanji) { | |
| let prefix = ""; | |
| let suffix = ""; | |
| // If there is a `~` in the Kanji, try to fold it. | |
| // 1. trailing ~ may represent する or します | |
| // Sample: れんしゅうします(練習~) => 練習[れんしゅう] します | |
| if (kanji.endsWith("~")) { | |
| ["する", "ずる", "します"].forEach((kanaSuffix) => { | |
| if (kana.endsWith(kanaSuffix)) { | |
| suffix += kanaSuffix; | |
| kanji = _.dropRight(kanji.split(""), 1).join(""); | |
| kana = _.dropRight(kana.split(""), kanaSuffix.length).join(""); | |
| return false; | |
| } | |
| }); | |
| } | |
| // 2. leading ~ or trailing ~ may represent Katakana | |
| // Sample: イギリスじん(~人) => イギリス 人[じん] | |
| { | |
| const kanaChars = Array.from(kana); | |
| if (kanji.endsWith("~")) { | |
| const kSuffix = _.takeRightWhile(kanaChars, Kuroshiro.Util.isKatakana); | |
| if (kSuffix.length > 0 && kSuffix.length < kanaChars.length) { | |
| const v = kSuffix.join(""); | |
| suffix += v; | |
| kanji = _.dropRight(kanji.split(""), 1).join(""); | |
| kana = _.dropRight(kana.split(""), v.length).join(""); | |
| } | |
| } else if (kanji.startsWith("~")) { | |
| const kPrefix = _.takeWhile(kanaChars, Kuroshiro.Util.isKatakana); | |
| if (kPrefix.length > 0 && kPrefix.length < kanaChars.length) { | |
| const v = kPrefix.join(""); | |
| prefix += v; | |
| kanji = _.trimStart(kanji, "~"); | |
| kana = _.trimStart(kana, v); | |
| } | |
| } | |
| } | |
| assert.ok(kana.length > 0); | |
| assert.ok(kanji.length > 0); | |
| // Fold common prefix / suffix | |
| const r = findKanaKanjiCommonString(kana, kanji); | |
| prefix += r.prefix; | |
| suffix = r.suffix + suffix; | |
| kana = r.kana; | |
| kanji = r.kanji; | |
| return buildAnkiFurigana({ prefix, suffix, kana, kanji }).trim(); | |
| } | |
| async function inferMissingKanaAdvancedAsync(kana, kanji) { | |
| if (kanji.indexOf("~") === -1) { | |
| return false; | |
| } | |
| // FIXME: Sometimes this doesn't work, like だんボールばこ(段~箱) | |
| // transform kanji to hiragana / katakana, then match | |
| // Sample: さどトキほごセンター(佐渡~保護~) | |
| // 1. Split Kanji by ~: [佐渡, 保護, ] | |
| // 2. Transform Kanji: [さど/サド, ほご/ホゴ, ] | |
| // 3. Build regex: ^(さど|サド)(.*?)(ほご|ホゴ)(.*?)$ | |
| // 4. Match result: [さど, トキ, ほご, センター] | |
| // 5. Build final result: 佐渡[さど] トキ 保護[ほご] センター | |
| const kanjiSegments = await Promise.all( | |
| kanji.split("~").map(async (kanji) => { | |
| if (kanji.length === 0) { | |
| return { | |
| isEmpty: true, | |
| }; | |
| } | |
| const hiragana = await kuroshiro.convert(kanji, { to: "hiragana" }); | |
| const katakana = Kuroshiro.Util.kanaToKatakana(hiragana); | |
| return { | |
| isEmpty: false, | |
| kanji, | |
| hiragana, | |
| katakana, | |
| }; | |
| }) | |
| ); | |
| const rBody = kanjiSegments | |
| .map((s) => { | |
| if (s.isEmpty) { | |
| return ""; | |
| } | |
| return `(${_.escapeRegExp(s.hiragana)}|${_.escapeRegExp(s.katakana)})`; | |
| }) | |
| .join("(.*?)"); | |
| const r = new RegExp(`^${rBody}$`); | |
| const m = kana.match(r); | |
| if (!m) { | |
| return false; | |
| } | |
| const result = []; | |
| let iMatch = 1; | |
| for (let iKanji = 0; iKanji < kanjiSegments.length; iKanji++) { | |
| if (!kanjiSegments[iKanji].isEmpty) { | |
| const kana = m[iMatch]; | |
| const kanji = kanjiSegments[iKanji].kanji; | |
| const r = findKanaKanjiCommonString(kana, kanji); | |
| result.push(buildAnkiFurigana(r)); | |
| iMatch++; | |
| } | |
| if (iKanji !== kanjiSegments.length - 1) { | |
| result.push(m[iMatch]); | |
| iMatch++; | |
| } | |
| } | |
| assert.ok(iMatch, m.length); | |
| return result.join("").trim(); | |
| } | |
| async function _transformPlainFuriganaWordAsync(normalizedWord) { | |
| if (normalizedWord.match(/\([^\)\/]+\//)) { | |
| // If there is a `/` inside `(`, do not split the word by /. | |
| // Sample word: お/ご~もうしあげる(お/ご~申し上げる) | |
| // Only support <= 2 `/` in the word (1 in bracket, 0/1 outside bracket). | |
| assert.ok(normalizedWord.replace(/[^\/]/g, "").length <= 2, normalizedWord); | |
| return await transformFuriganaAsync(normalizedWord); | |
| } else { | |
| // If there is a `/` and it is not in the bracket, separate the word by `/`. | |
| // Samples: | |
| // うち | |
| // こし(少し)/ちょっと | |
| const subWords = normalizedWord.split("/"); | |
| const transformedSubWords = await Promise.all( | |
| subWords.map(transformFuriganaAsync) | |
| ); | |
| return transformedSubWords.join(" / "); | |
| } | |
| } | |
| /** | |
| * Transform a plain word contains furigana in the book format into Anki format. | |
| * See test cases below for all supported formats. | |
| */ | |
| async function transformPlainFuriganaWordAsync(normalizedWord) { | |
| let result = await _transformPlainFuriganaWordAsync(normalizedWord); | |
| result = await optimizeAnkiFuriganaAsync(result.trim()); | |
| // Additionally verifies that Kana from `normalizedWord` equals to Kana from result. | |
| assert.equal( | |
| extractKanaFromBookFormat(normalizedWord), | |
| extractKanaFromAnkiFormat(result) | |
| ); | |
| return result; | |
| } | |
| async function __testTransformPlainFuriganaWordAsync() { | |
| for (const [input, expect] of [ | |
| ["うち", "うち"], | |
| ["こし(少し)", "少[こ] し"], | |
| ["かならず(必ず)", "必[かなら] ず"], | |
| ["すこし(少し)/ちょっと", "少[すこ] し / ちょっと"], | |
| ["ごぜん(午前)/エーエム(am)", "午前[ごぜん] / am[エーエム]"], | |
| [ | |
| "チェコきょうわこく(~共和国)/チェコ", | |
| "チェコ 共和[きょうわ] 国[こく] / チェコ", // FIXME: missing space? | |
| ], | |
| [ | |
| "~時/~分/~半/~月/~日/~年/~ごろ", | |
| "~時 / ~分 / ~半 / ~月 / ~日 / ~年 / ~ごろ", | |
| ], | |
| [ | |
| "お/ご~もうしあげる(お/ご~申し上げる)", | |
| "お/ご~ 申し上[もうしあ] げる", | |
| ], | |
| ]) { | |
| const result = await transformPlainFuriganaWordAsync(normalizeWord(input)); | |
| assert.equal(result, expect); | |
| } | |
| } | |
| /** | |
| * Optimize a Furigana in Anki format by using kuroshiro to transform again. | |
| * If the kanas in transformed result is the same as the original one, use | |
| * the transformed result. | |
| * Sample: `中華大学[ちゅうかだいがく]` => `中華[ちゅうか]大学[だいがく]` | |
| */ | |
| async function optimizeAnkiFuriganaAsync(r) { | |
| // Strip all `[...]` from the Anki format. | |
| const kanji = r.replace(/\[[^\]]+\]/g, "").trim(); | |
| let r2 = await kuroshiro.convert(kanji, { | |
| mode: "furigana", // in order to preserve space between segments, use furigana | |
| to: "hiragana", | |
| }); | |
| r2 = r2 | |
| .replaceAll("<ruby>", " ") | |
| .replaceAll("<rp>(</rp><rt>", "[") | |
| .replaceAll("</rt><rp>)</rp></ruby>", "] ") | |
| .replace(/\s+/g, " ") | |
| .trim(); | |
| if (extractKanaFromAnkiFormat(r2) === extractKanaFromAnkiFormat(r)) { | |
| if (r2 !== r) { | |
| log.note("Optimized %s → %s", r, chalk.blue(r2)); | |
| } | |
| return r2; | |
| } | |
| return r; | |
| } | |
| /** | |
| * Extract Kana sequence from the book format by removing all brackets and non Kana characters. | |
| * Sample: `チェコきょうわこく(~共和国)/チェコ` => `チェコきょうわこくチェコ` | |
| */ | |
| function extractKanaFromBookFormat(w) { | |
| return w | |
| .replace(/\([^\)]+\)/g, "") | |
| .trim() | |
| .split("") | |
| .filter((ch) => Kuroshiro.Util.isKana(ch)) | |
| .join(""); | |
| } | |
| /** | |
| * Extract Kana sequence from the Anki format by preserving the bracket part and remove non Kana characters. | |
| * Sample: `午前[ごぜん] / am[エーエム]` => `ごぜんエーエム` | |
| */ | |
| function extractKanaFromAnkiFormat(w) { | |
| return w | |
| .split(" ") | |
| .map((seg) => { | |
| if (seg.indexOf("[") > -1) { | |
| return [...seg.matchAll(/\[([^\]]+)\]/g)] | |
| .map((v) => v[1].trim()) | |
| .join(""); | |
| } else { | |
| // If there is no `[`, use content directly | |
| return seg; | |
| } | |
| }) | |
| .join("") | |
| .split("") | |
| .filter((ch) => Kuroshiro.Util.isKana(ch)) | |
| .join(""); | |
| } | |
| /** | |
| * Transform a word contains furigana in the book format in XML into Anki format. | |
| */ | |
| async function transformXmlFuriganaWordAsync(xml) { | |
| const r = []; | |
| const nodes = await xml2js.parseStringPromise(xml); | |
| nodes.xml.tag.forEach((tag) => { | |
| if (!tag._) { | |
| return; | |
| } | |
| if (tag.$.superscript) { | |
| r.push(`${tag._.trim()}[${tag.$.superscript.trim()}]`); | |
| } else { | |
| r.push(tag._.trim()); | |
| } | |
| }); | |
| return r.join(" ").trim(); | |
| } | |
| async function __testTransformXmlFuriganaWordAsync() { | |
| for (const [input, expect] of [ | |
| [ | |
| "<xml><tag name='common' superscript='だい'>第</tag><tag name='common'>~ </tag></xml>", | |
| "第[だい] ~", | |
| ], | |
| [ | |
| "<xml><tag name='common'>~</tag><tag name='common' superscript='き'>気</tag><tag name='common' superscript='ど'>取</tag><tag name='common'>り</tag></xml>", | |
| "~ 気[き] 取[ど] り", | |
| ], | |
| ]) { | |
| const result = await transformXmlFuriganaWordAsync(normalizeWord(input)); | |
| assert.equal(result, expect); | |
| } | |
| } | |
| /** | |
| * Transform a word from the book into Anki format. | |
| */ | |
| async function transformWordAsync(word) { | |
| let w = normalizeWord(word); | |
| if (w.indexOf("<xml>") === -1) { | |
| // Sample: | |
| // かならず(必ず) => 必ず[かならず] | |
| return await transformPlainFuriganaWordAsync(w); | |
| } else if (w.indexOf("superscript") === -1) { | |
| // Samples: | |
| // <xml><tag name='common'>へいせい(平成)</tag></xml> | |
| const plainWord = w.replace(/\<[^\>]+\>/g, ""); | |
| return await transformPlainFuriganaWordAsync(plainWord); | |
| } else { | |
| // Sample: | |
| // <xml><tag name='common' superscript='だい'>第</tag><tag name='common'>~ </tag></xml> | |
| // Just follow the tag | |
| return await transformXmlFuriganaWordAsync(w); | |
| } | |
| } | |
| async function __testTransformWordAsync() { | |
| for (const [input, expect] of [ | |
| ["かならず(必ず)", "必[かなら] ず"], | |
| ["<xml><tag name='common'>へいせい(平成)</tag></xml>", "平成[へいせい]"], | |
| ]) { | |
| const result = await transformWordAsync(normalizeWord(input)); | |
| assert.equal(result, expect); | |
| } | |
| } | |
| function fixBrokenWord(word) { | |
| // There are two broken words in the original data | |
| const m = { | |
| "そば [名]": "そば", | |
| "このごろ [名]": "このごろ", | |
| }; | |
| for (let key in m) { | |
| if (word === key) { | |
| return m[key]; | |
| } | |
| } | |
| return word; | |
| } | |
| async function processBook(bookName, bookId, cipherKey, basePath) { | |
| log.start( | |
| "Process book %s in %s", | |
| chalk.green(bookName), | |
| chalk.green(basePath) | |
| ); | |
| const lessonPaths = sort(glob.sync(path.join(basePath, "*/lesson*"))); | |
| const outputDir = `./output/${bookName}/mp3`; | |
| const notes = []; | |
| fs.ensureDirSync(outputDir); | |
| for (const lessonPath of lessonPaths) { | |
| const m = lessonPath.match(/unit(\d+)\/lesson(\d+)/); | |
| if (!m) { | |
| throw new Error("Path not matching"); | |
| } | |
| const wordDataPath = path.join(lessonPath, "words.dat"); | |
| const wordData = JSON.parse(decryptDataV1(wordDataPath, cipherKey)); | |
| const words = wordData.content.filter((word) => word.linetype === "1"); | |
| const [unitIdx, lessonIdx] = m.slice(1); | |
| log.start( | |
| "Process book %s, unit %s, lesson %s", | |
| chalk.green(bookName), | |
| chalk.green(unitIdx), | |
| chalk.green(lessonIdx) | |
| ); | |
| for (let wordIdx = 0; wordIdx < words.length; wordIdx++) { | |
| const wordItem = words[wordIdx]; | |
| const audioName = [ | |
| `book${pad(bookId)}`, | |
| `lesson${pad(lessonIdx)}`, | |
| `${pad(wordIdx)}.mp3`, | |
| ].join("-"); | |
| const audioPath = path.join(outputDir, audioName); | |
| const ankiWord = await transformWordAsync(fixBrokenWord(wordItem.word)); | |
| if (wordItem.word.indexOf("~") > -1) { | |
| if (ankiWord.indexOf("~") === -1) { | |
| log.note( | |
| "Infer success: %s → %s", | |
| wordItem.word, | |
| chalk.yellow(ankiWord) | |
| ); | |
| } else { | |
| log.warn("Infer failed: %s → %s", wordItem.word, chalk.red(ankiWord)); | |
| } | |
| } | |
| const data = { | |
| id: pad(counter++, 4), | |
| jp: ankiWord, | |
| type: wordItem.wordtype.trim(), | |
| trans: wordItem.trans.trim() || "/", | |
| lesson: pad(lessonIdx), | |
| audio: audioName, | |
| tag: `${bookName}第${lessonIdx}课 ${bookName}第${unitIdx}单元`, | |
| }; | |
| notes.push(data); | |
| if (extractAudio) { | |
| child_process.spawnSync( | |
| "ffmpeg", | |
| [ | |
| "-i", | |
| path.join(lessonPath, "lesson_words.pepm"), | |
| "-codec:a", | |
| "libmp3lame", | |
| "-qscale:a", // reduce file size by setting a lower quality | |
| "6", | |
| "-ss", | |
| wordItem.starttime.trim(), | |
| "-to", | |
| wordItem.endtime.trim(), | |
| "-metadata", | |
| `title="${wordItem.word.trim()}"`, | |
| "-id3v2_version", | |
| "3", | |
| "-write_id3v1", | |
| "1", | |
| "-y", | |
| audioPath, | |
| ], | |
| { shell: true } | |
| ); | |
| } | |
| } | |
| } | |
| log.info("Writing data to %s", chalk.green(`./output/${bookName}/data.txt`)); | |
| fs.writeFileSync( | |
| `./output/${bookName}/data.txt`, | |
| notes | |
| .map((data) => | |
| [ | |
| data.id, | |
| data.jp, | |
| data.type, | |
| data.trans, | |
| data.lesson, | |
| `[sound:${data.audio}]`, | |
| data.tag, | |
| ].join("\t") | |
| ) | |
| .join("\n") | |
| ); | |
| } | |
| async function main() { | |
| // If you want to extract audio (it takes some time!): | |
| // extractAudio = true; | |
| await kuroshiro.init(new KuromojiAnalyzer()); | |
| log.start("Run pre-check test cases"); | |
| await __testTransformFuriganaAsync(); | |
| await __testTransformPlainFuriganaWordAsync(); | |
| await __testTransformXmlFuriganaWordAsync(); | |
| await __testTransformWordAsync(); | |
| await processBook("初级", 1, "@@www.pep.com.cn", "./resources/book1"); | |
| await processBook("中级", 2, "@@pepmres.com.cn", "./resources/book2"); | |
| await processBook("高级", 3, "@@pepmres.com.cn", "./resources/book3"); | |
| } | |
| main().catch((e) => console.error(e)); |
@xizhing8069 Updated
感谢分享!我对你的牌组做了些优化,但是是导出 text 修改的,主要是让日文 field 支持 anki 的假名混合显示,即改为 漢字[Kanji] 的形式,就可以显示出 漢 字 。代码在文末贴上,不知有没有帮助。
另外:
- 这么实现的话
假名field 可以删掉了节省空间。Cards 显示分别用{{kana:日文}}、{{kanji:日文}}和{{furigana:日文}}。 - 「解释到假名」和「假名到解释」不必分开两个包,Anki 的亮点之一是 Notes 和 Cards 分离,一条 Note 直接对应多种 Card Types 即可,不需要的可以很方便删掉。
- 样式中出现了多个
<hr id="answer">,应保留一个用以自动滚动,其它用<hr>。
const fs = require('fs')
const path = require('path')
const file = fs.readFileSync(path.join(__dirname, '新标准日本语初级(假名到解释).txt'), 'utf8')
const result = file.split('\n').map(line => {
const parts = line.split(' ')
const m = parts[4].match(/\((.*?)\)/)
if (m) {
parts[4] = `${m[1]}[${parts[4].replace(/\(.*?\)/, '')}]`
}
return parts.join(' ')
}).join('\n')
fs.writeFileSync(path.join(__dirname, '新标准日本语初级(假名到解释)2.txt'), result)修改后的样式:
@font-face {
font-family: IPAexMincho; src: url('_ipaexm.ttf');
}
.jp {
font-family: IPAexMincho;
}
.exp {
margin: 0 0 5px;
}
.card {
font-family: arial;
font-size: 20px;
text-align: center;
color: black;
background-color: white;
}以及背面,几种 Card Types 都可以通用
{{FrontSide}}
<hr id="answer">
<div class="jp">{{furigana:日文}}</div>
<hr>
<p class="exp">〔{{类型}}〕{{解释}}{{音频}}</p>
<small>[第 {{ 课号 }} 课]</small>
@crimx 可以分享一下你修改后的吗,对了不知道是不是可以制作一个需要输入的版本
@crimx 感谢分享,周末改进一下
@breeswish 多谢楼主分享
@crimx 请问这个要在哪里改呀,我本来想通过漢字来练习平假名,可是日文field把汉字和假名都包括了,有点无奈
@好的,感谢大佬解答,留下了没技术的泪水,要是能直接在card里用正则表达式把日文的一部分提出来就好了, 日文field:きっさてん(喫茶店),我只想显示括号里面的内容,能在card的样式里面实现吗?
你要改成 喫茶店[きっさてん] 然后卡牌模板用 {{kanji:日文}} 就会只显示汉字。
那要把所有的きっさてん(喫茶店)改成喫茶店[きっさてん],emmmmm,手动的话有点多。。。
是的,所以才用脚本转。
@YUMZF 尝试用规则推导了一些日文汉字的标注,更新了一下,可以再试试看新的(自行修改 Card 可以实现只显示日文汉字,或只显示假名)。https://zhuanlan.zhihu.com/p/58139619
想问一下, anki不是支持reversed 卡片吗?我把笔记类型改成reversed的了。 另外, 能否提供原始app数据呢? 想学习下如何用decryptDataV1 这个函数的原理....
想问一下, anki不是支持reversed 卡片吗?我把笔记类型改成reversed的了。 另外, 能否提供原始app数据呢? 想学习下如何用decryptDataV1 这个函数的原理....
@JacobSun 支持 reverse 卡片(当时做的时候不知道)。 原始 App 数据建议网上找破解版 Apk 或者别人已经下载好的数据包
@breeswish 我把所有卡组合并成一个了, 根据文档程序性能的考虑, anki推荐用大的卡组, 而不是多个小卡组, , 在大卡组里用tag创建筛选卡组. 还利用了anki的翻转卡片的功能. 自动翻转卡片功能能够正确对待同一个单词, 正反本质是同一个单词.
下载
https://mega.nz/file/zzIzgYRL#6xvwTudSiTmAcD4Oq0y4hJw8d18G8IebzM8PG8IeIas
另外我把音频放到正面了.听到声音, 回忆释义.
初级书第40课的第7词,中级书第7课的第7词和高级书的第8课的第118词都没有对应的语音(Anki可以通过左上角工具-检查媒体来确认).能不能更新一下Anki包?多谢...
@crimx 纯好奇, 请问大佬你是怎么找到 {{kana:日文}}、{{kanji:日文}} 这种用法的? 官方文档只有{{type:xxx}}, {{hint:xxx}} 之类, 我看了你的回答去查了源码才知道针对日语有模板.
感谢分享!请问有中级的单词包吗 @o@?