Skip to content

Commit

Permalink
better fuzzy search
Browse files Browse the repository at this point in the history
  • Loading branch information
UlyssesZh committed Jan 4, 2024
1 parent e593912 commit 09b1ea1
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 21 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ COPY . .

RUN apt-get -y update
RUN apt-get -y upgrade
RUN apt-get install -y sqlite3 libsqlite3-dev
RUN apt-get install -y sqlite3 libsqlite3-dev opencc libopencc-dev

RUN bundle install

Expand Down
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ gem 'concurrent-ruby'
gem 'discordrb'
gem 'sqlite3'
gem 'any_ascii'
gem 'ropencc'
5 changes: 4 additions & 1 deletion Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ GEM
http-accept (1.7.0)
http-cookie (1.0.5)
domain_name (~> 0.5)
mime-types (3.5.1)
mime-types (3.5.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.1205)
netrc (0.11.0)
Expand All @@ -29,6 +29,8 @@ GEM
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
ropencc (0.0.6)
ffi (~> 1.0)
sqlite3 (1.7.0-x86_64-linux)
websocket (1.2.10)
websocket-client-simple (0.8.0)
Expand All @@ -43,6 +45,7 @@ DEPENDENCIES
concurrent-ruby
discordrb
rake
ropencc
sqlite3

BUNDLED WITH
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ or copy the useful files to the paths specified in `config.yml`.
| `LYRICAT_RES_DIR` | The directory containing resource files. It is the base directory for the items listed under `res` in the config. Defaults to `./res`. |
| `LYRICAT_CONFIG` | The path to config file. Relative to `LYRICAT_DATA_DIR`. Defaults to `config.yml`. |
| `LYRICAT_DAN` | The path to the file specifying dan courses. Relative to `LYRICAT_DATA_DIR`. Defaults to `dan.yml`. |
| `LYRICAT_ALIASES` | The path to the file specifying aliases. Relative to `LYRICAT_DATA_DIR`. Defaults to `aliases.yml`. |
| `LYRICAT_THREAD_COUNT` | The number of threads to use for parallel HTTP requests. Defaults to 8. |
| `LYRICAT_RETRY_COUNT` | The number of retries when communicating with Lyrica's server. Defaults to 3. |
| `LYRICAT_STATIC_SESSION_TOKEN` | (Required) The session token used to retrieve leaderboards. |
Expand Down
17 changes: 17 additions & 0 deletions data/aliases.yml
Original file line number Diff line number Diff line change
@@ -1 +1,18 @@
---
songs:
源自苍穹: 26
蝶舞: 115
: 225
千魔王: 56
绿魔王: 50
狗咬: 202
destroyer: 197
d0: 197
脑力: 196
吃山人: 206
吞山人: 206
吃山者: 206
吞山者: 206
小红帽: 109
乡野整活: 27
神谕: 213
1 change: 1 addition & 0 deletions lib/lyricat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
require 'sqlite3'
require 'concurrent'
require 'any_ascii'
require 'ropencc'

require 'patches'

Expand Down
53 changes: 35 additions & 18 deletions lib/lyricat/song.rb
Original file line number Diff line number Diff line change
Expand Up @@ -263,30 +263,43 @@ def best n, sorted, session_token
end.filter { _1[:mr] > 0 }
end

# Resolves a free-form query to a song ID; returns nil when nothing matches.
#
# Resolution order, as visible below:
#   1. an integer-like query whose value is a key of LIB,
#   2. an exact hit in ALIASES,
#   3. name / romanization matching, first for +lang+ and then for nil
#      (presumably "any language" - confirm against the match_* methods).
#
# lang     - language passed through to the match_* methods.
# query    - the user's search string.
# excluded - song_ids to leave out of the match-based search (step 3 only;
#            steps 1 and 2 do not consult it).
#
# NOTE(review): this listing interleaves the pre-change and post-change text of
# the method: two `def` lines, and both the old LIB.find cascade and the new
# `match` lambda appear. Only one variant is expected in the real file.
def fuzzy_search lang, query
def fuzzy_search lang, query, excluded = []
# A purely numeric query is treated as a song ID if LIB has that key.
if query.like_int?
id = query.to_i
return id if LIB[id]
end
# Exact alias lookup; no normalization of the query is done here.
if from_alias = ALIASES[query]
return from_alias
end
# Builds the list of alternative spellings that are tried for a string:
# the string itself, its AnyAscii transliteration, and up to three
# abbreviations. Abbreviations of length <= 1 are dropped.
get_forms = ->original do
ascii = AnyAscii.transliterate original
# upper_letters is defined outside this view; presumably the uppercase
# letters of the string - TODO confirm.
abbr0 = original.upper_letters
abbr0 = nil if abbr0.length <= 1
# First character of each whitespace-separated word.
abbr1 = original.split.map { _1[0] }.join
abbr1 = nil if abbr1.length <= 1
# First character of each piece after splitting on non-word characters.
abbr2 = original.split(/[^\w]/).map { _1[0] }.join
abbr2 = nil if abbr2.length <= 1
[original, ascii, abbr0, abbr1, abbr2].compact
end
query_forms = get_forms.(query)
# A trailing number N is additionally read as "the N-th song matching the
# text before the number". alter_forms is only set when N > 1; when the
# query has no trailing digits, n, alter_forms and searched all stay nil.
if query =~ /^(.*?)(\d+)$/
n = $2.to_i
alter_forms = get_forms.($1) if n > 1
searched = Set.new
end
filtered_lib = LIB.reject { excluded.include? _2.song_id }
# Try the requested language first, then nil.
[lang, nil].each do |l|
# Pre-change cascade: searches the whole LIB with the raw query only.
if o = LIB.find { _2.match_name query, true, l }
return o[0]
end
if o = LIB.find { _2.match_roman1 query, true, l }
return o[0]
end
if o = LIB.find { _2.match_roman2 query, true, l }
return o[0]
end
if o = LIB.find { _2.match_roman1 query, false, l }
return o[0]
end
if o = LIB.find { _2.match_roman2 query, false, l }
return o[0]
end
if o = LIB.find { _2.match_name query, false, l }
return o[0]
end
# Post-change matcher: returns the key of the first entry in filtered_lib
# that matches via song method `meth`, or nil. An entry matches when any
# query form matches directly, or when it is the n-th distinct song
# matched by one of the alter forms.
match = ->meth, strong = false do
filtered_lib.find do |song_id, song|
next true if query_forms.any? { song.__send__ meth, _1, strong, l }
if alter_forms&.any? { song.__send__ meth, _1, strong, l }
# NOTE(review): `searched` is created once per call and shared by every
# `match` invocation (all methods, both languages), so the count
# accumulates across strategies - confirm this is intended.
searched.add song_id
next true if n == searched.size
end
end&.first
end
# Priority: strong matches (name, roman1, roman2) before the non-strong
# ones (roman1, roman2, name).
o = match.(:match_name, true) || match.(:match_roman1, true) || match.(:match_roman2, true) || match.(:match_roman1) || match.(:match_roman2) || match.(:match_name)
return o if o
end
nil
end
Expand Down Expand Up @@ -342,6 +355,10 @@ def select_charts_by_difficulty diff, &block
labels[index].delete :index
negative
end.freeze

# Song aliases, read from the 'songs' mapping of the YAML file named by
# LYRICAT_ALIASES (default 'aliases.yml') inside Lyricat::DATA_DIR.
alias_path = File.join(Lyricat::DATA_DIR, ENV.fetch('LYRICAT_ALIASES', 'aliases.yml'))
ALIASES = YAML.load_file(alias_path)['songs']
# Register every alias a second time under its OpenCC 's2t' conversion
# (Simplified-to-Traditional Chinese config), then lock the table.
traditional_aliases = ALIASES.transform_keys { |name| Ropencc.conv('s2t', name) }
ALIASES.update(traditional_aliases).freeze
end

end
2 changes: 1 addition & 1 deletion lib/lyricat/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

# Top-level namespace of the project; here it carries the version string.
module Lyricat
# NOTE(review): both the previous ('0.5.0') and the new ('0.6.0') assignment
# appear in this listing; only one VERSION line is expected in the real file.
VERSION = '0.5.0'
VERSION = '0.6.0'
end

0 comments on commit 09b1ea1

Please sign in to comment.