From 23d8b2d23f3db7810cc02c0980235f735bd94805 Mon Sep 17 00:00:00 2001 From: Spoked Date: Tue, 26 Mar 2024 21:05:46 -0400 Subject: [PATCH] feat: add more tests. more tweaks. add batch parsing. --- .github/ISSUE_TEMPLATE/---bug-report.yml | 5 +- .github/ISSUE_TEMPLATE/---feature-request.yml | 5 +- .github/ISSUE_TEMPLATE/---maintainers.yml | 5 +- .github/pull_request_template.md | 9 + .github/workflows/PULL_REQUEST_TEMPLATE.md | 8 - README.md | 129 ++++++++--- RTN/__init__.py | 3 +- RTN/fetch.py | 8 +- RTN/models.py | 81 ++++--- RTN/parser.py | 27 +++ RTN/patterns.py | 3 +- benchmarks/rank.py | 23 +- pyproject.toml | 2 +- tests/test_parser.py | 169 ++++++++++----- tests/test_ranker.py | 201 ++++++++++-------- 15 files changed, 429 insertions(+), 249 deletions(-) create mode 100644 .github/pull_request_template.md delete mode 100644 .github/workflows/PULL_REQUEST_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE/---bug-report.yml b/.github/ISSUE_TEMPLATE/---bug-report.yml index 219443d..24f8c23 100644 --- a/.github/ISSUE_TEMPLATE/---bug-report.yml +++ b/.github/ISSUE_TEMPLATE/---bug-report.yml @@ -1,6 +1,9 @@ name: "\U0001F41E Bug Report" -labels: ["kind/bug", "status/triage"] description: "Rank Torrent Name (RTN) not working the way it is documented?" +title: "[Bug]: " +labels: ["kind/bug", "status/triage"] +assignees: + - dreulavelle body: - type: markdown diff --git a/.github/ISSUE_TEMPLATE/---feature-request.yml b/.github/ISSUE_TEMPLATE/---feature-request.yml index 482a767..c998e73 100644 --- a/.github/ISSUE_TEMPLATE/---feature-request.yml +++ b/.github/ISSUE_TEMPLATE/---feature-request.yml @@ -1,6 +1,9 @@ name: "\U0001F381 Feature Request" -labels: ["kind/feature", "status/triage"] description: "Did you find bugs, errors, or anything that isn't straightforward in the documentation?" 
+title: "[Feature]: " +labels: ["kind/feature", "status/triage"] +assignees: + - dreulavelle body: - type: markdown diff --git a/.github/ISSUE_TEMPLATE/---maintainers.yml b/.github/ISSUE_TEMPLATE/---maintainers.yml index a2bbc36..690c1b8 100644 --- a/.github/ISSUE_TEMPLATE/---maintainers.yml +++ b/.github/ISSUE_TEMPLATE/---maintainers.yml @@ -1,6 +1,9 @@ name: "\U0001F41E Request to Help in Development of RTN" -labels: ["kind/maintainer", "status/triage"] description: "Want to help in the development of Rank Torrent Name (RTN)?" +title: "[Help]: " +labels: ["kind/maintainer", "status/triage"] +assignees: + - dreulavelle body: - type: markdown diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..54f214c --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,9 @@ +# Pull Request Check List + +Resolves: #issue-number-here + +- [ ] Added **tests** for changed code. +- [ ] Updated **documentation** for changed code. + +## Description: + diff --git a/.github/workflows/PULL_REQUEST_TEMPLATE.md b/.github/workflows/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index a400ac6..0000000 --- a/.github/workflows/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,8 +0,0 @@ -# Pull Request Check List - -Resolves: #issue-number-here - -- [ ] Added **tests** for changed code. -- [ ] Updated **documentation** for changed code. - - \ No newline at end of file diff --git a/README.md b/README.md index 64f7b24..166e309 100644 --- a/README.md +++ b/README.md @@ -131,35 +131,6 @@ Torrent( lev_ratio=0.95 ) ``` -## Torrent Parser - -You can also parse a torrent title similar to how PTN works. This is an enhanced version of PTN that combines RTN's parsing as well. This also includes enhanced episode parsing as well that covers a much better range of titles. 
- -Using the example above: - -```py -from RTN import parse -parsed = parse("Example.Movie.2020.1080p.BluRay.x264-Example") - -print(parsed.parsed_title) # Output: "Example Movie" -print(parsed.year) # Output: [2020] -``` - -We also set **coherent_types** to `True` from the PTN data that get's combined with RTN parsed metadata. - -## Checking Title Similarity - -Sometimes, you might just want to check if two titles match closely enough, without going through the entire ranking process. RTN provides a simple function, title_match, for this purpose: - -```py -from RTN import title_match - -# Check if two titles are similar above a threshold of 0.9 -match = title_match("Correct Movie Title 2020", "Correct Movie Title (2020)") -print(match) # Output: True if similarity is above 0.9, otherwise False -``` - -This functionality is especially useful when you have a list of potential titles and want to find the best match for a given reference title. ## Understanding SettingsModel and RankingModel @@ -286,26 +257,76 @@ Keep in mind that these are explicitly set within RTN and are needed in order fo Create as many `SettingsModel` and `RankingModel` as you like to use anywhere in your code. They are mean't to be used as a way to version settings for your users. -## Real World Example +# Extras + +## Torrent Parser + +You can also parse a torrent title similar to how PTN works. This is an enhanced version of PTN that combines RTN's parsing as well. This also includes enhanced episode parsing as well that covers a much better range of titles. 
+ +Using the example above: + +```py +from RTN import parse +parsed = parse("Example.Movie.2020.1080p.BluRay.x264-Example") + +print(parsed.raw_title) # Output: "Example.Movie.2020.1080p.BluRay.x264-Example" +print(parsed.parsed_title) # Output: "Example Movie" +print(parsed.year) # Output: [2020] +``` + +> :warning: We also set **coherent_types** to `True` from the PTN data that gets combined with RTN parsed metadata. +> This just ensures that all the types are uniform. **Everything is either a list of strings or ints, or it's a boolean.** + +## Checking Title Similarity + +Sometimes, you might just want to check if two titles match closely enough, without going through the entire ranking process. RTN provides a simple function, title_match, for this purpose: + +```py +from RTN import title_match + +# Check if two titles are similar above a threshold of 0.9 +match = title_match("Correct Movie Title 2020", "Correct Movie Title (2020)") +print(match) # Output: True if similarity is above 0.9, otherwise False +``` + +This functionality is especially useful when you have a list of potential titles and want to find the best match for a given reference title. + +## Trash Check + +Maybe you just want to use our own garbage collector to weed out bad titles in your current scraping setup? + +```py +from RTN import check_trash + +if check_trash(raw_title): + # You can safely remove any title or item from being scraped if this returns True! + ... +``` + +# Real World Example Here is a crude example of how you could use RTN in scraping. ```py from RTN import RTN, Torrent, DefaultRanking -# Assuming 'settings' is defined somewhere and passed correctly +# Assuming 'settings' is defined somewhere and passed correctly. rtn = RTN(settings=settings, ranking_model=DefaultRanking()) ... -# Define some function for scraping for results.. +# Define some function for scraping for results from some API. 
if response.ok: torrents = set() for stream in response.streams: if not stream.infohash or not title_match(correct_title, stream.title): - # Skip results that don't match the query + # Skip results that don't match the query. + # We want to do this first to weed out torrents + # that are below the 90% match criteria. (Default is 90%) continue torrent: Torrent = rtn.rank(stream.title, stream.infohash) if torrent and torrent.fetch: - # Skip trash torrents by checking torrent.fetch + # Skip trash torrents by checking `torrent.fetch`. + # If torrent.fetch is True, then it's a good torrent, + # as considered by your ranking profile and settings model. torrents.add(torrent) # Sort the list of torrents based on their rank in descending order @@ -318,6 +339,46 @@ for torrent in sorted_torrents: print(f"Title: {torrent.parsed_data.parsed_title}, Infohash: {torrent.infohash}, Rank: {torrent.rank}") ``` +# ParsedData Structure + +Here is all of the attributes of `parsed_data` along with their default values: + +```py +class ParsedData(BaseModel): + """Parsed data model for a torrent title.""" + + raw_title: str + parsed_title: str + fetch: bool = False + is_4k: bool = False + is_multi_audio: bool = False + is_multi_subtitle: bool = False + is_complete: bool = False + year: List[int] = [] + resolution: List[str] = [] + quality: List[str] = [] + season: List[int] = [] + episode: List[int] = [] + codec: List[str] = [] + audio: List[str] = [] + subtitles: List[str] = [] + language: List[str] = [] + bitDepth: List[int] = [] + hdr: str | bool = False + proper: bool = False + repack: bool = False + remux: bool = False + upscaled: bool = False + remastered: bool = False + directorsCut: bool = False + extended: bool = False + excess: list = [] +``` + +This will continue to grow though as we expand on functionality, so keep checking back for this list! + +> :warning: Don't see something you want in the list? 
Submit a [Feature Request](https://github.com/dreulavelle/rank-torrent-name/issues/new?assignees=dreulavelle&labels=kind%2Ffeature%2Cstatus%2Ftriage&projects=&template=---feature-request.yml) to have it added! + ## Contributing Contributions to RTN are welcomed! Feel free to submit pull requests or open issues to suggest features or report bugs. As we grow, more features will be coming to RTN, there's already a lot planned! diff --git a/RTN/__init__.py b/RTN/__init__.py index 8262057..22d6f95 100644 --- a/RTN/__init__.py +++ b/RTN/__init__.py @@ -1,6 +1,6 @@ from .fetch import check_fetch, check_trash from .models import BaseRankingModel, DefaultRanking, ParsedData, SettingsModel -from .parser import RTN, Torrent, parse, sort, title_match +from .parser import RTN, Torrent, batch_parse, parse, sort, title_match from .patterns import parse_extras from .ranker import get_rank @@ -8,6 +8,7 @@ "RTN", "Torrent", "parse", + "batch_parse", "get_rank", "check_fetch", "check_trash", diff --git a/RTN/fetch.py b/RTN/fetch.py index f0817a9..b221f11 100644 --- a/RTN/fetch.py +++ b/RTN/fetch.py @@ -1,19 +1,21 @@ import regex from .models import ParsedData, SettingsModel -from .patterns import TRASH_COMPILED +from .patterns import IS_TRASH_COMPILED def check_trash(raw_title: str) -> bool: """Check if the title contains unwanted patterns.""" if not raw_title or not isinstance(raw_title, str): raise TypeError("The input title must be a non-empty string.") - return not any(pattern.search(raw_title) for pattern in TRASH_COMPILED) + # True if we find any of the trash patterns in the title. + # You can safely remove any title from being scraped if this returns True! 
+ return any(pattern.search(raw_title) for pattern in IS_TRASH_COMPILED) def check_fetch(data: ParsedData, settings: SettingsModel) -> bool: """Check user settings and unwanted quality to determine if torrent should be fetched.""" - if not check_trash(data.raw_title): + if check_trash(data.raw_title): return False if settings.require and any( pattern.search(data.raw_title) for pattern in settings.require if pattern # type: ignore diff --git a/RTN/models.py b/RTN/models.py index 209b731..eec84d6 100644 --- a/RTN/models.py +++ b/RTN/models.py @@ -42,36 +42,41 @@ class BaseRankingModel(BaseModel): The ranking values are used to determine the quality of a media item based on its attributes. Attributes: - uhd (int): The ranking value for Ultra HD (4K) resolution. - fhd (int): The ranking value for Full HD (1080p) resolution. - hd (int): The ranking value for HD (720p) resolution. - sd (int): The ranking value for SD (480p) resolution. - bluray (int): The ranking value for Blu-ray quality. - hdr (int): The ranking value for HDR quality. - hdr10 (int): The ranking value for HDR10 quality. - dolby_video (int): The ranking value for Dolby video quality. - dts_x (int): The ranking value for DTS:X audio quality. - dts_hd (int): The ranking value for DTS-HD audio quality. - dts_hd_ma (int): The ranking value for DTS-HD Master Audio audio quality. - atmos (int): The ranking value for Dolby Atmos audio quality. - truehd (int): The ranking value for Dolby TrueHD audio quality. - ddplus (int): The ranking value for Dolby Digital Plus audio quality. - ac3 (int): The ranking value for AC3 audio quality. - aac (int): The ranking value for AAC audio quality. - remux (int): The ranking value for remux attribute. - webdl (int): The ranking value for web-dl attribute. - repack (int): The ranking value for repack attribute. - proper (int): The ranking value for proper attribute. - dubbed (int): The ranking value for dubbed attribute. - subbed (int): The ranking value for subbed attribute. 
- av1 (int): The ranking value for AV1 attribute. + `uhd` (int): The ranking value for Ultra HD (4K) resolution. + `fhd` (int): The ranking value for Full HD (1080p) resolution. + `hd` (int): The ranking value for HD (720p) resolution. + `sd` (int): The ranking value for SD (480p) resolution. + `bluray` (int): The ranking value for Blu-ray quality. + `hdr` (int): The ranking value for HDR quality. + `hdr10` (int): The ranking value for HDR10 quality. + `dolby_video` (int): The ranking value for Dolby video quality. + `dts_x` (int): The ranking value for DTS:X audio quality. + `dts_hd` (int): The ranking value for DTS-HD audio quality. + `dts_hd_ma` (int): The ranking value for DTS-HD Master Audio audio quality. + `atmos` (int): The ranking value for Dolby Atmos audio quality. + `truehd` (int): The ranking value for Dolby TrueHD audio quality. + `ddplus` (int): The ranking value for Dolby Digital Plus audio quality. + `ac3` (int): The ranking value for AC3 audio quality. + `aac` (int): The ranking value for AAC audio quality. + `remux` (int): The ranking value for remux attribute. + `webdl` (int): The ranking value for web-dl attribute. + `repack` (int): The ranking value for repack attribute. + `proper` (int): The ranking value for proper attribute. + `dubbed` (int): The ranking value for dubbed attribute. + `subbed` (int): The ranking value for subbed attribute. + `av1` (int): The ranking value for AV1 attribute. + + Note: + - The higher the ranking value, the better the quality of the media item. + - The default ranking values are set to 0, which means that the attribute does not affect the overall rank. + - Users can customize the ranking values based on their preferences and requirements by using inheritance. 
""" # resolution - uhd: int = 0 - fhd: int = 0 - hd: int = 0 - sd: int = 0 + uhd: int = 0 # 4K + fhd: int = 0 # 1080p + hd: int = 0 # 720p + sd: int = 0 # 480p # quality bluray: int = 0 hdr: int = 0 @@ -98,7 +103,7 @@ class BaseRankingModel(BaseModel): class DefaultRanking(BaseRankingModel): - """Default ranking model for users to use.""" + """Default ranking model preset that should cover most common use cases.""" uhd: int = 140 fhd: int = 100 @@ -209,12 +214,11 @@ def compile_and_validate_patterns(cls, values: dict[str, Any]) -> dict[str, Any] compiled_patterns = [] for pattern in raw_patterns: if isinstance(pattern, str): - # Compile the pattern, taking into account your custom syntax for options like case-sensitivity - if pattern.startswith("/") and pattern.endswith("/i"): + if pattern.startswith("/") and pattern.endswith("/i"): # case-insensitive compiled_patterns.append(regex.compile(pattern[1:-2], regex.IGNORECASE)) - elif pattern.startswith("/") and pattern.endswith("/"): + elif pattern.startswith("/") and pattern.endswith("/"): # case-sensitive compiled_patterns.append(regex.compile(pattern[1:-1])) - else: + else: # case-insensitive by default compiled_patterns.append(regex.compile(pattern, regex.IGNORECASE)) elif isinstance(pattern, regex.Pattern): # Keep already compiled patterns as is @@ -226,16 +230,3 @@ def compile_and_validate_patterns(cls, values: dict[str, Any]) -> dict[str, Any] class Config: arbitrary_types_allowed = True - - def __getitem__(self, key: str) -> CustomRank: - """Allows direct access to custom rank settings.""" - return self.custom_ranks.get(key, CustomRank()) - - def __setitem__(self, key: str, value: CustomRank): - """Enables setting custom rank settings.""" - self.custom_ranks[key] = value - - def __delitem__(self, key: str): - """Allows deletion of custom rank settings.""" - if key in self.custom_ranks: - del self.custom_ranks[key] diff --git a/RTN/parser.py b/RTN/parser.py index 5fb4de0..3276473 100644 --- a/RTN/parser.py +++ 
b/RTN/parser.py @@ -1,3 +1,4 @@ +from concurrent.futures import ThreadPoolExecutor, as_completed from typing import Any, List import Levenshtein @@ -124,6 +125,32 @@ def parse(raw_title: str) -> ParsedData: return ParsedData(**full_data) +def parse_chunk(chunk: List[str]) -> List[ParsedData]: + """Parses a chunk of torrent titles.""" + return [parse(title) for title in chunk] + + +def batch_parse(titles: List[str], chunk_size: int = 50) -> List[ParsedData]: + """ + Parses a list of torrent titles in batches for improved performance. + + Args: + titles (List[str]): A list of torrent titles to parse. + chunk_size (int): The number of titles to process in each batch. Must be a positive integer. + + Returns: + List[ParsedData]: One ParsedData object per title, in the same order as `titles`. + """ + chunks = [titles[i:i + chunk_size] for i in range(0, len(titles), chunk_size)] + parsed_data = [] + with ThreadPoolExecutor() as executor: + # executor.map yields chunk results in submission order, so the output stays + # aligned index-for-index with `titles`; as_completed yields in completion order. + for chunk_result in executor.map(parse_chunk, chunks): + parsed_data.extend(chunk_result) + return parsed_data + + def title_match(correct_title: str, raw_title: str, threshold: float = 0.9) -> bool: """ Compares two titles using the Levenshtein ratio to determine similarity. diff --git a/RTN/patterns.py b/RTN/patterns.py index 604b754..b42e12a 100644 --- a/RTN/patterns.py +++ b/RTN/patterns.py @@ -8,7 +8,7 @@ def compile_patterns(patterns): # Pattern for identifying unwanted quality. This will set `parsed_data.fetch`. 
-TRASH_COMPILED = compile_patterns( +IS_TRASH_COMPILED = compile_patterns( [ r"\b(?:H[DQ][ .-]*)?CAM(?:H[DQ])?(?:[ .-]*Rip)?\b", r"\b(?:H[DQ][ .-]*)?S[ .-]*print\b", @@ -27,7 +27,6 @@ def compile_patterns(patterns): r"\bTrailers?\b", r"\b((Half.)?SBS|3D)\b", r"\bWEB[ .-]?DL[ .-]?Rip\b", - r"\bUm Actually|Captive Audience|Copycat Killers\b", ] ) diff --git a/benchmarks/rank.py b/benchmarks/rank.py index 6908e8b..c52302a 100644 --- a/benchmarks/rank.py +++ b/benchmarks/rank.py @@ -1,11 +1,30 @@ import pyperf -from RTN import RTN, DefaultRanking, SettingsModel +from RTN import RTN, DefaultRanking, SettingsModel, parse settings = SettingsModel() ranking_model = DefaultRanking() rtn = RTN(settings=settings, ranking_model=ranking_model) +def single_parse_benchmark_run(): + parse("The.Mandalorian.S01E02.1080p.DSNP.WEB-DL.x264") + +def multi_parse_benchmark_run(): + titles = [ + "The.Matrix.1999.1080p.BluRay.x264", + "Inception.2010.720p.BRRip.x264", + "Avengers.Endgame.2019.2160p.UHD.BluRay.x265", + "Interstellar.2014.IMAX.BDRip.x264", + "Game.of.Thrones.S01E01.1080p.WEB-DL.x264", + "Breaking.Bad.S05E14.720p.HDTV.x264", + "The.Witcher.S02E05.2160p.NF.WEBRip.x265", + "The.Mandalorian.S01E02.1080p.DSNP.WEB-DL.x264", + "1917.2019.1080p.BluRay.REMUX.AVC.DTS-HD.MA.5.1", + "Joker.2019.720p.BluRay.x264" + ] + for title in titles: + parse(title) + def single_benchmark_run(): rtn.rank("The.Matrix.1999.1080p.BluRay.x264", "30bfd9a796679bbeb0e110c17f32148ab8fd5746") @@ -26,5 +45,7 @@ def multi_benchmark_run(): rtn.rank(title, infohash) runner = pyperf.Runner() +runner.bench_func("Parsing Benchmark (1x)", single_parse_benchmark_run) +runner.bench_func("Parsing Benchmark (10x)", multi_parse_benchmark_run) runner.bench_func("Ranking Benchmark (1x)", single_benchmark_run) runner.bench_func("Ranking Benchmark (10x)", multi_benchmark_run) diff --git a/pyproject.toml b/pyproject.toml index 36ba6e1..8502cf8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name 
= "rank-torrent-name" -version = "0.1.1" +version = "0.1.2" description = "Parse Torrents using PTN and Rank them according to your preferences!" authors = ["Spoked "] license = "MIT" diff --git a/tests/test_parser.py b/tests/test_parser.py index 3880e14..e91d997 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,10 +1,22 @@ import pytest -from RTN import get_rank, parse -from RTN.models import CustomRank, DefaultRanking, ParsedData, SettingsModel -from RTN.parser import title_match -from RTN.patterns import extract_episodes +from RTN import check_trash, get_rank, parse +from RTN.models import ( + BaseRankingModel, + CustomRank, + DefaultRanking, + ParsedData, + SettingsModel, +) +from RTN.parser import batch_parse, title_match +from RTN.patterns import ( + COMPLETE_SERIES_COMPILED, + MULTI_AUDIO_COMPILED, + MULTI_SUBTITLE_COMPILED, + check_pattern, + extract_episodes, +) ## Define Fixtures @@ -43,6 +55,9 @@ def rank_model(): ## Define Tests def test_default_ranking_model(rank_model): + assert isinstance(rank_model, BaseRankingModel) + # Mostly used for if I forget to update the tests/docs. + # Serves as a warning. assert rank_model.uhd == 140 assert rank_model.fhd == 100 assert rank_model.hd == 50 @@ -56,6 +71,7 @@ def test_default_ranking_model(rank_model): assert rank_model.webdl == 90 assert rank_model.bluray == -90 + def test_default_parse_return(custom_settings, rank_model): parsed = parse("The.Big.Bang.Theory.S01E01.720p.HDTV.x264-CTU") assert isinstance(parsed, ParsedData) @@ -64,6 +80,7 @@ def test_default_parse_return(custom_settings, rank_model): rank = get_rank(parsed, custom_settings, rank_model) assert rank > 5000, f"Rank was {rank} instead." + def test_default_title_matching(): """Test the title_match function""" # This ensures all titles adhere to having a levenshtein ratio > 0.9. 
@@ -79,6 +96,41 @@ def test_default_title_matching(): for title, query, expected in test_cases: assert title_match(title, query) == expected, f"Failed for {title} and {query}" + +def test_batch_parse_returns_correct_parsed_data_objects(): + test_titles = [ + "The.Matrix.1999.1080p.BluRay.x264", + "Inception.2010.720p.BRRip.x264", + "The Simpsons S01E01 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", + "The Simpsons S01E01E02 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", + "The Simpsons S01E01-E02 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", + "The Simpsons S01E01-E02-E03-E04-E05 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", + "The Simpsons S01E01E02E03E04E05 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", + "The Simpsons E1-200 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", + "House MD All Seasons (1-8) 720p Ultra-Compressed", + "The Avengers (EMH) - S01 E15 - 459 (1080p - BluRay)", + "Witches Of Salem - 2Of4 - Road To Hell - Great Mysteries Of The World", + "Lost.[Perdidos].6x05.HDTV.XviD.[www.DivxTotaL.com]", + "4-13 Cursed (HD)", + "Dragon Ball Z Movie - 09 - Bojack Unbound - 1080p BluRay x264 DTS 5.1 -DDR", + "[F-D] Fairy Tail Season 1 - 6 + Extras [480P][Dual-Audio]", + "BoJack Horseman [06x01-08 of 16] (2019-2020) WEB-DLRip 720p", + "[HR] Boku no Hero Academia 87 (S4-24) [1080p HEVC Multi-Subs] HR-GZ", + "Bleach 10ยบ Temporada - 215 ao 220 - [DB-BR]", + "Naruto Shippuden - 107 - Strange Bedfellows", + "[224] Shingeki no Kyojin - S03 - Part 1 - 13 [BDRip.1080p.x265.FLAC]", + "[Erai-raws] Shingeki no Kyojin Season 3 - 11 [1080p][Multiple Subtitle]", + ] + + # Verify that each item in the result is an instance of ParsedData + # and its raw_title matches the corresponding input title + parsed_results = batch_parse(test_titles, chunk_size=5) + assert len(parsed_results) == len(test_titles) + for parsed_data, title in zip(parsed_results, test_titles): + assert isinstance(parsed_data, ParsedData), "Result item is not an instance of ParsedData" + assert parsed_data.raw_title == title, 
f"Expected raw_title to be '{title}', but got '{parsed_data.raw_title}'" + + def test_episode_parsing(): test_cases = [ # Regular Tests @@ -90,9 +142,12 @@ def test_episode_parsing(): ("The Simpsons E1-200 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", list(range(1, 201))), # Eps 1-200 ("House MD All Seasons (1-8) 720p Ultra-Compressed", []), ("The Avengers (EMH) - S01 E15 - 459 (1080p - BluRay)", [15]), - ("Witches Of Salem - 2Of4 - Road To Hell - Great Mysteries Of The World", [2]), # mini-series, this is correct! ("Lost.[Perdidos].6x05.HDTV.XviD.[www.DivxTotaL.com]", [5]), ("4-13 Cursed (HD)", [13]), + + # Mini-series, this is correct! + ("Witches Of Salem - 2Of4 - Road To Hell - Great Mysteries Of The World", [2]), + # Anime Tests ("Dragon Ball Z Movie - 09 - Bojack Unbound - 1080p BluRay x264 DTS 5.1 -DDR", []), ("[F-D] Fairy Tail Season 1 - 6 + Extras [480P][Dual-Audio]", []), @@ -102,9 +157,10 @@ def test_episode_parsing(): # Looks like it doesn't handle hyphens in the episode part. It's not a big deal, # as it's not a common practice to use hypens in the episode part. Mostly seen in Anime. - ("Naruto Shippuden - 107 - Strange Bedfellows", []), # Incorrect. [107] - ("[224] Shingeki no Kyojin - S03 - Part 1 - 13 [BDRip.1080p.x265.FLAC]", []), # Incorrect. [13] - ("[Erai-raws] Shingeki no Kyojin Season 3 - 11 [1080p][Multiple Subtitle]", []) # Incorrect. [11] + # I did run tests and I was still able to scrape for Naruto, which is a huge win as its always been a tough one! 
+ ("Naruto Shippuden - 107 - Strange Bedfellows", []), # Incorrect, should of been: [107] + ("[224] Shingeki no Kyojin - S03 - Part 1 - 13 [BDRip1080p.x265.FLAC]", []), # Incorrect, should of been: [13] + ("[Erai-raws] Shingeki no Kyojin Season 3 - 11 [1080p][Multiple Subtitle]", []) # Incorrect, should of been: [11] ] for test_string, expected in test_cases: assert ( @@ -112,53 +168,52 @@ def test_episode_parsing(): ), f"Failed for '{test_string}' with expected {expected}" +def test_multi_audio_patterns(): + test_cases = [ + ("Lucy 2014 Dual-Audio WEBRip 1400Mb", True), + ("Darkness Falls (2020) HDRip 720p [Hindi-Dub] Dual-Audio x264", True), + ("The Simpsons - Season 1 Complete [DVDrip ITA ENG] TNT Village", False), + ("Brave.2012.R5.DVDRip.XViD.LiNE-UNiQUE", False), + ] + for test_string, expected in test_cases: + assert check_pattern(MULTI_AUDIO_COMPILED, test_string) == expected + + +def test_multi_subtitle_patterns(): + test_cases = [ + ( + "IP Man And Four Kings 2019 HDRip 1080p x264 AAC Mandarin HC CHS-ENG SUBS Mp4Ba", + True, + ), + ("The Simpsons - Season 1 Complete [DVDrip ITA ENG] TNT Village", True), + ("The.X-Files.S01.Retail.DKsubs.720p.BluRay.x264-RAPiDCOWS", False), + ("Hercules (2014) WEBDL DVDRip XviD-MAX", False), + ] + for test_string, expected in test_cases: + assert check_pattern(MULTI_SUBTITLE_COMPILED, test_string) == expected -# def test_multi_audio_patterns(): -# test_cases = [ -# ("Lucy 2014 Dual-Audio WEBRip 1400Mb", True), -# ("Darkness Falls (2020) HDRip 720p [Hindi-Dub] Dual-Audio x264", True), -# ("The Simpsons - Season 1 Complete [DVDrip ITA ENG] TNT Village", False), -# ("Brave.2012.R5.DVDRip.XViD.LiNE-UNiQUE", False), -# ] -# for test_string, expected in test_cases: -# assert check_multi_audio(test_string) == expected - - -# def test_multi_subtitle_patterns(): -# test_cases = [ -# ( -# "IP Man And Four Kings 2019 HDRip 1080p x264 AAC Mandarin HC CHS-ENG SUBS Mp4Ba", -# True, -# ), -# ("The Simpsons - Season 1 Complete [DVDrip ITA 
ENG] TNT Village", True), -# ("The.X-Files.S01.Retail.DKsubs.720p.BluRay.x264-RAPiDCOWS", False), -# ("Hercules (2014) WEBDL DVDRip XviD-MAX", False), -# ] -# for test_string, expected in test_cases: -# assert check_multi_subtitle(test_string) == expected - - -# def test_complete_series_patterns(): -# test_cases = [ -# ( -# "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras", -# True, -# ), -# ("The Inbetweeners Collection", True), -# ("The Simpsons S01 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", False), -# ("Two and a Half Men S12E01 HDTV x264 REPACK-LOL [eztv]", False), -# ] -# for test_string, expected in test_cases: -# assert check_complete_series(test_string) == expected - - -# def test_unwanted_quality_patterns(): -# # False means the pattern is unwanted, and won't be fetched. -# test_cases = [ -# ("Mission.Impossible.1996.Custom.Audio.1080p.PL-Spedboy", True), -# ("Casino.1995.MULTi.REMUX.2160p.UHD.Blu-ray.HDR.HEVC.DTS-X7.1-DENDA", True), -# ("Guardians of the Galaxy (CamRip / 2014)", False), -# ("Brave.2012.R5.DVDRip.XViD.LiNE-UNiQUE", False), -# ] -# for test_string, expected in test_cases: -# assert check_unwanted_quality(test_string) == expected + +def test_complete_series_patterns(): + test_cases = [ + ( + "The Sopranos - The Complete Series (Season 1, 2, 3, 4, 5 & 6) + Extras", + True, + ), + ("The Inbetweeners Collection", True), + ("The Simpsons S01 1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", False), + ("Two and a Half Men S12E01 HDTV x264 REPACK-LOL [eztv]", False), + ] + for test_string, expected in test_cases: + assert check_pattern(COMPLETE_SERIES_COMPILED, test_string) == expected + + +def test_check_if_string_is_trash(): + # True means the string is unwanted, and won't be fetched. 
+ test_cases = [ + ("Mission.Impossible.1996.Custom.Audio.1080p.PL-Spedboy", False), + ("Casino.1995.MULTi.REMUX.2160p.UHD.Blu-ray.HDR.HEVC.DTS-X7.1-DENDA", False), + ("Guardians of the Galaxy (CamRip / 2014)", True), # CamRip + ("Brave.2012.R5.DVDRip.XViD.LiNE-UNiQUE", True), # R5, LiNE + ] + for test_string, expected in test_cases: + assert check_trash(test_string) == expected diff --git a/tests/test_ranker.py b/tests/test_ranker.py index b7f7d61..7f6537b 100644 --- a/tests/test_ranker.py +++ b/tests/test_ranker.py @@ -1,94 +1,107 @@ -# import pytest -# from PTN import ( -# BaseRankingModel, -# DefaultRanking, -# ParsedData, -# SettingsModel, -# Torrent, -# get_rank, -# parse, -# ) - - -# @pytest.fixture -# def settings_model(): -# return SettingsModel() - -# @pytest.fixture -# def custom_settings(): -# return SettingsModel( -# profile="custom", -# require=[], -# exclude=[], -# preferred=[r"\bS\d+"], -# custom_ranks={ -# "uhd": {"enable": True, "fetch": True, "rank": -200}, -# "fhd": {"enable": True, "fetch": True, "rank": 90}, -# "hd": {"enable": True, "fetch": True, "rank": 60}, -# "sd": {"enable": True, "fetch": True, "rank": -120}, -# "dolby_video": {"enable": True, "fetch": True, "rank": -1000}, -# "hdr": {"enable": True, "fetch": True, "rank": -1000}, -# "hdr10": {"enable": True, "fetch": True, "rank": -1000}, -# "aac": {"enable": True, "fetch": True, "rank": 70}, -# "ac3": {"enable": True, "fetch": True, "rank": 50}, -# "remux": {"enable": False, "fetch": True, "rank": -75}, -# "webdl": {"enable": True, "fetch": True, "rank": 90}, -# "bluray": {"enable": True, "fetch": True, "rank": -90}, -# }, -# ) - - -# test_data = [ -# ( -# "Jumanji (1995) RM4K (1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", -# { -# "raw_title": "Jumanji (1995) RM4K (1080p BluRay x265 HEVC 10bit AAC 5.1 Tigole", -# "parsed_title": "Jumanji", -# "fetch": True, -# "year": [1995], -# "resolution": ["1080p"], -# "quality": ["Blu-ray"], -# "codec": ["H.265"], -# "audio": ["AAC 5.1"], -# 
"bitDepth": [10], -# }, -# ), -# ( -# "The Simpsons - Complete Seasons S01 to S28 (1080p, 720p, DVDRip)", -# { -# "raw_title": "The Simpsons - Complete Seasons S01 to S28 (1080p, 720p, DVDRip)", -# "parsed_title": "The Simpsons", -# "fetch": True, -# "is_complete": True, -# "resolution": ["1080p"], -# "quality": ["DVD-Rip"], -# "season": list(range(1, 29)), -# }, -# ), -# ] - -# test_ids = ["FullQualityCheck", "SeasonRangeCheck"] - -# def test_valid_torrent_from_item(): -# ranking_model = DefaultRanking() -# torrent = Torrent( -# ranking_model=ranking_model, -# raw_title="The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]", -# infohash="1234567890", -# ) - -# assert isinstance(torrent, Torrent) -# assert isinstance(torrent.parsed_data, ParsedMediaItem) -# assert torrent.raw_title == "The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]" -# assert torrent.infohash == "1234567890" -# assert torrent.parsed_data.parsed_title == "The Walking Dead" -# assert torrent.parsed_data.fetch is True -# assert torrent.rank == 163, f"Rank was {torrent.rank} instead of 163" - -# @pytest.mark.parametrize("raw_title, expected", test_data, ids=test_ids) -# def test_parsed_media_item_properties(raw_title: str, expected: dict): -# item = ParsedMediaItem(raw_title=raw_title) -# for key, value in expected.items(): -# assert ( -# getattr(item, key) == value -# ), f"Attribute {key} failed for raw_title: {raw_title}" \ No newline at end of file +import pytest + +from RTN import RTN +from RTN.models import ( + BaseRankingModel, + CustomRank, + DefaultRanking, + ParsedData, + SettingsModel, +) +from RTN.parser import Torrent + + +@pytest.fixture +def ranking_model(): + return DefaultRanking() + +@pytest.fixture +def custom_ranking_model(): + return BaseRankingModel( + uhd=140, + fhd=100, + hd=50, + sd=-100, + dolby_video=-1000, + hdr=-1000, + hdr10=-1000, + aac=70, + ac3=50, + remux=-75, + webdl=90, + bluray=-90, + ) + +@pytest.fixture +def settings_model(): + return SettingsModel() + 
+@pytest.fixture +def custom_settings_model(): + return SettingsModel( + profile="custom", + require=[], + exclude=[], + preferred=["BluRay", r"/\bS\d+/", "/HDR|HDR10/i"], + custom_ranks={ + "uhd": CustomRank(enable=True, fetch=True, rank=-200), + "fhd": CustomRank(enable=True, fetch=True, rank=90), + "hd": CustomRank(enable=True, fetch=True, rank=60), + "sd": CustomRank(enable=True, fetch=True, rank=-120), + "dolby_video": CustomRank(enable=True, fetch=True, rank=-1000), + "hdr": CustomRank(enable=True, fetch=True, rank=-1000), + "hdr10": CustomRank(enable=True, fetch=True, rank=-1000), + "aac": CustomRank(enable=True, fetch=True, rank=70), + "ac3": CustomRank(enable=True, fetch=True, rank=50), + "remux": CustomRank(enable=False, fetch=True, rank=-75), + "webdl": CustomRank(enable=True, fetch=True, rank=90), + "bluray": CustomRank(enable=True, fetch=True, rank=-90), + }, + ) + + +def test_valid_torrent_from_title(settings_model, ranking_model): + rtn = RTN(settings_model, ranking_model) + + torrent = rtn.rank("The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]", + "c08a9ee8ce3a5c2c08865e2b05406273cabc97e7") + + assert isinstance(torrent, Torrent) + assert isinstance(torrent.parsed_data, ParsedData) + assert torrent.raw_title == "The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]" + assert torrent.infohash == "c08a9ee8ce3a5c2c08865e2b05406273cabc97e7" + assert torrent.parsed_data.parsed_title == "The Walking Dead" + assert torrent.parsed_data.fetch is False + assert torrent.rank > 0, f"Rank was {torrent.rank} instead of 163" + assert torrent.lev_ratio > 0.0, f"Levenshtein ratio was {torrent.lev_ratio} instead of > 0.0" + +def test_invalid_torrent_from_title(settings_model, ranking_model): + rtn = RTN(settings_model, ranking_model) + + with pytest.raises(TypeError): + # Missing 2 string arguments + rtn.rank("c08a9ee8ce3a5c2c08865e2b05406273cabc97e7") # type: ignore + + with pytest.raises(ValueError): + # Missing title + rtn.rank(None, 
"c08a9ee8ce3a5c2c08865e2b05406273cabc97e7") # type: ignore + + with pytest.raises(ValueError): + # Missing infohash + rtn.rank("The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]", None) # type: ignore + + with pytest.raises(ValueError): + # Missing title and infohash + rtn.rank(None, None) # type: ignore + + with pytest.raises(TypeError): + # Invalid title type + rtn.rank(123, "c08a9ee8ce3a5c2c08865e2b05406273cabc97e7") # type: ignore + + with pytest.raises(TypeError): + # Invalid infohash type + rtn.rank("The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]", 123) # type: ignore + + with pytest.raises(ValueError): + # Invalid infohash length + rtn.rank("The Walking Dead S05E03 720p HDTV x264-ASAP[ettv]", "c08a9ee8ce3a5c2c0886") \ No newline at end of file