diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 7fd4891..6e2c6fc 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -10,8 +10,8 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
- os: [ubuntu-latest, macos-latest]
- python-version: ['3.8', '3.9', '3.10']
+ os: [ubuntu-latest, macos-latest, windows-latest]
+ python-version: ['3.8', '3.9', '3.10', '3.11']
steps:
- name: Checkout repository
@@ -35,7 +35,7 @@ jobs:
- name: Download resources
run: |
python3 -m pip install .
- python3 -m perke download ${{ secrets.GITHUB_TOKEN }}
+ python3 -m perke download
- name: Run tests
run: pytest
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a7914cf..cc9811a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,7 +18,7 @@ repos:
- --profile=black
- --line-length=79
- repo: https://github.com/psf/black
- rev: 23.1.0
+ rev: 23.3.0
hooks:
- id: black
args:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf9ba3e..5741f3a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,13 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
+### Added
+- Added support for Python `3.11`
+- Brought Windows tests back
+
+### Changed
+- Adapted Perke to all changes in
+ [Hazm](https://github.com/roshan-research/hazm) `0.9` release
## [0.4.1] - 2023-03-15
### Fixed
@@ -32,7 +39,8 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
- Changed CI from Travis CI to GitHub workflows
### Removed
-- Removed Windows tests since hazm runs on WSL and WSL tests is same as Linux
+- Removed Windows tests since [Hazm](https://github.com/roshan-research/hazm)
+  runs on WSL and WSL tests are the same as Linux
### Fixed
- Removed type hints from docstrings
diff --git a/README.md b/README.md
index 9e39c16..c7b1922 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
[![pre-commit.ci](https://results.pre-commit.ci/badge/github/AlirezaTheH/perke/main.svg)](https://results.pre-commit.ci/latest/github/alirezatheh/perke/main)
[![PyPI Version](https://img.shields.io/pypi/v/perke)](https://pypi.python.org/pypi/perke)
[![Python Versions](https://img.shields.io/pypi/pyversions/perke)](https://pypi.org/project/perke)
-[![Documentation Status](https://readthedocs.org/projects/perke/badge/?version=stable)](https://perke.readthedocs.io/en/latest/?badge=stable)
+[![Documentation Status](https://readthedocs.org/projects/perke/badge/?version=stable)](https://perke.readthedocs.io/en/stable/?badge=stable)
Perke is a Python keyphrase extraction package for Persian language. It
provides an end-to-end keyphrase extraction pipeline in which each component
diff --git a/examples/unsupervised/graph_based/multipartite_rank.py b/examples/unsupervised/graph_based/multipartite_rank.py
index d117d57..552d250 100644
--- a/examples/unsupervised/graph_based/multipartite_rank.py
+++ b/examples/unsupervised/graph_based/multipartite_rank.py
@@ -3,7 +3,7 @@
from perke.unsupervised.graph_based import MultipartiteRank
# Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
# 1. Create a MultipartiteRank extractor.
extractor = MultipartiteRank(valid_pos_tags=valid_pos_tags)
diff --git a/examples/unsupervised/graph_based/position_rank.py b/examples/unsupervised/graph_based/position_rank.py
index 9e6ef41..b6dee76 100644
--- a/examples/unsupervised/graph_based/position_rank.py
+++ b/examples/unsupervised/graph_based/position_rank.py
@@ -3,15 +3,15 @@
from perke.unsupervised.graph_based import PositionRank
# Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'NOUN,EZ', 'ADJ', 'ADJ,EZ'}
# Define the grammar for selecting the keyphrase candidates
grammar = r"""
NP:
-
{}
+ {}
NP:
- {*}
- }{<.*e?>
+ {*}
+ }{<.*(,EZ)?>
"""
# 1. Create a PositionRank extractor.
@@ -19,7 +19,11 @@
# 2. Load the text.
input_filepath = Path(__file__).parent.parent.parent / 'input.txt'
-extractor.load_text(input=input_filepath, word_normalization_method=None)
+extractor.load_text(
+ input=input_filepath,
+ word_normalization_method=None,
+ universal_pos_tags=False,
+)
# 3. Select the noun phrases up to 3 words as keyphrase candidates.
extractor.select_candidates(grammar=grammar, maximum_word_number=3)
diff --git a/examples/unsupervised/graph_based/single_rank.py b/examples/unsupervised/graph_based/single_rank.py
index 2a94a96..569d78f 100644
--- a/examples/unsupervised/graph_based/single_rank.py
+++ b/examples/unsupervised/graph_based/single_rank.py
@@ -3,7 +3,7 @@
from perke.unsupervised.graph_based import SingleRank
# Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
# 1. Create a SingleRank extractor.
extractor = SingleRank(valid_pos_tags=valid_pos_tags)
diff --git a/examples/unsupervised/graph_based/text_rank.py b/examples/unsupervised/graph_based/text_rank.py
index 69a6603..78a8276 100644
--- a/examples/unsupervised/graph_based/text_rank.py
+++ b/examples/unsupervised/graph_based/text_rank.py
@@ -3,7 +3,7 @@
from perke.unsupervised.graph_based import TextRank
# Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
# 1. Create a TextRank extractor.
extractor = TextRank(valid_pos_tags=valid_pos_tags)
diff --git a/examples/unsupervised/graph_based/topic_rank.py b/examples/unsupervised/graph_based/topic_rank.py
index ba619aa..b10bab9 100644
--- a/examples/unsupervised/graph_based/topic_rank.py
+++ b/examples/unsupervised/graph_based/topic_rank.py
@@ -1,15 +1,15 @@
-from os.path import dirname, join
+from pathlib import Path
from perke.unsupervised.graph_based import TopicRank
# Define the set of valid part of speech tags to occur in the model.
-valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+valid_pos_tags = {'NOUN', 'ADJ'}
# 1. Create a TopicRank extractor.
extractor = TopicRank(valid_pos_tags=valid_pos_tags)
# 2. Load the text.
-input_filepath = join(dirname(dirname(dirname(__file__))), 'input.txt')
+input_filepath = Path(__file__).parent.parent.parent / 'input.txt'
extractor.load_text(input=input_filepath, word_normalization_method='stemming')
# 3. Select the longest sequences of nouns and adjectives, that do
diff --git a/perke/base/extractor.py b/perke/base/extractor.py
index aea3b60..9e3e724 100644
--- a/perke/base/extractor.py
+++ b/perke/base/extractor.py
@@ -45,7 +45,7 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
----------
valid_pos_tags:
Set of valid part of speech tags, defaults to nouns and
- adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+ adjectives. I.e. `{'NOUN', 'ADJ'}`.
"""
self.word_normalization_method: Optional[str] = None
self.sentences: List[Sentence] = []
@@ -54,13 +54,14 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
punctuation_marks
)
if valid_pos_tags is None:
- valid_pos_tags = {'N', 'Ne', 'AJ', 'AJe'}
+ valid_pos_tags = {'NOUN', 'ADJ'}
self.valid_pos_tags: Set[str] = valid_pos_tags
def load_text(
self,
input: Union[str, Path],
word_normalization_method: WordNormalizationMethod = 'stemming',
+ universal_pos_tags: bool = True,
) -> None:
"""
Loads the text of a document or string.
@@ -74,9 +75,15 @@ def load_text(
Word normalization method, defaults to `'stemming'`. See
`perke.base.types.WordNormalizationMethod` for available
methods.
+
+ universal_pos_tags:
+ Whether to use universal part of speech tags or not,
+ defaults to `True`.
"""
# Initialize reader
- reader = RawTextReader(input, word_normalization_method)
+ reader = RawTextReader(
+ input, word_normalization_method, universal_pos_tags
+ )
# Load sentences
self.sentences = reader.read()
@@ -225,7 +232,7 @@ def _add_candidate_occurrence(
The offset of the occurrence
normalized_words:
- List of normalized of words of the occurrence
+ List of normalized words of the occurrence
"""
# Build the canonical form of the candidate
canonical_form = ' '.join(normalized_words)
@@ -306,7 +313,7 @@ def _select_candidates_with_longest_sequences(
first = sequence_offsets[0]
last = sequence_offsets[-1]
- # Add the ngram as a new candidate occurrence
+ # Add the n-gram as a new candidate occurrence
self._add_candidate_occurrence(
words=sentence.words[first : last + 1],
offset=offset_shift + first,
@@ -336,20 +343,20 @@ def _select_candidates_with_grammar(
defaults to::
r\"""
NP:
- {}
+ {}
NP:
- {*}
- }{<.*e?>'
+ {*}
+ }{<.*(,EZ)?>
\"""
"""
# Initialize default grammar if none provided
if grammar is None:
grammar = r"""
NP:
- {}
+ {}
NP:
- {*}
- }{<.*e?>
+ {*}
+ }{<.*(,EZ)?>
"""
# Initialize parser
diff --git a/perke/base/readers.py b/perke/base/readers.py
index 6e279e8..29eb08a 100644
--- a/perke/base/readers.py
+++ b/perke/base/readers.py
@@ -1,4 +1,3 @@
-from os.path import dirname, join
from pathlib import Path
from typing import List
@@ -31,7 +30,9 @@ class Reader:
"""
def __init__(
- self, word_normalization_method: WordNormalizationMethod
+ self,
+ word_normalization_method: WordNormalizationMethod,
+ universal_pos_tags: bool,
) -> None:
"""
Initializes the reader.
@@ -42,6 +43,9 @@ def __init__(
Word normalization method, see
`perke.base.types.WordNormalizationMethod` for available
methods.
+
+ universal_pos_tags:
+ Whether to use universal part of speech tags or not
"""
self.word_normalization_method: WordNormalizationMethod = (
word_normalization_method
@@ -49,10 +53,12 @@ def __init__(
self.normalizer: hazm.Normalizer = hazm.Normalizer()
self.stemmer: hazm.Stemmer = hazm.Stemmer()
self.lemmatizer: hazm.Lemmatizer = hazm.Lemmatizer()
- model_filepath = join(
- dirname(dirname(__file__)), 'resources', 'postagger.model'
+ self.pos_tagger: hazm.POSTagger = hazm.POSTagger(
+ model=str(
+ Path(__file__).parent.parent / 'resources' / 'pos_tagger.model'
+ ),
+ universal_tag=universal_pos_tags,
)
- self.pos_tagger: hazm.POSTagger = hazm.POSTagger(model=model_filepath)
class RawTextReader(Reader):
@@ -69,6 +75,7 @@ def __init__(
self,
input: str,
word_normalization_method: WordNormalizationMethod,
+        universal_pos_tags: bool,
) -> None:
"""
Initializes the reader.
@@ -82,8 +89,11 @@ def __init__(
Word normalization method, see
`perke.base.types.WordNormalizationMethod` for available
methods.
+
+ universal_pos_tags:
+ Whether to use universal part of speech tags or not
"""
- super().__init__(word_normalization_method)
+ super().__init__(word_normalization_method, universal_pos_tags)
# If input is a filepath
if isinstance(input, Path):
diff --git a/perke/cli/download.py b/perke/cli/download.py
index 242d597..99e10b7 100644
--- a/perke/cli/download.py
+++ b/perke/cli/download.py
@@ -1,91 +1,29 @@
-from io import BytesIO
from pathlib import Path
-from typing import Optional
-from zipfile import ZipFile
-import requests
-import rich_click.typer as typer
-from github import Github
-from github.GitReleaseAsset import GitReleaseAsset
+import gdown
from perke.cli.base import app
@app.command('download')
-def download_command(
- github_token: Optional[str] = typer.Argument(
- None,
- help=(
- 'The GitHub token to use with GitHub API in order to avoid rate'
- 'limit'
- ),
- ),
-) -> None:
+def download_command() -> None:
"""
Perke requires a trained POS tagger model. We use hazm's tagger
model. This command aims to easily download latest hazm's resources
(tagger and parser models).
"""
- download(github_token)
+ download()
-def download(github_token: Optional[str] = None) -> None:
+def download() -> None:
"""
Function version of `download_command` to be available in the
package.
"""
- asset = get_latest_resources_asset(github_token)
- extract_path = Path(__file__).parent.parent / 'resources'
- download_and_extract_asset(asset, extract_path)
-
-
-def get_latest_resources_asset(github_token: str) -> GitReleaseAsset:
- """
- Searches through hazm's releases and find the latest release that
- contains resources.
-
- Parameters
- ----------
- github_token:
- The GitHub token to use with GitHub API in order to avoid rate
- limit
-
- Returns
- -------
- The resources asset
- """
- g = Github(login_or_token=github_token)
- repo = g.get_repo('roshan-research/hazm')
- for release in repo.get_releases():
- for asset in release.get_assets():
- if asset.name.startswith(f'resources-{release.tag_name[1:]}'):
- return asset
-
-
-def download_and_extract_asset(
- asset: GitReleaseAsset,
- extract_path: Path,
-) -> None:
- """
- Downloads a GitHub asset file and extract it.
-
- Parameters
- ----------
- asset:
- The GitHub asset to be downloaded
-
- extract_path:
- The extract path for the downloaded file to be extracted
- """
- chunk_size = 1024 * 1024
- with typer.progressbar(
- length=asset.size, label=f'Downloading {asset.name} ...', fill_char='='
- ) as progress:
- with requests.get(url=asset.browser_download_url, stream=True) as r:
- with BytesIO() as io_file:
- for chunk in r.iter_content(chunk_size=chunk_size):
- io_file.write(chunk)
- progress.update(chunk_size)
- with ZipFile(io_file) as zip_file:
- zip_file.extractall(path=extract_path)
- typer.secho('Download completed.', fg='green')
+ gdown.download(
+ id='1Q3JK4NVUC2t5QT63aDiVrCRBV225E_B3',
+ output=str(
+ Path(__file__).parent.parent / 'resources' / 'pos_tagger.model'
+ ),
+ quiet=False,
+ )
diff --git a/perke/unsupervised/graph_based/multipartite_rank.py b/perke/unsupervised/graph_based/multipartite_rank.py
index 123bdc7..aca6561 100644
--- a/perke/unsupervised/graph_based/multipartite_rank.py
+++ b/perke/unsupervised/graph_based/multipartite_rank.py
@@ -52,7 +52,7 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
----------
valid_pos_tags:
Set of valid part of speech tags, defaults to nouns and
- adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+ adjectives. I.e. `{'NOUN', 'ADJ'}`.
"""
super().__init__(valid_pos_tags)
self.topic_ids: Dict[str, int] = {}
diff --git a/perke/unsupervised/graph_based/position_rank.py b/perke/unsupervised/graph_based/position_rank.py
index 3f0c9c9..268a337 100644
--- a/perke/unsupervised/graph_based/position_rank.py
+++ b/perke/unsupervised/graph_based/position_rank.py
@@ -42,8 +42,10 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
----------
valid_pos_tags:
Set of valid part of speech tags, defaults to nouns and
- adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+ adjectives. I.e. `{'NOUN', 'NOUN,EZ', 'ADJ', 'ADJ,EZ'}`.
"""
+ if valid_pos_tags is None:
+ valid_pos_tags = {'NOUN', 'NOUN,EZ', 'ADJ', 'ADJ,EZ'}
super().__init__(valid_pos_tags)
self.positions: DefaultDict[str, float] = defaultdict(float)
@@ -66,22 +68,21 @@ def select_candidates(
defaults to::
r\"""
NP:
- {}
+ {}
NP:
- {*}
- }{<.*e?>.
+ {*}
+ }{<.*(,EZ)?>
\"""
-
maximum_length: `int`
Maximum length in words of the candidate, defaults to `3`.
"""
if grammar is None:
grammar = r"""
NP:
- {}
+ {}
NP:
- {*}
- }{<.*e?>
+ {*}
+ }{<.*(,EZ)?>
"""
# Select sequence of noun phrases with given pattern
diff --git a/perke/unsupervised/graph_based/single_rank.py b/perke/unsupervised/graph_based/single_rank.py
index 26bbb5a..0a1922d 100644
--- a/perke/unsupervised/graph_based/single_rank.py
+++ b/perke/unsupervised/graph_based/single_rank.py
@@ -40,7 +40,7 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
----------
valid_pos_tags:
Set of valid part of speech tags, defaults to nouns and
- adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+ adjectives. I.e. `{'NOUN', 'ADJ'}`.
"""
super().__init__(valid_pos_tags)
self.graph_edges_are_weighted: bool = True
diff --git a/perke/unsupervised/graph_based/text_rank.py b/perke/unsupervised/graph_based/text_rank.py
index a49866d..e984db6 100644
--- a/perke/unsupervised/graph_based/text_rank.py
+++ b/perke/unsupervised/graph_based/text_rank.py
@@ -50,7 +50,7 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
----------
valid_pos_tags:
Set of valid part of speech tags, defaults to nouns and
- adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+ adjectives. I.e. `{'NOUN', 'ADJ'}`.
"""
super().__init__(valid_pos_tags)
self.graph: nx.Graph = nx.Graph()
diff --git a/perke/unsupervised/graph_based/topic_rank.py b/perke/unsupervised/graph_based/topic_rank.py
index 96fe8bb..3abdff6 100644
--- a/perke/unsupervised/graph_based/topic_rank.py
+++ b/perke/unsupervised/graph_based/topic_rank.py
@@ -54,7 +54,7 @@ def __init__(self, valid_pos_tags: Optional[Set[str]] = None) -> None:
----------
valid_pos_tags: `set[str]`, optional
Set of valid part of speech tags, defaults to nouns and
- adjectives. I.e. `{'N', 'Ne', 'AJ', 'AJe'}`.
+ adjectives. I.e. `{'NOUN', 'ADJ'}`.
"""
super().__init__(valid_pos_tags)
self.graph: nx.Graph = nx.Graph()
diff --git a/requirements/README.md b/requirements/README.md
deleted file mode 100644
index 918dafc..0000000
--- a/requirements/README.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# pip requirements files
-- [`main.txt`](main.txt): Default requirements
-- [`test.txt`](test.txt): Requirements for running test suite
-- [`documentation.txt`](documentation.txt): Requirements for building the
- documentation
diff --git a/requirements/main.txt b/requirements/main.txt
index a01b8e3..3731833 100644
--- a/requirements/main.txt
+++ b/requirements/main.txt
@@ -4,5 +4,4 @@ networkx
scipy
typer==0.5.0
rich-click==1.5.2
-PyGithub
-requests
+gdown
diff --git a/setup.py b/setup.py
index 4925954..f2b2aa8 100644
--- a/setup.py
+++ b/setup.py
@@ -75,6 +75,7 @@
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
+ 'Programming Language :: Python :: 3.11',
'Operating System :: OS Independent',
'Intended Audience :: Developers',
'Intended Audience :: Education',
diff --git a/tests/test_multipartite_rank.py b/tests/test_multipartite_rank.py
index 7a45e5f..513fa41 100644
--- a/tests/test_multipartite_rank.py
+++ b/tests/test_multipartite_rank.py
@@ -7,4 +7,4 @@ def test_original_article_default(text: str) -> None:
extractor.select_candidates()
extractor.weight_candidates()
keyphrases = [keyphrase for keyphrase, weight in extractor.get_n_best(n=3)]
- assert keyphrases == ['رایانه', 'طبیعی', 'پردازش زبان گفتاری']
+ assert keyphrases == ['رایانه', 'انسانی', 'طبیعی']
diff --git a/tests/test_position_rank.py b/tests/test_position_rank.py
index 0dab485..d8738f0 100644
--- a/tests/test_position_rank.py
+++ b/tests/test_position_rank.py
@@ -3,12 +3,12 @@
def test_original_article_default(text: str) -> None:
extractor = PositionRank()
- extractor.load_text(input=text)
+ extractor.load_text(input=text, universal_pos_tags=False)
extractor.select_candidates()
extractor.weight_candidates()
keyphrases = [keyphrase for keyphrase, weight in extractor.get_n_best(n=3)]
assert keyphrases == [
- 'پردازش زبانهای طبیعی',
- 'پردازش زبان طبیعی',
- 'پردازش زبان گفتاری',
+ 'کاربردهای پردازش زبان',
+ 'پردازش زبانهای',
+ 'کاربردهای گفتاری پردازش',
]
diff --git a/tests/test_single_rank.py b/tests/test_single_rank.py
index 43e9a55..41c6f6c 100644
--- a/tests/test_single_rank.py
+++ b/tests/test_single_rank.py
@@ -8,7 +8,7 @@ def test_original_article_default(text: str) -> None:
extractor.weight_candidates()
keyphrases = [keyphrase for keyphrase, weight in extractor.get_n_best(n=3)]
assert keyphrases == [
- 'کاربردهای پردازش زبان طبیعی',
'کاربردهای متنوع پردازش زبانهای طبیعی',
- 'پردازش زبان طبیعی',
+ 'کاربردهای پردازش زبان طبیعی',
+ 'پردازش زبانهای طبیعی عبارت',
]
diff --git a/tests/test_text_rank.py b/tests/test_text_rank.py
index 9c0daa6..bccc14e 100644
--- a/tests/test_text_rank.py
+++ b/tests/test_text_rank.py
@@ -8,8 +8,8 @@ def test_original_article_default(text: str) -> None:
keyphrases = [keyphrase for keyphrase, weight in extractor.get_n_best(n=3)]
assert keyphrases == [
'کاربردهای پردازش زبان طبیعی',
- 'کاربردهای گفتاری پردازش زبان',
'زمینه درک زبان طبیعی',
+ 'پردازش اطلاعات زبانی',
]
@@ -22,5 +22,5 @@ def test_with_candidate_selection(text: str) -> None:
assert keyphrases == [
'کاربردهای متنوع پردازش زبانهای طبیعی',
'کاربردهای پردازش زبان طبیعی',
- 'کاربردهای گفتاری پردازش زبان',
+ 'زمینه درک زبان طبیعی',
]
diff --git a/tests/test_topic_rank.py b/tests/test_topic_rank.py
index 71bdb81..405e0e9 100644
--- a/tests/test_topic_rank.py
+++ b/tests/test_topic_rank.py
@@ -7,4 +7,4 @@ def test_original_article_default(text: str) -> None:
extractor.select_candidates()
extractor.weight_candidates()
keyphrases = [keyphrase for keyphrase, weight in extractor.get_n_best(n=3)]
- assert keyphrases == ['طبیعی', 'رایانه', 'پردازش زبان گفتاری']
+ assert keyphrases == ['رایانه', 'پردازش زبان گفتاری', 'طبیعی']