Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
Notebook para calcular metricas
  • Loading branch information
gregoriofsg authored Nov 4, 2023
1 parent 80cdedb commit 4793a09
Showing 1 changed file with 204 additions and 0 deletions.
204 changes: 204 additions & 0 deletions Metrics.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "f0b57e26-f4fb-4eea-a8a0-060b8e00ec99",
"metadata": {},
"outputs": [],
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"%pip install rank_eval"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d0ba5720-11b7-43ba-87b9-44a13dafc4ba",
"metadata": {},
"outputs": [],
"source": [
"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
"# NOTE(review): no cell in this notebook imports ranx (the import cell uses\n",
"# rank_eval) -- confirm whether this install is still needed.\n",
"%pip install ranx"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "273145ec-bb91-4182-8a53-90816057c442",
"metadata": {},
"outputs": [],
"source": [
"from rank_eval import Qrels, Run, evaluate, compare\n",
"import pandas as pd\n",
"from pandas import DataFrame\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5923ed76-11a1-419f-835f-81dc36e90e4e",
"metadata": {},
"outputs": [],
"source": [
"# Read the results table and cast both id columns to str in a single step.\n",
"df = pd.read_csv('resp_bertopic_unsuperv_filter.csv').astype(\n",
"    {'indice': str, 'num_tema_cadastrado': str}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c170ef1-096d-42e4-b5a3-ab905d603b10",
"metadata": {},
"outputs": [],
"source": [
"# Flatten df into three parallel lists: every row contributes one\n",
"# (query id, suggested doc id, similarity) triple per suggestion slot.\n",
"N_SUGGESTIONS = 6  # slots sugerido_1..6 / similaridade_1..6 read below\n",
"q_id = []\n",
"doc_id = []\n",
"score = []\n",
"for _, linha in df.iterrows():\n",
"    # Look the query id up by label instead of linha[0]: integer keys on a\n",
"    # label-indexed Series rely on a deprecated positional fallback, and\n",
"    # 'indice' is the column the qrels cell uses as query id.\n",
"    consulta = linha['indice']\n",
"    for i in range(1, N_SUGGESTIONS + 1):\n",
"        q_id.append(consulta)\n",
"        doc_id.append(linha[f\"sugerido_{i}\"])\n",
"        score.append(linha[f\"similaridade_{i}\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "165e3b90-12b9-4c5a-902b-94ac8f73c2fb",
"metadata": {},
"outputs": [],
"source": [
"# Column-name -> list mapping built from the three parallel lists.\n",
"ranx_dict = dict(q_id=q_id, doc_id=doc_id, score=score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f72578a-9ad2-42bc-927e-d0cad36933a6",
"metadata": {},
"outputs": [],
"source": [
"# Build the relevance judgments (qrels) straight from df.\n",
"# NOTE(review): 'posicao_tema_real' is passed as the relevance score --\n",
"# confirm it holds relevance grades and not a rank position.\n",
"qrel = Qrels.from_df(\n",
"    df,\n",
"    q_id_col='indice',\n",
"    doc_id_col='num_tema_cadastrado',\n",
"    score_col='posicao_tema_real',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "7f61ea60-65ad-4fd5-bd03-1e5953d1e24d",
"metadata": {},
"outputs": [],
"source": [
"# Build the run table with string ids, then wrap it in a Run named \"my_run\".\n",
"run_df = DataFrame(ranx_dict).astype({\"q_id\": str, \"doc_id\": str})\n",
"run = Run.from_df(\n",
"    df=run_df, q_id_col=\"q_id\", doc_id_col=\"doc_id\", score_col=\"score\", name=\"my_run\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "6cd573e2-43b6-403b-961f-08997eb945e7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'map@6': 0.027846322990471534,\n",
" 'ndcg@6': 0.035352674332042214,\n",
" 'mrr': 0.027846322990471534}"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Score the run against the qrels; the returned value is the cell output.\n",
"metrics = [\"map@6\", \"ndcg@6\", \"mrr\"]\n",
"evaluate(qrel, run, metrics)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aea3557d-ed84-4fdd-9bd0-bf6647aa366c",
"metadata": {},
"outputs": [],
"source": [
"# Per-query metric scores held on the Run, shown as a plain dict.\n",
"dict(run.scores)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a170f6e4-901f-4f9a-a5f9-d3deea092193",
"metadata": {},
"outputs": [],
"source": [
"# The Run object keeps the computed metric scores; display their means.\n",
"run.mean_scores"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7b8f939e-8028-4bcf-a391-915edc14ff1f",
"metadata": {},
"outputs": [],
"source": [
"# Compare different runs and perform statistical tests.\n",
"# Template only: run_1 ... run_5 are placeholders that this notebook never defines.\n",
"#report = compare(\n",
"# qrels=qrel,\n",
"# runs=[run_1, run_2, run_3, run_4, run_5],\n",
"# metrics=[\"map@100\", \"mrr@100\", \"ndcg@10\"],\n",
"# max_p=0.01 # P-value threshold\n",
"#)\n",
"\n",
"#print(report)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "782a63b1-b2e9-44b4-91ce-7d332af1deb7",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 4793a09

Please sign in to comment.