Skip to content

Commit

Permalink
Fix relation metadata indexing in nidx + old index (#2770)
Browse files Browse the repository at this point in the history
* Fix NIDX relation metadata indexing

* Fix also old index

* Add unit tests

* Add unit tests to nidx
  • Loading branch information
carlesonielfa authored Jan 9, 2025
1 parent b608776 commit 4f491f5
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 20 deletions.
6 changes: 3 additions & 3 deletions nidx/nidx_relation/src/resource_indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pub fn index_relations(
let source = relation.source.as_ref().expect("Missing source");
let source_value = source.value.as_str();
let source_type = io_maps::node_type_to_u64(source.ntype());
let soruce_subtype = source.subtype.as_str();
let source_subtype = source.subtype.as_str();

let target = relation.to.as_ref().expect("Missing target");
let target_value = target.value.as_str();
Expand All @@ -55,7 +55,7 @@ pub fn index_relations(
schema.resource_id => resource_id,
schema.source_value => source_value,
schema.source_type => source_type,
schema.source_subtype => soruce_subtype,
schema.source_subtype => source_subtype,
schema.target_value => target_value,
schema.target_type => target_type,
schema.target_subtype => target_subtype,
Expand All @@ -65,7 +65,7 @@ pub fn index_relations(

if let Some(metadata) = relation.metadata.as_ref() {
let encoded_metadata = metadata.encode_to_vec();
new_doc.add_bytes(schema.label, encoded_metadata);
new_doc.add_bytes(schema.metadata, encoded_metadata);
}

writer.add_document(new_doc)?;
Expand Down
16 changes: 16 additions & 0 deletions nidx/nidx_relation/tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,19 @@ pub fn create_relation(
}),
}
}

pub fn create_relation_with_metadata(
source: String,
source_node_type: NodeType,
source_subtype: String,
to: String,
to_node_type: NodeType,
to_subtype: String,
rel_type: RelationType,
metadata: RelationMetadata,
) -> Relation {
let mut relation =
create_relation(source, source_node_type, source_subtype, to, to_node_type, to_subtype, rel_type);
relation.metadata = Some(metadata);
relation
}
47 changes: 44 additions & 3 deletions nidx/nidx_relation/tests/test_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ use nidx_protos::entities_subgraph_request::DeletedEntities;
use nidx_protos::relation::RelationType;
use nidx_protos::relation_node::NodeType;
use nidx_protos::{
EntitiesSubgraphRequest, RelationNodeFilter, RelationPrefixSearchRequest, RelationSearchRequest, Resource,
ResourceId,
EntitiesSubgraphRequest, RelationMetadata, RelationNodeFilter, RelationPrefixSearchRequest, RelationSearchRequest,
Resource, ResourceId,
};
use nidx_relation::{RelationIndexer, RelationSearcher};
use nidx_tantivy::{TantivyMeta, TantivySegmentMetadata};
Expand Down Expand Up @@ -109,14 +109,23 @@ fn create_reader() -> anyhow::Result<RelationSearcher> {
"PEOPLE".to_string(),
RelationType::Entity,
),
common::create_relation(
common::create_relation_with_metadata(
"Anthony".to_string(),
NodeType::Entity,
"PEOPLE".to_string(),
"Netherlands".to_string(),
NodeType::Entity,
"PLACES".to_string(),
RelationType::Entity,
RelationMetadata {
paragraph_id: Some("myresource/0/myresource/100-200".to_string()),
source_start: Some(0),
source_end: Some(10),
to_start: Some(11),
to_end: Some(20),
data_augmentation_task_id: Some("mytask".to_string()),
..Default::default()
},
),
common::create_relation(
"Anna".to_string(),
Expand Down Expand Up @@ -203,6 +212,38 @@ fn test_search() -> anyhow::Result<()> {
Ok(())
}

#[test]
fn test_search_metadata() -> anyhow::Result<()> {
let reader = create_reader()?;

let result = reader.search(&RelationSearchRequest {
subgraph: Some(EntitiesSubgraphRequest {
depth: Some(1_i32),
entry_points: vec![common::create_relation_node(
"Anthony".to_string(),
NodeType::Entity,
"PEOPLE".to_string(),
)],
..Default::default()
}),
..Default::default()
})?;

let subgraph = result.subgraph.unwrap();
assert_eq!(subgraph.relations.len(), 1);

let relation = &subgraph.relations[0];
let metadata = relation.metadata.as_ref().unwrap();
assert_eq!(metadata.paragraph_id, Some("myresource/0/myresource/100-200".to_string()));
assert_eq!(metadata.source_start, Some(0));
assert_eq!(metadata.source_end, Some(10));
assert_eq!(metadata.to_start, Some(11));
assert_eq!(metadata.to_end, Some(20));
assert_eq!(metadata.data_augmentation_task_id, Some("mytask".to_string()));

Ok(())
}

#[test]
fn test_prefix_search() -> anyhow::Result<()> {
let reader = create_reader()?;
Expand Down
13 changes: 11 additions & 2 deletions nidx/nidx_relation/tests/test_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
//
mod common;

use nidx_protos::{relation::RelationType, relation_node::NodeType, Resource, ResourceId};
use nidx_protos::{relation::RelationType, relation_node::NodeType, RelationMetadata, Resource, ResourceId};
use nidx_relation::RelationIndexer;
use tempfile::TempDir;

Expand All @@ -42,14 +42,23 @@ fn test_index_docs() -> anyhow::Result<()> {
"ANIMALS".to_string(),
RelationType::Entity,
),
common::create_relation(
common::create_relation_with_metadata(
"01808bbd8e784552967a4fb0d8b6e584".to_string(),
NodeType::Resource,
"".to_string(),
"bird".to_string(),
NodeType::Entity,
"ANIMALS".to_string(),
RelationType::Entity,
RelationMetadata {
paragraph_id: Some("myresource/0/myresource/100-200".to_string()),
source_start: Some(0),
source_end: Some(10),
to_start: Some(11),
to_end: Some(20),
data_augmentation_task_id: Some("mytask".to_string()),
..Default::default()
},
),
],
..Default::default()
Expand Down
27 changes: 25 additions & 2 deletions nidx/tests/test_search_relations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ use nidx_protos::relation::RelationType;
use nidx_protos::relation_node::NodeType;
use nidx_protos::resource::ResourceStatus;
use nidx_protos::{
EntitiesSubgraphRequest, IndexMetadata, NewShardRequest, Relation, RelationNode, RelationNodeFilter,
RelationPrefixSearchRequest, RelationSearchRequest, RelationSearchResponse, Resource, ResourceId,
EntitiesSubgraphRequest, IndexMetadata, NewShardRequest, Relation, RelationMetadata, RelationNode,
RelationNodeFilter, RelationPrefixSearchRequest, RelationSearchRequest, RelationSearchResponse, Resource,
ResourceId,
};
use nidx_protos::{SearchRequest, VectorIndexConfig};
use sqlx::PgPool;
Expand Down Expand Up @@ -259,6 +260,15 @@ async fn create_knowledge_graph(fixture: &mut NidxFixture, shard_id: String) ->
source: Some(relation_nodes.get("Poetry").unwrap().clone()),
to: Some(relation_nodes.get("Swallow").unwrap().clone()),
relation_label: "about".to_string(),
metadata: Some(RelationMetadata {
paragraph_id: Some("myresource/0/myresource/100-200".to_string()),
source_start: Some(0),
source_end: Some(10),
to_start: Some(11),
to_end: Some(20),
data_augmentation_task_id: Some("mytask".to_string()),
..Default::default()
}),
..Default::default()
},
Relation {
Expand Down Expand Up @@ -642,6 +652,19 @@ async fn test_search_relations_neighbours(pool: PgPool) -> Result<(), Box<dyn st
.await?;

let expected = HashSet::from_iter([("Poetry".to_string(), "Swallow".to_string())]);

// Check that the relation obtained as response has appropiate metadata
let subgraph = response.subgraph.clone().unwrap();
let first_relation = &subgraph.relations[0];
let metadata = first_relation.metadata.as_ref().unwrap();

assert_eq!(metadata.paragraph_id, Some("myresource/0/myresource/100-200".to_string()));
assert_eq!(metadata.source_start, Some(0));
assert_eq!(metadata.source_end, Some(10));
assert_eq!(metadata.to_start, Some(11));
assert_eq!(metadata.to_end, Some(20));
assert_eq!(metadata.data_augmentation_task_id, Some("mytask".to_string()));

let neighbour_relations = extract_relations(response);
assert!(expected.is_subset(&neighbour_relations));

Expand Down
27 changes: 25 additions & 2 deletions nucliadb_node/tests/test_search_relations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ use nucliadb_core::protos::relation::RelationType;
use nucliadb_core::protos::relation_node::NodeType;
use nucliadb_core::protos::resource::ResourceStatus;
use nucliadb_core::protos::{
EntitiesSubgraphRequest, IndexMetadata, NewShardRequest, Relation, RelationNode, RelationNodeFilter,
RelationPrefixSearchRequest, RelationSearchRequest, RelationSearchResponse, Resource, ResourceId,
EntitiesSubgraphRequest, IndexMetadata, NewShardRequest, Relation, RelationMetadata, RelationNode,
RelationNodeFilter, RelationPrefixSearchRequest, RelationSearchRequest, RelationSearchResponse, Resource,
ResourceId,
};
use nucliadb_protos::nodereader::SearchRequest;
use rstest::*;
Expand Down Expand Up @@ -260,6 +261,15 @@ async fn create_knowledge_graph(writer: &mut TestNodeWriter, shard_id: String) -
source: Some(relation_nodes.get("Poetry").unwrap().clone()),
to: Some(relation_nodes.get("Swallow").unwrap().clone()),
relation_label: "about".to_string(),
metadata: Some(RelationMetadata {
paragraph_id: Some("myresource/0/myresource/100-200".to_string()),
source_start: Some(0),
source_end: Some(10),
to_start: Some(11),
to_end: Some(20),
data_augmentation_task_id: Some("mytask".to_string()),
..Default::default()
}),
..Default::default()
},
Relation {
Expand Down Expand Up @@ -609,6 +619,19 @@ async fn test_search_relations_neighbours() -> Result<(), Box<dyn std::error::Er
.await?;

let expected = HashSet::from_iter([("Poetry".to_string(), "Swallow".to_string())]);

// Check that the relation obtained as response has appropiate metadata
let subgraph = response.subgraph.clone().unwrap();
let first_relation = &subgraph.relations[0];
let metadata = first_relation.metadata.as_ref().unwrap();

assert_eq!(metadata.paragraph_id, Some("myresource/0/myresource/100-200".to_string()));
assert_eq!(metadata.source_start, Some(0));
assert_eq!(metadata.source_end, Some(10));
assert_eq!(metadata.to_start, Some(11));
assert_eq!(metadata.to_end, Some(20));
assert_eq!(metadata.data_augmentation_task_id, Some("mytask".to_string()));

let neighbour_relations = extract_relations(&response);
assert!(expected.is_subset(&neighbour_relations));

Expand Down
6 changes: 3 additions & 3 deletions nucliadb_relations2/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ impl RelationsWriterService {
let source = relation.source.as_ref().expect("Missing source");
let source_value = source.value.as_str();
let source_type = io_maps::node_type_to_u64(source.ntype());
let soruce_subtype = source.subtype.as_str();
let source_subtype = source.subtype.as_str();

let target = relation.to.as_ref().expect("Missing target");
let target_value = target.value.as_str();
Expand All @@ -189,7 +189,7 @@ impl RelationsWriterService {
self.schema.resource_id => resource_id,
self.schema.source_value => source_value,
self.schema.source_type => source_type,
self.schema.source_subtype => soruce_subtype,
self.schema.source_subtype => source_subtype,
self.schema.target_value => target_value,
self.schema.target_type => target_type,
self.schema.target_subtype => target_subtype,
Expand All @@ -199,7 +199,7 @@ impl RelationsWriterService {

if let Some(metadata) = relation.metadata.as_ref() {
let encoded_metadata = metadata.encode_to_vec();
new_doc.add_bytes(self.schema.label, encoded_metadata);
new_doc.add_bytes(self.schema.metadata, encoded_metadata);
}

self.writer.add_document(new_doc)?;
Expand Down
16 changes: 16 additions & 0 deletions nucliadb_relations2/tests/common/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,19 @@ pub fn create_relation(
}),
}
}

pub fn create_relation_with_metadata(
source: String,
source_node_type: NodeType,
source_subtype: String,
to: String,
to_node_type: NodeType,
to_subtype: String,
rel_type: RelationType,
metadata: RelationMetadata,
) -> Relation {
let mut relation =
create_relation(source, source_node_type, source_subtype, to, to_node_type, to_subtype, rel_type);
relation.metadata = Some(metadata);
relation
}
47 changes: 44 additions & 3 deletions nucliadb_relations2/tests/test_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ use nucliadb_core::protos::entities_subgraph_request::DeletedEntities;
use nucliadb_core::protos::relation::RelationType;
use nucliadb_core::protos::relation_node::NodeType;
use nucliadb_core::protos::{
EntitiesSubgraphRequest, RelationNodeFilter, RelationPrefixSearchRequest, RelationSearchRequest, Resource,
ResourceId,
EntitiesSubgraphRequest, RelationMetadata, RelationNodeFilter, RelationPrefixSearchRequest, RelationSearchRequest,
Resource, ResourceId,
};
use nucliadb_core::relations::*;
use nucliadb_relations2::reader::RelationsReaderService;
Expand Down Expand Up @@ -93,14 +93,23 @@ fn create_reader() -> NodeResult<RelationsReaderService> {
"PEOPLE".to_string(),
RelationType::Entity,
),
common::create_relation(
common::create_relation_with_metadata(
"Anthony".to_string(),
NodeType::Entity,
"PEOPLE".to_string(),
"Netherlands".to_string(),
NodeType::Entity,
"PLACES".to_string(),
RelationType::Entity,
RelationMetadata {
paragraph_id: Some("myresource/0/myresource/100-200".to_string()),
source_start: Some(0),
source_end: Some(10),
to_start: Some(11),
to_end: Some(20),
data_augmentation_task_id: Some("mytask".to_string()),
..Default::default()
},
),
common::create_relation(
"Anna".to_string(),
Expand Down Expand Up @@ -212,6 +221,38 @@ fn test_search() -> NodeResult<()> {
Ok(())
}

#[test]
fn test_search_metadata() -> NodeResult<()> {
let reader = create_reader()?;

let result = reader.search(&RelationSearchRequest {
subgraph: Some(EntitiesSubgraphRequest {
depth: Some(1_i32),
entry_points: vec![common::create_relation_node(
"Anthony".to_string(),
NodeType::Entity,
"PEOPLE".to_string(),
)],
..Default::default()
}),
..Default::default()
})?;

let subgraph = result.subgraph.unwrap();
assert_eq!(subgraph.relations.len(), 1);

let relation = &subgraph.relations[0];
let metadata = relation.metadata.as_ref().unwrap();
assert_eq!(metadata.paragraph_id, Some("myresource/0/myresource/100-200".to_string()));
assert_eq!(metadata.source_start, Some(0));
assert_eq!(metadata.source_end, Some(10));
assert_eq!(metadata.to_start, Some(11));
assert_eq!(metadata.to_end, Some(20));
assert_eq!(metadata.data_augmentation_task_id, Some("mytask".to_string()));

Ok(())
}

#[test]
fn test_prefix_search() -> NodeResult<()> {
let reader = create_reader()?;
Expand Down
Loading

0 comments on commit 4f491f5

Please sign in to comment.