Skip to content

Commit

Permalink
LB-1687: Add mapping info to listen exports
Browse files Browse the repository at this point in the history
The mapping information is available in the listens api, make it available in listen exports as well.
  • Loading branch information
amCap1712 committed Jan 10, 2025
1 parent 8465f71 commit e3a5699
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 20 deletions.
86 changes: 75 additions & 11 deletions listenbrainz/background/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,81 @@ def export_query_to_jsonl(conn, file_path, query, **kwargs):
def export_listens_for_time_range(ts_conn, file_path, user_id: int, start_time: datetime, end_time: datetime):
""" Export user's listens for a given time period. """
query = """
SELECT jsonb_build_object(
'listened_at'
, extract(epoch from listened_at)
, 'track_metadata'
, jsonb_set(data, '{recording_msid}'::text[], to_jsonb(recording_msid::text))
)::text as line
FROM listen
WHERE listened_at >= :start_time
AND listened_at <= :end_time
AND user_id = :user_id
ORDER BY listened_at ASC
WITH selected_listens AS (
SELECT l.listened_at
, l.data
, l.recording_msid
, COALESCE((data->'additional_info'->>'recording_mbid')::uuid, user_mm.recording_mbid, mm.recording_mbid, other_mm.recording_mbid) AS recording_mbid
FROM listen l
LEFT JOIN mbid_mapping mm
ON l.recording_msid = mm.recording_msid
LEFT JOIN mbid_manual_mapping user_mm
ON l.recording_msid = user_mm.recording_msid
AND user_mm.user_id = l.user_id
LEFT JOIN mbid_manual_mapping_top other_mm
ON l.recording_msid = other_mm.recording_msid
WHERE listened_at >= :start_time
AND listened_at <= :end_time
AND l.user_id = :user_id
)
SELECT jsonb_build_object(
'listened_at'
, extract(epoch from listened_at)
, 'track_metadata'
, jsonb_set(
jsonb_set(data, '{recording_msid}'::text[], to_jsonb(recording_msid::text)),
'{mbid_mapping}'::text[]
, CASE
WHEN mbc.recording_mbid IS NULL
THEN 'null'::jsonb
ELSE
jsonb_build_object(
'recording_name'
, mbc.recording_data->>'name'
, 'recording_mbid'
, mbc.recording_mbid::text
, 'release_mbid'
, mbc.release_mbid::text
, 'artist_mbids'
, mbc.artist_mbids::TEXT[]
, 'caa_id'
, (mbc.release_data->>'caa_id')::bigint
, 'caa_release_mbid'
, (mbc.release_data->>'caa_release_mbid')::text
, 'artists'
, jsonb_agg(
jsonb_build_object(
'artist_credit_name'
, artist->>'name'
, 'join_phrase'
, artist->>'join_phrase'
, 'artist_mbid'
, mbc.artist_mbids[position]
)
ORDER BY position
)
)
END
)
)::text as line
FROM selected_listens sl
LEFT JOIN mapping.mb_metadata_cache mbc
ON sl.recording_mbid = mbc.recording_mbid
LEFT JOIN LATERAL jsonb_array_elements(artist_data->'artists') WITH ORDINALITY artists(artist, position)
ON TRUE
GROUP BY sl.listened_at
, sl.recording_msid
, sl.data
, mbc.recording_mbid
, recording_data->>'name'
, release_mbid
, artist_mbids
, artist_data->>'name'
, recording_data->>'name'
, release_data->>'name'
, release_data->>'caa_id'
, release_data->>'caa_release_mbid'
ORDER BY listened_at
"""
return export_query_to_jsonl(ts_conn, file_path, query, user_id=user_id, start_time=start_time, end_time=end_time)

Expand Down
101 changes: 92 additions & 9 deletions listenbrainz/tests/integration/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
from brainzutils import cache
from sqlalchemy import text

import listenbrainz.db.user as db_user
import listenbrainz.db.pinned_recording as db_pinned_rec
import listenbrainz.db.feedback as db_feedback
import listenbrainz.db.pinned_recording as db_pinned_rec
import listenbrainz.db.user as db_user
from listenbrainz.background.export import cleanup_old_exports
from listenbrainz.db.model.feedback import Feedback

from listenbrainz.db.model.pinned_recording import WritablePinnedRecording
from listenbrainz.listenstore.timescale_utils import recalculate_all_user_data
from listenbrainz.tests.integration import ListenAPIIntegrationTestCase
Expand All @@ -26,6 +25,52 @@ def setUp(self):
self.user = db_user.get_or_create(self.db_conn, 1799, 'lucifer-export')
db_user.agree_to_gdpr(self.db_conn, self.user['musicbrainz_id'])
self.redis = cache._r
self.recording = {
"recording_mbid": "6b64a82d-0aa8-430e-bf25-26aa4c569af0",
"artist_mbids": ["d15721d8-56b4-453d-b506-fc915b14cba2"],
"release_mbid": "f10badef-094b-48b1-b345-cddfc3d41673",
"recording_data": {
"name": "Sister",
"rels": [
{
"type": "performer", "artist_mbid": "d15721d8-56b4-453d-b506-fc915b14cba2",
"artist_name": "The Black Keys"
}
],
"length": 205000
},
"artist_data": {
"name": "The Black Keys",
"artists": [
{
"area": "United States",
"name": "The Black Keys",
"rels": {
"lyrics": "https://www.musixmatch.com/artist/The-Black-Keys",
"youtube": "https://www.youtube.com/channel/UCJL3h2-wEOB6EigQOBZ3ryg",
"wikidata": "https://www.wikidata.org/wiki/Q606226",
"streaming": "https://tidal.com/artist/64643",
"free streaming": "https://www.deezer.com/artist/2483",
"social network": "https://www.instagram.com/theblackkeys/",
"official homepage": "https://www.theblackkeys.com/",
"purchase for download": "https://www.qobuz.com/us-en/interpreter/-/40589",
"purchase for mail-order": "https://www.cdjapan.co.jp/person/700225155"},
"type": "Group", "begin_year": 2001,
"join_phrase": ""}
],
"artist_credit_id": 59036
},
"tag_data": {},
"release_data": {
"mbid": "f10badef-094b-48b1-b345-cddfc3d41673",
"name": "El Camino",
"year": 2011,
"caa_id": 39543436175,
"caa_release_mbid": "2fa2b8f0-e4ab-46e8-8ab4-91d0e1aecfad",
"album_artist_name": "The Black Keys",
"release_group_mbid": "c2eed4c1-5cd9-469a-9075-b82077093967"
},
}

def tearDown(self):
with self.app.app_context():
Expand All @@ -34,6 +79,29 @@ def tearDown(self):
os.remove(os.path.join(self.app.config["USER_DATA_EXPORT_BASE_DIR"], archive))
super(ExportTestCase, self).tearDown()

def create_mapping_record(self, recording_msid):
self.ts_conn.execute(text("""
INSERT INTO mapping.mb_metadata_cache
(recording_mbid, artist_mbids, release_mbid, recording_data, artist_data, tag_data, release_data, dirty)
VALUES (:recording_mbid ::UUID, :artist_mbids ::UUID[], :release_mbid ::UUID, :recording_data, :artist_data, :tag_data, :release_data, 'f')
"""), {
"recording_mbid": self.recording["recording_mbid"],
"artist_mbids": self.recording["artist_mbids"],
"release_mbid": self.recording["release_mbid"],
"recording_data": json.dumps(self.recording["recording_data"]),
"artist_data": json.dumps(self.recording["artist_data"]),
"release_data": json.dumps(self.recording["release_data"]),
"tag_data": json.dumps(self.recording["tag_data"])
})
self.ts_conn.execute(text("""
INSERT INTO mbid_mapping (recording_msid, recording_mbid, match_type)
VALUES (:recording_msid, :recording_mbid, 'exact_match')
"""), {
"recording_msid": recording_msid,
"recording_mbid": self.recording["recording_mbid"]
})
self.ts_conn.commit()

def send_listens(self):
with open(self.path_to_data_file('user_export_test.json')) as f:
payload = json.load(f)
Expand All @@ -52,9 +120,11 @@ def test_export(self):

self.send_listens()
url = self.custom_url_for('api_v1.get_listens', user_name=self.user['musicbrainz_id'])
response = self.wait_for_query_to_have_items(url, 1, query_string={'count': '1'})
response = self.wait_for_query_to_have_items(url, 1, query_string={'count': '3'})
data = json.loads(response.data)['payload']
self.assert200(response)
recording_msid = data["listens"][1]["recording_msid"]
self.create_mapping_record(recording_msid)

pinned_recordings = [
{
Expand Down Expand Up @@ -141,13 +211,8 @@ def test_export(self):
else:
break

with self.app.app_context():
for task in db_conn.execute(text("SELECT * FROM user_data_export")).all():
print(task)

self.assert200(response)
with zipfile.ZipFile(BytesIO(response.data), "r") as export_zip:
export_zip.printdir()
with export_zip.open("user.json", "r") as f:
data = json.load(f)
self.assertEqual(data, {
Expand All @@ -170,6 +235,24 @@ def test_export(self):
self.assertEqual(expected["track_metadata"]["artist_name"], received["track_metadata"]["artist_name"])
self.assertEqual(expected["track_metadata"]["release_name"], received["track_metadata"]["release_name"])
self.assertEqual(expected["listened_at"], received["listened_at"])
if received["track_metadata"]["track_name"] == "Sister":
self.assertEqual({
"caa_id": self.recording["release_data"]["caa_id"],
"caa_release_mbid": self.recording["release_data"]["caa_release_mbid"],
"artist_mbids": self.recording["artist_mbids"],
"recording_name": self.recording["recording_data"]["name"],
"recording_mbid": self.recording["recording_mbid"],
"release_mbid": self.recording["release_data"]["mbid"],
"artists": [
{
"artist_mbid": "d15721d8-56b4-453d-b506-fc915b14cba2",
"join_phrase": "",
"artist_credit_name": "The Black Keys"
}
],
}, received["track_metadata"]["mbid_mapping"])
else:
self.assertEqual(None, received["track_metadata"]["mbid_mapping"])

with export_zip.open("pinned_recording.jsonl", "r") as f:
received_pins = []
Expand Down

0 comments on commit e3a5699

Please sign in to comment.