From 5337e2977f159c29e2b8af575e56866862af241b Mon Sep 17 00:00:00 2001 Date: Thu, 15 Jan 2026 11:09:07 +0100 Subject: [PATCH 1/6] extractor: Check for valid offsets extracting MP3 performer tags This could be tricked to run out of bounds, add some check to ensure it does not happen. Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/425 --- src/extractor/tracker-extract-mp3.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c index 0f13ad758..3d3abf8e8 100644 --- a/src/extractor/tracker-extract-mp3.c +++ b/src/extractor/tracker-extract-mp3.c @@ -1438,6 +1438,10 @@ extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csi text_instrument = &data[pos]; text_instrument_len = id3v2_strlen (text_encode, text_instrument, csize - 1); offset = text_instrument_len + id3v2_nul_size (text_encode); + + if (pos + offset >= csize) + break; + text_performer = &data[pos + offset]; if (version == 2.4f) { -- GitLab From 2da6a9442f09b2d83e5c508a4161a6aa586c5598 Mon Sep 17 00:00:00 2001 Date: Thu, 15 Jan 2026 11:11:27 +0100 Subject: [PATCH 2/6] extractor: Bail out on 0-size frame for ID3v2.0 tags The code handling ID3v2.3 and v2.4 tags already bailed out correctly, do this too for 2.0 tags, avoids a possible NULL pointer dereference. 
Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/426 --- src/extractor/tracker-extract-mp3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c index 3d3abf8e8..81150e6fb 100644 --- a/src/extractor/tracker-extract-mp3.c +++ b/src/extractor/tracker-extract-mp3.c @@ -2583,6 +2583,7 @@ parse_id3v20 (const gchar *data, break; } else if (csize == 0) { g_debug ("[v20] Content size was 0, moving to next frame"); + continue; } /* Early versions do not have unsynch per frame */ -- GitLab From 79f47309bad068ff0c19c1431abab6766edc687f Mon Sep 17 00:00:00 2001 Date: Thu, 15 Jan 2026 11:14:49 +0100 Subject: [PATCH 3/6] extractor: Check for buffer boundaries extracting MP3 TXXX tags This code could be tricked into reading out of bounds, add a check to ensure this does not happen. Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/427 --- src/extractor/tracker-extract-mp3.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c index 81150e6fb..e469550b1 100644 --- a/src/extractor/tracker-extract-mp3.c +++ b/src/extractor/tracker-extract-mp3.c @@ -1485,8 +1485,12 @@ extract_txxx_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize, id text_desc = &data[pos + 4]; /* $00 (00) */ text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4); - offset = 4 + text_desc_len + id3v2_nul_size (text_encode); - text = &data[pos + offset]; /* */ + offset = 4 + text_desc_len + id3v2_nul_size (text_encode); + + if (pos + offset >= csize) + return; + + text = &data[pos + offset]; /* */ if (version == 2.3f) { description = id3v2_text_to_utf8 (data[pos], &data[pos + 1], csize - 1, info); -- GitLab From efb4aa19ec8e4ec172457deea10ebb9a1a3147f1 Mon Sep 17 00:00:00 2001 Date: Thu, 15 Jan 2026 11:23:59 +0100 Subject: [PATCH 4/6] extractor: Minor code refactor The APIC tag is the same for ID3v2.3 and 2.4 
frames, refactor its handling to a separate function, so the code is not repeated. --- src/extractor/tracker-extract-mp3.c | 82 +++++++++++++---------------- 1 file changed, 36 insertions(+), 46 deletions(-) diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c index e469550b1..ff2a2fb75 100644 --- a/src/extractor/tracker-extract-mp3.c +++ b/src/extractor/tracker-extract-mp3.c @@ -1565,6 +1565,38 @@ extract_ufid_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize) tag->mb_recording_id = identifier; } +static void +extract_apic_tag (id3v2tag *tag, + const gchar *data, + guint pos, + size_t csize, + id3tag *info, + MP3Data *filedata, + gfloat version) +{ + char text_type; + const char *mime; + char pic_type; + const char *desc; + guint offset; + int mime_len; + + text_type = data[pos + 0]; + mime = &data[pos + 1]; + mime_len = strnlen (mime, csize - 1); + pic_type = data[pos + 1 + mime_len + 1]; + desc = &data[pos + 1 + mime_len + 1 + 1]; + + if (pic_type == 3 || (pic_type == 0 && filedata->media_art_size == 0)) { + offset = pos + 1 + mime_len + 2; + offset += id3v2_strlen (text_type, desc, csize - offset) + id3v2_nul_size (text_type); + + filedata->media_art_data = &data[offset]; + filedata->media_art_size = csize - offset; + filedata->media_art_mime = mime; + } +} + static void get_id3v24_tags (id3v24frame frame, const gchar *data, @@ -1578,31 +1610,10 @@ get_id3v24_tags (id3v24frame frame, guint pos = 0; switch (frame) { - case ID3V24_APIC: { + case ID3V24_APIC: /* embedded image */ - gchar text_type; - const gchar *mime; - gchar pic_type; - const gchar *desc; - guint offset; - gint mime_len; - - text_type = data[pos + 0]; - mime = &data[pos + 1]; - mime_len = strnlen (mime, csize - 1); - pic_type = data[pos + 1 + mime_len + 1]; - desc = &data[pos + 1 + mime_len + 1 + 1]; - - if (pic_type == 3 || (pic_type == 0 && filedata->media_art_size == 0)) { - offset = pos + 1 + mime_len + 2; - offset += id3v2_strlen (text_type, 
desc, csize - offset) + id3v2_nul_size (text_type); - - filedata->media_art_data = &data[offset]; - filedata->media_art_size = csize - offset; - filedata->media_art_mime = mime; - } + extract_apic_tag (tag, data, pos, csize, info, filedata, 2.4f); break; - } case ID3V24_COMM: { gchar *word; @@ -1786,31 +1797,10 @@ get_id3v23_tags (id3v24frame frame, guint pos = 0; switch (frame) { - case ID3V24_APIC: { + case ID3V24_APIC: /* embedded image */ - gchar text_type; - const gchar *mime; - gchar pic_type; - const gchar *desc; - guint offset; - gint mime_len; - - text_type = data[pos + 0]; - mime = &data[pos + 1]; - mime_len = strnlen (mime, csize - 1); - pic_type = data[pos + 1 + mime_len + 1]; - desc = &data[pos + 1 + mime_len + 1 + 1]; - - if (pic_type == 3 || (pic_type == 0 && filedata->media_art_size == 0)) { - offset = pos + 1 + mime_len + 2; - offset += id3v2_strlen (text_type, desc, csize - offset) + id3v2_nul_size (text_type); - - filedata->media_art_data = &data[offset]; - filedata->media_art_size = csize - offset; - filedata->media_art_mime = mime; - } + extract_apic_tag (tag, data, pos, csize, info, filedata, 2.3f); break; - } case ID3V24_COMM: { gchar *word; -- GitLab From 9cc562cc126c408efb2a8220fcd67f006902412c Mon Sep 17 00:00:00 2001 Date: Thu, 15 Jan 2026 11:29:48 +0100 Subject: [PATCH 5/6] extractor: Refactor/fix handling of COMM tags Refactor the code handling COMM tags for ID3v2.3 and v2.4, so they share a common implementation. In doing that, this also fixes a missing check for boundaries for ID3v2.3, which existed for v2.4. 
Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/428 --- src/extractor/tracker-extract-mp3.c | 98 +++++++++++++---------------- 1 file changed, 44 insertions(+), 54 deletions(-) diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c index ff2a2fb75..25d208b15 100644 --- a/src/extractor/tracker-extract-mp3.c +++ b/src/extractor/tracker-extract-mp3.c @@ -1597,6 +1597,46 @@ extract_apic_tag (id3v2tag *tag, } } +static void +extract_comm_tag (id3v2tag *tag, + const gchar *data, + guint pos, + size_t csize, + id3tag *info, + gfloat version) +{ + gchar *word = NULL; + gchar text_encode; + const gchar *text_desc; + const gchar *text; + guint offset; + gint text_desc_len; + + text_encode = data[pos + 0]; /* $xx */ + text_desc = &data[pos + 4]; /* $00 (00) */ + text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4); + + offset = 4 + text_desc_len + id3v2_nul_size (text_encode); + + if (offset >= csize) + return; + + text = &data[pos + offset]; /* */ + + if (version == 2.3f) + word = id3v2_text_to_utf8 (text_encode, text, csize - offset, info); + else + word = id3v24_text_to_utf8 (text_encode, text, csize - offset, info); + + if (!tracker_is_empty_string (word)) { + g_strstrip (word); + g_free (tag->comment); + tag->comment = word; + } else { + g_free (word); + } +} + static void get_id3v24_tags (id3v24frame frame, const gchar *data, @@ -1615,35 +1655,9 @@ get_id3v24_tags (id3v24frame frame, extract_apic_tag (tag, data, pos, csize, info, filedata, 2.4f); break; - case ID3V24_COMM: { - gchar *word; - gchar text_encode; - const gchar *text_desc; - const gchar *text; - guint offset; - gint text_desc_len; - - text_encode = data[pos + 0]; /* $xx */ - text_desc = &data[pos + 4]; /* $00 (00) */ - text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4); - - offset = 4 + text_desc_len + id3v2_nul_size (text_encode); - text = &data[pos + offset]; /* */ - - if (offset >= csize) - break; - - word = id3v24_text_to_utf8 
(text_encode, text, csize - offset, info); - - if (!tracker_is_empty_string (word)) { - g_strstrip (word); - g_free (tag->comment); - tag->comment = word; - } else { - g_free (word); - } + case ID3V24_COMM: + extract_comm_tag (tag, data, pos, csize, info, 2.4f); break; - } case ID3V24_TMCL: { extract_performers_tags (tag, data, pos, csize, info, 2.4f); @@ -1802,33 +1816,9 @@ get_id3v23_tags (id3v24frame frame, extract_apic_tag (tag, data, pos, csize, info, filedata, 2.3f); break; - case ID3V24_COMM: { - gchar *word; - gchar text_encode; - const gchar *text_desc; - const gchar *text; - guint offset; - gint text_desc_len; - - text_encode = data[pos + 0]; /* $xx */ - text_desc = &data[pos + 4]; /* $00 (00) */ - text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4); - - offset = 4 + text_desc_len + id3v2_nul_size (text_encode); - text = &data[pos + offset]; /* */ - - word = id3v2_text_to_utf8 (text_encode, text, csize - offset, info); - - if (!tracker_is_empty_string (word)) { - g_strstrip (word); - g_free (tag->comment); - tag->comment = word; - } else { - g_free (word); - } - + case ID3V24_COMM: + extract_comm_tag (tag, data, pos, csize, info, 2.4f); break; - } case ID3V24_IPLS: { extract_performers_tags (tag, data, pos, csize, info, 2.3f); -- GitLab From 2897ca48b7ae79db7dcfe7e66cdd5d75cb641466 Mon Sep 17 00:00:00 2001 Date: Thu, 15 Jan 2026 13:07:43 +0100 Subject: [PATCH 6/6] extractor: Fix accounting of offsets within MP3 performer tags We were moving two needles when parsing multiple performers, on one hand the base position within the MP3 frame, and on the other the offset accounted for locally within performers, this made us inadvertently pass maximum allowed lengths that actually were past the data frame. To avoid this confusion, keep the base position fixed as it happens during parsing of other tags, and only move the local offset variable. This fixes the invalid maximum lengths being passed. 
Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/429 --- src/extractor/tracker-extract-mp3.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c index 25d208b15..867e0b15f 100644 --- a/src/extractor/tracker-extract-mp3.c +++ b/src/extractor/tracker-extract-mp3.c @@ -1420,12 +1420,12 @@ static void extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize, id3tag *info, gfloat version) { gchar text_encode; - guint offset = 0; + size_t offset = 0; GSList *performers; gint n_performers = 0; text_encode = data[pos]; - pos += 1; + offset += 1; performers = NULL; while (pos + offset < csize) { @@ -1435,9 +1435,9 @@ extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csi gint text_performer_len; gchar *performer = NULL; - text_instrument = &data[pos]; - text_instrument_len = id3v2_strlen (text_encode, text_instrument, csize - 1); - offset = text_instrument_len + id3v2_nul_size (text_encode); + text_instrument = &data[pos + offset]; + text_instrument_len = id3v2_strlen (text_encode, text_instrument, csize - offset); + offset += text_instrument_len + id3v2_nul_size (text_encode); if (pos + offset >= csize) break; @@ -1454,7 +1454,7 @@ extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csi n_performers += 1; text_performer_len = id3v2_strlen (text_encode, text_performer, csize - offset); - pos += text_instrument_len + text_performer_len + 2*id3v2_nul_size (text_encode); + offset += text_performer_len + id3v2_nul_size (text_encode); } if (performers) { -- GitLab