<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://music-ir.org/mirex/w/index.php?action=history&amp;feed=atom&amp;title=2025%3AMusic_Description_%26_Captioning_Results</id>
	<title>2025:Music Description &amp; Captioning Results - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://music-ir.org/mirex/w/index.php?action=history&amp;feed=atom&amp;title=2025%3AMusic_Description_%26_Captioning_Results"/>
	<link rel="alternate" type="text/html" href="https://music-ir.org/mirex/w/index.php?title=2025:Music_Description_%26_Captioning_Results&amp;action=history"/>
	<updated>2026-04-05T17:59:10Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.31.1</generator>
	<entry>
		<id>https://music-ir.org/mirex/w/index.php?title=2025:Music_Description_%26_Captioning_Results&amp;diff=14787&amp;oldid=prev</id>
		<title>Ldzhangyx: /* Results */</title>
		<link rel="alternate" type="text/html" href="https://music-ir.org/mirex/w/index.php?title=2025:Music_Description_%26_Captioning_Results&amp;diff=14787&amp;oldid=prev"/>
		<updated>2025-09-11T10:20:07Z</updated>

		<summary type="html">&lt;p&gt;‎&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Results&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 10:20, 11 September 2025&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l20&quot; &gt;Line 20:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 20:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| MusiLingo&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| MusiLingo&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| MusicInstruct Dataset&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| MusicInstruct Dataset&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.302*&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;'''&lt;/ins&gt;0.302&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;'''&lt;/ins&gt;*&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.081*&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.081*&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.143*&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.143*&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|- &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;style=&amp;quot;background-color:#FF0;&amp;quot;&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|- &amp;#160;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| Baseline 3&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| Baseline 3&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| Qwen2-Audio&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| Qwen2-Audio&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| Multiple datasets&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| Multiple datasets&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.285*&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.285*&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.234*&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;'''&lt;/ins&gt;0.234&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;'''&lt;/ins&gt;*&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | 0.285*&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;| style=&amp;quot;text-align:right;&amp;quot; | &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;'''&lt;/ins&gt;0.285&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;'''&lt;/ins&gt;*&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|}&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;|}&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Reported by paper results.&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;&amp;#160;&lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Reported by paper results.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Ldzhangyx</name></author>
		
	</entry>
	<entry>
		<id>https://music-ir.org/mirex/w/index.php?title=2025:Music_Description_%26_Captioning_Results&amp;diff=14786&amp;oldid=prev</id>
		<title>Ldzhangyx: Created page with &quot;=Results=  {| class=&quot;wikitable&quot; style=&quot;vertical-align:bottom;&quot; |- style=&quot;font-weight:bold;&quot; ! System ! Methods Used ! Trained on the training set of ! style=&quot;text-align:right;...&quot;</title>
		<link rel="alternate" type="text/html" href="https://music-ir.org/mirex/w/index.php?title=2025:Music_Description_%26_Captioning_Results&amp;diff=14786&amp;oldid=prev"/>
		<updated>2025-09-11T10:19:22Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;=Results=  {| class=&amp;quot;wikitable&amp;quot; style=&amp;quot;vertical-align:bottom;&amp;quot; |- style=&amp;quot;font-weight:bold;&amp;quot; ! System ! Methods Used ! Trained on the training set of ! style=&amp;quot;text-align:right;...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;=Results=&lt;br /&gt;
&lt;br /&gt;
{| class=&amp;quot;wikitable&amp;quot; style=&amp;quot;vertical-align:bottom;&amp;quot;&lt;br /&gt;
|- style=&amp;quot;font-weight:bold;&amp;quot;&lt;br /&gt;
! System&lt;br /&gt;
! Methods Used&lt;br /&gt;
! Trained on the training set of&lt;br /&gt;
! style=&amp;quot;text-align:right;&amp;quot; | ROUGE-L&lt;br /&gt;
! style=&amp;quot;text-align:right;&amp;quot; | BLEU (Avg. 1-4)&lt;br /&gt;
! style=&amp;quot;text-align:right;&amp;quot; | METEOR&lt;br /&gt;
|-&lt;br /&gt;
| Baseline 1&lt;br /&gt;
| LP-MusicCaps&lt;br /&gt;
| LP-MusicCaps Dataset&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.119&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.033&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.167&lt;br /&gt;
|- &lt;br /&gt;
| Baseline 2&lt;br /&gt;
| MusiLingo&lt;br /&gt;
| MusicInstruct Dataset&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.302*&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.081*&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.143*&lt;br /&gt;
|- style=&amp;quot;background-color:#FF0;&amp;quot;&lt;br /&gt;
| Baseline 3&lt;br /&gt;
| Qwen2-Audio&lt;br /&gt;
| Multiple datasets&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.285*&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.234*&lt;br /&gt;
| style=&amp;quot;text-align:right;&amp;quot; | 0.285*&lt;br /&gt;
|}&lt;br /&gt;
&lt;br /&gt;
* Reported by paper results.&lt;/div&gt;</summary>
		<author><name>Ldzhangyx</name></author>
		
	</entry>
</feed>