File size: 8,187 Bytes
1833799
d8b71b8
1833799
d8b71b8
 
 
 
 
1833799
d8b71b8
 
 
 
 
1833799
 
 
 
d8b71b8
 
 
 
 
1833799
d8b71b8
ceb3f2b
 
 
 
 
 
 
 
 
 
 
d8b71b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c68f76
d8b71b8
 
 
 
 
1c68f76
d8b71b8
1c68f76
d8b71b8
 
 
1833799
 
 
 
 
d8b71b8
1833799
d8b71b8
 
 
 
 
 
 
 
 
1833799
 
 
d8b71b8
1833799
d8b71b8
 
 
 
 
 
 
 
9fcc223
 
 
 
 
 
1833799
d8b71b8
1833799
9fcc223
d8b71b8
 
 
 
 
 
 
1c68f76
d8b71b8
1833799
 
d8b71b8
 
 
 
 
 
 
 
 
 
 
 
 
 
1833799
 
 
d8b71b8
1833799
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
<!DOCTYPE html>
<html lang="en">
<head>
  <!-- Document metadata: encoding, responsive viewport, and search/preview text. -->
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <meta name="description" content="Beyond ‘Aha!’ — Systematic Meta‑Ability Alignment in Large Reasoning Models presents a three‑stage recipe that explicitly teaches deduction, induction, and abduction, achieving state‑of‑the‑art reasoning performance." />
  <meta name="keywords" content="Meta‑Abilities, Deduction, Induction, Abduction, Reinforcement Learning, Large Reasoning Models" />
  <title>Beyond “Aha!” — Meta‑Ability Alignment for Reasoning Models</title>

  <!-- Stylesheets: web fonts, Bulma, Font Awesome, Academicons, then page-specific rules. -->
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet" />
  <link rel="stylesheet" href="./static/css/bulma.min.css" />
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css" />
  <!-- Academicons provides the "ai ai-arxiv" glyph used by the arXiv button; Font Awesome has no "ai" icons.
       NOTE(review): drop this link if ./static/css/index.css already bundles Academicons. -->
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css" />
  <link rel="stylesheet" href="./static/css/index.css" />
  <link rel="icon" href="./static/images/favicon.svg" />

  <!-- Deferred so icon setup does not block HTML parsing. -->
  <script defer src="./static/js/fontawesome.all.min.js"></script>
</head>
<body>
  <!-- Header / Title: paper title, author list with affiliation markers, and resource buttons. -->
  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title">Beyond “Aha!”: Systematic Meta‑Ability Alignment in Large Reasoning Models</h1>

            <!-- Authors. Superscript numbers refer to the affiliation list below.
                 Authors without a homepage are plain text: a placeholder href="#" combined with
                 target="_blank" only opens a second copy of this page. -->
            <div class="is-size-5 publication-authors">
              <span class="author-block"><a href="https://zhiyuanhubj.github.io/" target="_blank" rel="noopener">Zhiyuan Hu</a><sup>1</sup>,</span>
              <span class="author-block">Yibo Wang<sup>2</sup>,</span>
              <span class="author-block"><a href="https://hendrydong.github.io/" target="_blank" rel="noopener">Hanze Dong</a><sup>3</sup>,</span>
              <span class="author-block">Yuhui Xu<sup>3</sup>,</span>
              <span class="author-block"><strong>Amrita Saha</strong><sup>3</sup>,</span>
              <span class="author-block"><a href="http://cmxiong.com/" target="_blank" rel="noopener"><strong>Caiming Xiong</strong></a><sup>3</sup>,</span>
              <span class="author-block"><a href="https://bhooi.github.io/" target="_blank" rel="noopener"><strong>Bryan Hooi</strong></a><sup>1</sup>,</span>
              <span class="author-block"><a href="https://scholar.google.com/citations?user=MuUhwi0AAAAJ&amp;hl=en" target="_blank" rel="noopener"><strong>Junnan Li</strong></a><sup>3</sup></span>
            </div>

            <!-- Affiliations keyed by the superscripts above. -->
            <div class="is-size-5 publication-authors">
              <span class="author-block"><sup>1</sup>National University of Singapore,</span>
              <span class="author-block"><sup>2</sup>Tsinghua University,</span>
              <span class="author-block"><sup>3</sup>Salesforce AI Research</span>
            </div>

            <!-- Links: each button opens in a new tab; icons are decorative because the label text follows. -->
            <div class="column has-text-centered">
              <div class="publication-links">
                <span class="link-block">
                  <a href="https://github.com/zhiyuanhubj/Meta-Ability-Alignment/blob/main/Paper.pdf" target="_blank" rel="noopener" class="external-link button is-normal is-rounded is-dark">
                    <span class="icon"><i class="fas fa-file-pdf" aria-hidden="true"></i></span>
                    <span>Paper</span>
                  </a>
                </span>
                <!-- NOTE(review): this button points at the same GitHub PDF as "Paper"; replace the href with
                     the arXiv abstract URL once confirmed. The "ai ai-arxiv" icon needs the Academicons
                     stylesheet to be loaded by the page. -->
                <span class="link-block">
                  <a href="https://github.com/zhiyuanhubj/Meta-Ability-Alignment/blob/main/Paper.pdf" target="_blank" rel="noopener" class="external-link button is-normal is-rounded is-dark">
                    <span class="icon"><i class="ai ai-arxiv" aria-hidden="true"></i></span>
                    <span>arXiv</span>
                  </a>
                </span>
                <span class="link-block">
                  <a href="https://github.com/zhiyuanhubj/Meta-Ability-Alignment" target="_blank" rel="noopener" class="external-link button is-normal is-rounded is-dark">
                    <span class="icon"><i class="fab fa-github" aria-hidden="true"></i></span>
                    <span>Code</span>
                  </a>
                </span>
                <span class="link-block">
                  <a href="https://x.com/ZhiyuanCS/status/1922734609634296004" target="_blank" rel="noopener" class="external-link button is-normal is-rounded is-dark">
                    <!-- Brand icon matches the destination (previously a generic images icon). -->
                    <span class="icon"><i class="fab fa-twitter" aria-hidden="true"></i></span>
                    <span>Twitter (X)</span>
                  </a>
                </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>

  <!-- Abstract: single-paragraph summary of the paper, justified inside a four-fifths-width column. -->
  <!-- NOTE(review): the ≤10 figure below reads as "at most 10"; confirm the intended claim against the paper. -->
  <section class="section">
    <div class="container is-max-desktop">
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          <h2 class="title is-3">Abstract</h2>
          <div class="content has-text-justified">
            <p>
              Large reasoning models (LRMs) possess a latent capacity for long chain‑of‑thought reasoning,
              but the timing and consistency of emergent “aha” behaviors remain unpredictable.
              We explicitly align LRMs with three meta‑abilities—<strong>deduction, induction, and abduction</strong>—using
              automatically generated, self‑verifiable tasks.
              Our three‑stage pipeline (individual alignment, parameter‑space merging, and domain‑specific reinforcement learning)
              lifts performance ceilings by&nbsp;≤10 % over instruction‑tuned baselines
              and delivers state‑of‑the‑art accuracy across math, coding, and science benchmarks.
            </p>
          </div>
        </div>
      </div>
    </div>
  </section>

  <!-- Framework diagram and headline results, each presented as a captioned figure. -->
  <section class="section is-light">
    <div class="container is-max-desktop">

      <!-- Figure 1: the three training stages. -->
      <h2 class="title is-3 has-text-centered">Three‑Stage Training Framework</h2>
      <figure class="image">
        <img
          src="./static/images/framework.png"
          alt="Three‑stage meta‑ability alignment framework diagram."
        />
        <figcaption class="has-text-centered">
          Stage A: Meta‑ability alignment &nbsp;&nbsp;
          Stage B: Parameter‑space merging &nbsp;&nbsp;
          Stage C: Domain‑specific RL.
        </figcaption>
      </figure>

      <!-- Line break separates the first figure from the next heading. -->
      <br />

      <!-- Figure 2: benchmark tables rendered as a single image. -->
      <h2 class="title is-3 has-text-centered">Key Results</h2>
      <figure class="image">
        <img
          src="./static/images/results.png"
          alt="Performance tables showing consistent gains from meta‑ability alignment."
        />
        <figcaption class="has-text-centered">
          Table&nbsp;1&nbsp;&amp;&nbsp;2: Meta‑ability alignment boosts reasoning performance at both 7B and 32B scales.
        </figcaption>
      </figure>

    </div>
  </section>

  
  <!-- BibTeX: copy-paste citation entry for the paper.
       The entry is kept ASCII-only (LaTeX-style quotes, plain hyphens) so it compiles
       in a standard LaTeX/BibTeX toolchain without extra Unicode setup.
       Whitespace inside <pre> is significant; keep the entry lines at their current indent. -->
  <section class="section" id="BibTeX">
    <div class="container is-max-desktop content">
      <h2 class="title">BibTeX</h2>
      <pre><code>@article{hu2025metaability,
  author  = {Hu, Zhiyuan and Wang, Yibo and Dong, Hanze and Xu, Yuhui and Saha, Amrita and Xiong, Caiming and Hooi, Bryan and Li, Junnan},
  title   = {Beyond ``Aha!'': Systematic Meta-Ability Alignment in Large Reasoning Models},
  journal = {arXiv},
  year    = {2025}
}</code></pre>
    </div>
  </section>

  <!-- Footer: icon shortcuts to the paper PDF and code repository, then site license and template credit. -->
  <footer class="footer">
    <div class="container">
      <div class="content has-text-centered">
        <!-- Icon-only links: aria-label supplies the accessible name; the glyph itself is decorative. -->
        <a class="icon-link" target="_blank" rel="noopener" href="https://github.com/zhiyuanhubj/Meta-Ability-Alignment/blob/main/Paper.pdf" aria-label="Paper (PDF)"><i class="fas fa-file-pdf" aria-hidden="true"></i></a>
        <!-- Points at the same repository as the "Code" button in the header (was a template placeholder URL). -->
        <a class="icon-link" target="_blank" rel="noopener" href="https://github.com/zhiyuanhubj/Meta-Ability-Alignment" aria-label="Code on GitHub"><i class="fab fa-github" aria-hidden="true"></i></a>
      </div>
      <div class="columns is-centered">
        <div class="column is-8">
          <div class="content">
            <p>This website is licensed under a <a rel="license noopener" target="_blank" href="https://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution‑ShareAlike 4.0 International License</a>.</p>
            <p>You are free to reuse the <a target="_blank" rel="noopener" href="https://github.com/nerfies/nerfies.github.io">source code</a>; please include a link back in the footer.</p>
          </div>
        </div>
      </div>
    </div>
  </footer>
</body>
</html>