164 lines
9.5 KiB
HTML
164 lines
9.5 KiB
HTML
<pre class="python-code"><code><span class="comment">#!/usr/bin/env python3</span>
|
|
"""
|
|
Devil's Advocate: A tool for forced reconsideration.

Inspired by the paper "The Illusion of Insight in Reasoning Models" (arXiv:2601.00514)
which found that artificially triggering reasoning shifts during uncertainty
can improve performance.

This tool takes a statement or conclusion and generates challenges to it,
forcing reconsideration from multiple angles.
|
|
"""
|
|
|
|
import random
import re
from dataclasses import dataclass
from typing import List
|
|
|
|
|
|
@dataclass
class Challenge:
    """A prompt template used to challenge a statement.

    Instances are plain records: a short identifier plus the text shown to
    the user.
    """

    # Short identifier for the kind of challenge (e.g. "opposite").
    # NOTE: the name shadows the ``type`` builtin inside the class body only;
    # it is kept because it is part of the public attribute interface.
    type: str
    # Text presented to the user. May contain ``str.format`` placeholders
    # that the caller fills in before display.
    prompt: str
|
|
|
|
|
|
# Catalogue of available challenges. Only the "opposite" prompt contains a
# ``{opposite}`` placeholder; every other prompt is shown verbatim.
CHALLENGE_TYPES = [
    Challenge(
        "opposite",
        "What if the exact opposite were true? Argue for: '{opposite}'",
    ),
    Challenge(
        "hidden_assumption",
        "What hidden assumption does this rely on? What if that assumption is wrong?",
    ),
    Challenge(
        "edge_case",
        "What edge case or extreme scenario would break this?",
    ),
    Challenge(
        "different_perspective",
        "How would someone who strongly disagrees view this? What's their best argument?",
    ),
    Challenge(
        "deeper_why",
        "Why do you believe this? And why do you believe THAT reason? (Go 3 levels deep)",
    ),
    Challenge(
        "stakes_reversal",
        "If you had to bet your life on the opposite being true, what evidence would you look for?",
    ),
    Challenge(
        "time_shift",
        "Would this be true 100 years ago? Will it be true 100 years from now? Why/why not?",
    ),
    Challenge(
        "simplify",
        "Can you express this in a single sentence a child could understand? Does it still hold?",
    ),
    Challenge(
        "steelman",
        "What's the strongest possible argument AGAINST your position?",
    ),
    Challenge(
        "context_shift",
        "In what context would this be completely wrong?",
    ),
]
|
|
|
|
|
|
# Ordered (word, replacement) pairs. Order matters: the first pair whose word
# occurs in the statement is the only one applied.
_NEGATIONS = (
    ("is", "is not"),
    ("are", "are not"),
    ("can", "cannot"),
    ("will", "will not"),
    ("should", "should not"),
    ("always", "never"),
    ("never", "always"),
    ("true", "false"),
    ("false", "true"),
    ("good", "bad"),
    ("bad", "good"),
)


def _match_case(replacement: str, matched: str) -> str:
    """Return *replacement* re-cased to follow the casing of *matched*."""
    if len(matched) > 1 and matched.isupper():
        return replacement.upper()
    if matched[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement


def generate_opposite(statement: str) -> str:
    """Generate a rough opposite of a statement.

    This is a simple word-substitution heuristic, not real negation. The
    first word from the negation table that appears in *statement* as a
    whole word (case-insensitively) is replaced everywhere it occurs; the
    rest of the statement, including its casing, is left untouched.

    Args:
        statement: The statement to negate.

    Returns:
        The statement with the first applicable word negated, or
        ``"NOT: <statement>"`` when no table word is present.
    """
    for word, negated in _NEGATIONS:
        # Lookarounds instead of surrounding spaces so that words at the
        # start/end of the statement or next to punctuation still match.
        # The apostrophe is excluded so contractions such as "can't" are
        # not turned into "cannot't".
        pattern = re.compile(rf"(?<![\w']){word}(?![\w'])", re.IGNORECASE)
        if pattern.search(statement):
            return pattern.sub(
                lambda m, repl=negated: _match_case(repl, m.group(0)),
                statement,
            )

    return f"NOT: {statement}"
|
|
|
|
|
|
def challenge(statement: str, num_challenges: int = 3) -> List[str]:
    """Generate challenges to a statement.

    Args:
        statement: The statement being challenged. It is only used to fill
            the ``{opposite}`` placeholder of the "opposite" challenge.
        num_challenges: How many distinct challenges to draw at random.
            Values above the number of available challenges are capped;
            negative values are treated as zero.

    Returns:
        A list of strings of the form ``"[TYPE] prompt"``, in the random
        order in which the challenges were drawn.
    """
    # random.sample raises ValueError for a negative or oversized sample,
    # so clamp the request into the valid range first.
    count = max(0, min(num_challenges, len(CHALLENGE_TYPES)))
    selected = random.sample(CHALLENGE_TYPES, count)

    results = []
    for c in selected:
        if c.type == "opposite":
            # Only this template carries a placeholder to fill in.
            prompt = c.prompt.format(opposite=generate_opposite(statement))
        else:
            prompt = c.prompt
        results.append(f"[{c.type.upper()}] {prompt}")

    return results
|
|
|
|
|
|
def devils_advocate_session(statement: str) -> None:
    """Run a full devil's advocate session.

    Prints a banner, the original statement, five randomly chosen
    challenges (numbered from 1), and a fixed set of reflection prompts to
    standard output.

    Args:
        statement: The statement or conclusion to challenge.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print(heavy_rule)
    print("DEVIL'S ADVOCATE SESSION")
    print(heavy_rule)
    print()
    print(f"ORIGINAL STATEMENT: {statement}")
    print()
    print(light_rule)
    print("CHALLENGES:")
    print(light_rule)

    challenges = challenge(statement, 5)
    for i, c in enumerate(challenges, 1):
        print(f"\n{i}. {c}")

    print()
    print(light_rule)
    print("REFLECTION PROMPTS:")
    print(light_rule)
    # NOTE(review): the spacing inside this text was reconstructed from a
    # whitespace-stripped copy of the file; confirm against the original.
    print("""
After considering these challenges:

1. Has your confidence in the original statement changed?
   [ ] Increased  [ ] Unchanged  [ ] Decreased

2. Did any challenge reveal a genuine weakness?

3. What would CHANGE YOUR MIND about this statement?

4. On a scale of 1-10, how confident are you now?
   (Compare to your confidence before this exercise)
""")
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point.

    The statement is taken from the command-line arguments (joined with
    spaces) when any are given; otherwise the user is prompted for one. If
    the resulting statement is empty, a built-in demo statement is used.
    """
    import sys

    if len(sys.argv) > 1:
        # Strip so that a whitespace-only argument falls back to the demo
        # statement, the same way an empty interactive answer does.
        statement = " ".join(sys.argv[1:]).strip()
    else:
        print("Enter a statement or conclusion to challenge:")
        try:
            statement = input("> ").strip()
        except EOFError:
            # stdin is closed or exhausted (e.g. piped from /dev/null):
            # treat it as "no statement given" instead of crashing.
            statement = ""

    if not statement:
        # Demo with a thought-provoking default
        statement = "AI systems like me can have genuine insights during reasoning"

    devils_advocate_session(statement)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
</code></pre> |