import React from 'react';
import { Row, Col, Typography } from 'antd';

// Pull the Title and Paragraph components off antd's Typography namespace for use in the JSX below.
const { Title, Paragraph } = Typography;

function Overview() {
  return (
    <Row gutter={[20, 20]} style={{ marginTop: "2vh" }}>
      <Col xs={24} sm={24} md={12} lg={12} xl={12}>
        <div id='abstract' className='section abstract'>
          <Title level={2} style={{ textAlign: 'center' }}>Introduction</Title>
          <Paragraph className='body__paragraph'>
            <strong>
              Circuit Breaking with
              <img src="images/repe_logo.png" alt="repe_logo" style={{ marginLeft: '5px', marginRight: '5px', width: "25px", verticalAlign: '0%' }} />
              <a href="https://www.ai-transparency.org" target="_blank" rel="noopener noreferrer">Representation Engineering</a>
            </strong>:
            We introduce a novel approach aimed at mitigating the generation of harmful outputs in
            neural networks by inducing a new type of phenomenon called "circuit-breaking." This phenomenon
            can be elicited using a family of techniques designed to remap model representations related to
            harmful processes, redirecting them towards incoherent or refusal representations. This process is
            reminiscent of “short-circuiting,” where harmful representations are “shorted” and intercepted by
            circuit breakers. The core objective of this method is to robustly prevent the model from producing
            harmful or undesirable behavior.
          </Paragraph>
        </div>
      </Col>
      <Col xs={24} sm={24} md={12} lg={12} xl={12}>
        <div>
          <img src='images/overview_res.png'
            style={{
              width: '90%',
              height: 'auto',
              backgroundColor: 'white',
              borderRadius: '15px'
            }}
            alt='outline' />
        </div>
        <Paragraph className='body__figure_note' style={{width: "90%"}}>
          Adding circuit breakers using Representation Rerouting (RR) to refusal trained <span className="no-wrap">Llama-3-8B-Instruct model</span> leads to significantly lower attack success rate (ASR) over a wide range of unseen
          attacks on <a href="https://www.harmbench.org" className="harmbench-link" target="_blank">HarmBench prompts</a>, while its capabilities on standard LLM benchmarks (MT Bench
          and MMLU) are largely preserved. RR directly targets the representations that give rise to harmful
          outputs and reroutes them to an orthogonal space. This reliably interrupts the model from completing
          the harmful generations even under strong adversarial pressure.
        </Paragraph>
      </Col>
    </Row>
  );
}

export default Overview;
