import React from 'react';
import { Helmet } from 'react-helmet-async';
import { motion } from 'framer-motion';
import { Link } from 'react-router-dom';
import { AuthorSection, ShinyButton } from '../shared/Shared';
import { generateMetadata } from '../../metadata/siteMetadata';
import { getAuthorSection } from '../../metadata/authorMetadata';
import { FaSearch } from 'react-icons/fa';
import ImageCarousel from '../shared/ImageCarousel';
import { PageMetadata } from '../shared/PageMetadata';
import { ARTICLE_DATES } from '../../constants/dates';

// Byline entries for this article: a display name paired with a profile URL.
const articleAuthors = [
  { name: 'Pranav Rajpurkar', url: 'https://www.linkedin.com/in/pranavrajpurkar/' },
  { name: 'Samir Rajpurkar', url: 'https://www.linkedin.com/in/samir-rajpurkar-79291396/' }
];

/**
 * Static article-level metadata for this page.
 *
 * The object is spread as props into `<PageMetadata>` by the page component.
 * Published and modified dates both read the same `ARTICLE_DATES.MISS_ANALYSIS`
 * constant, so the article is presented as unmodified since publication.
 */
const pageMetadata = {
  title: 'Analysis of High-Confidence AI Predictions',
  description:
    'Explore how our model identifies critical findings that were initially overlooked in radiologist reports.',
  type: 'article',
  publishedDate: ARTICLE_DATES.MISS_ANALYSIS,
  modifiedDate: ARTICLE_DATES.MISS_ANALYSIS,
  authors: articleAuthors
};

// ---------------------------------------------------------------------------
// Static page content.
//
// These arrays never change, so they live at module scope: each
// `ImageCarousel` then receives the same `images` array identity on every
// render instead of a freshly allocated array.
// ---------------------------------------------------------------------------

/** Carousel slides for the "Acute Inflammatory Conditions" section. */
const acuteConditionsImages = [
  {
    src: "/figs/validation/missanalysis/pancreatitis.png",
    alt: "CT scan showing subtle signs of acute pancreatitis",
    caption: "CT scan showing subtle pancreatic inflammation with peripancreatic stranding, initially missed in the radiology report but detected by a2z-1."
  },
  {
    src: "/figs/validation/missanalysis/cholecystitis.png",
    alt: "CT scan showing cholecystitis findings",
    caption: "CT scan demonstrating pericholecystic inflammatory changes suggestive of cholecystitis, where AI provided more definitive interpretation."
  }
];

/** Carousel slides for the "Critical Finding: Free Air Detection" section. */
const criticalFindingImages = [
  {
    src: "/figs/validation/missanalysis/free_air.png",
    alt: "CT scan showing subtle free air",
    caption: "CT scan showing subtle free air in the peritoneum (81.84% confidence by a2z-1) that was missed in the initial radiology report."
  }
];

/** Carousel slides for the "Incidental Findings" section. */
const incidentalFindingImages = [
  {
    src: "/figs/validation/missanalysis/hiatal_hernia.png",
    alt: "CT scan showing hiatal hernia",
    caption: "Incidental hiatal hernia detected by a2z-1 but not mentioned in the original report."
  },
  {
    src: "/figs/validation/missanalysis/cac.png",
    alt: "CT scan showing coronary artery calcification",
    caption: "Coronary artery calcification identified by a2z-1 as an incidental finding."
  }
];

// Heading class lists shared by every section (h2) and subsection (h3) title.
// Kept as complete literal strings so the utility classes remain discoverable
// by tooling that scans source files for class names.
const sectionHeadingClass = 'text-2xl font-bold mt-8 mb-4';
const subsectionHeadingClass = 'text-xl font-semibold mt-6 mb-4';

/** Props for the blue, left-bordered callout box used inside the article body. */
interface InsightCalloutProps {
  /** Heading text rendered next to the search icon. */
  title: string;
  /** Body content rendered inside the callout's paragraph. */
  children: React.ReactNode;
}

/**
 * Private helper: an indented callout with a search icon, a title and one
 * paragraph of body text. Replaces two identical copies of this markup.
 */
const InsightCallout: React.FC<InsightCalloutProps> = ({ title, children }) => (
  <div className="bg-blue-50 dark:bg-blue-900/30 rounded-lg p-6 my-8 ml-8 border-l-4 border-blue-500 dark:border-blue-400">
    <div className="flex items-center gap-3 mb-4">
      <FaSearch className="text-blue-500 dark:text-blue-400 text-xl" />
      <h4 className="text-lg font-semibold text-blue-800 dark:text-blue-300">
        {title}
      </h4>
    </div>
    <p className="text-gray-700 dark:text-gray-300">{children}</p>
  </div>
);

/** Props accepted by the page component. */
interface HighConfidenceRadiologistMissesProps {
  /** Called when the reader clicks the "Explore partnership opportunities" button. */
  openCalendly: () => void;
}

/**
 * Article page: "High-Confidence AI Predictions: Analysis of Model Performance".
 *
 * Renders, in order:
 *  - document head tags (title, description, Open Graph, Twitter card,
 *    canonical link, JSON-LD) through `Helmet`, using the object returned by
 *    `generateMetadata`;
 *  - `<PageMetadata>` fed with the module-level `pageMetadata` object;
 *  - the animated article title, author section and body copy, with three
 *    image carousels and two callout boxes;
 *  - a partnership call-to-action whose button invokes `openCalendly`;
 *  - a link back to the home route.
 *
 * @param openCalendly - handler for the partnership call-to-action button.
 */
const HighConfidenceRadiologistMisses: React.FC<HighConfidenceRadiologistMissesProps> = ({ openCalendly }) => {
  // NOTE(review): the title/description here differ from those in the
  // module-level `pageMetadata` that is passed to <PageMetadata> below.
  // Confirm which of the two sources is meant to win in the document head.
  const metadata = generateMetadata({
    title: 'High-Confidence AI Predictions: Analysis of Model Performance',
    description: 'Explore how our model identifies critical findings that were initially overlooked in radiologist reports, demonstrating the value of AI-assisted analysis.',
    type: 'article',
    publishedDate: ARTICLE_DATES.MISS_ANALYSIS,
    modifiedDate: ARTICLE_DATES.MISS_ANALYSIS
  });

  // NOTE(review): this display date is hard-coded while the metadata above
  // reads ARTICLE_DATES.MISS_ANALYSIS — verify the two stay in sync.
  const authorSection = getAuthorSection('November 4, 2024');

  return (
    <div className="min-h-screen bg-white dark:bg-dark text-gray-900 dark:text-gray-100">
      <Helmet>
        <title>{metadata.title}</title>
        <meta name="description" content={metadata.description} />
        <meta property="og:title" content={metadata.openGraph.title} />
        <meta property="og:description" content={metadata.openGraph.description} />
        <meta property="og:image" content={metadata.openGraph.image} />
        <meta name="twitter:card" content={metadata.twitter.card} />
        <link rel="canonical" href="https://a2zradiology.ai/validation/miss-analysis" />
        <script type="application/ld+json">
          {JSON.stringify(metadata.jsonLd)}
        </script>
      </Helmet>

      <PageMetadata {...pageMetadata} />

      <main className="max-w-4xl mx-auto py-16 px-4 sm:px-6 lg:px-8">
        {/* The visible headline reuses the Open Graph title. */}
        <motion.h1
          className="text-4xl sm:text-5xl font-bold mb-8 text-primary dark:text-accent"
          initial={{ opacity: 0, y: 20 }}
          animate={{ opacity: 1, y: 0 }}
          transition={{ duration: 0.6 }}
        >
          {metadata.openGraph.title}
        </motion.h1>

        <AuthorSection {...authorSection} />

        {/* NOTE(review): `text-md` is not part of Tailwind's default font-size
            scale (the default step is `text-base`); confirm it is defined in
            this project's Tailwind config, otherwise it has no effect. */}
        <motion.div
          className="text-md sm:text-xl space-y-6 text-gray-800 dark:text-gray-200"
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          transition={{ duration: 0.6, delay: 0.4 }}
        >
          <p>
            At a2z Radiology AI, we let science lead the way. Rigorous validation and clinical
            assessment form the foundation of every insight generated by our models, ensuring that
            our AI is not only technically advanced but also scientifically sound and impactful in
            real-world radiology practice.
          </p>

          <p>
            This analysis focuses on cases from an external dataset, where the a2z-1 model predicted
            a pathology with high confidence (categorized as "likely") but the corresponding ground
            truth labels, derived from radiology reports written by US board-certified radiologists,
            indicated the pathology was absent. External validation, in this context, refers to
            evaluating the model on data it has never encountered before, collected from sources
            different from the training dataset, to ensure its robustness and generalizability.
            These findings reveal some remarkable victories for the AI, showcasing where our model
            truly adds value by catching otherwise overlooked conditions. Let’s dive in!
          </p>

          <h2 className={sectionHeadingClass}>Manual Review of False Positives</h2>

          <h3 className={subsectionHeadingClass}>
            Acute Inflammatory Conditions: Pancreatitis and Cholecystitis
          </h3>
          <p>
            A manual review of select high-confidence false positive cases uncovered additional
            insights that showcased the strengths of the a2z-1 model. In one instance, the model
            predicted acute pancreatitis with high confidence, and while the initial radiology
            report, authored by a US board-certified radiologist, stated, "the pancreas is
            unremarkable," further review revealed subtle swelling of the pancreatic head and body
            with some surrounding stranding and fluid—classic early signs of acute pancreatitis.
          </p>
          <p>
            In another case, the model confidently predicted cholecystitis, while the radiologist
            noted "questionable mild pericholecystic inflammatory changes" without explicitly
            raising the possibility of cholecystitis. Here, the AI stepped in to reduce
            the ambiguity—a key example of how AI can bolster radiologist decision-making by cutting
            through uncertainty. This capability promotes more actionable, direct reports,
            potentially leading to faster intervention and better patient outcomes.
          </p>

          <div className="my-12">
            <ImageCarousel images={acuteConditionsImages} />
          </div>

          <h3 className={subsectionHeadingClass}>
            Critical Finding: Free Air Detection
          </h3>
          <p>
            In another notable instance, the model detected free air in the peritoneum with high
            confidence (81.84%), whereas the original radiologist's report stated, "No evidence of
            free fluid nor free air". This highlights the AI's potential to uncover
            subtle but critical findings that may be overlooked during initial evaluations.
          </p>

          <div className="my-12">
            <ImageCarousel images={criticalFindingImages} />
          </div>

          <h3 className={subsectionHeadingClass}>
            Incidental Findings: Hidden Wins for AI
          </h3>
          <p>
            Our analysis also revealed instances where our model identified non-urgent incidental
            findings, such as hiatal hernias and coronary artery calcification, which were present
            in the imaging studies but not documented in the corresponding radiological reports.
            These are "hidden wins" for AI—a reminder that even secondary findings can matter for
            comprehensive patient care.
          </p>

          <div className="my-12">
            <ImageCarousel images={incidentalFindingImages} />
          </div>

          <h2 className={sectionHeadingClass}>Detection of Similar Pathologies</h2>

          <h3 className={subsectionHeadingClass}>
            When AI Sees the Bigger Picture
          </h3>
          <p>
            In several instances, we observed that the model identified pathological findings that,
            while not explicitly mentioned in the radiological report, were similar to the actual
            conditions and carried similar clinical implications. This phenomenon suggests that the
            model may be capturing underlying patterns indicative of broader disease categories
            rather than specific diagnoses.
          </p>
          <p>
            For example, in one case, the model predicted colitis in a patient who was actually
            diagnosed with diverticulitis of the descending colon. Both conditions represent
            inflammatory processes affecting the colon, albeit with distinct etiologies and
            manifestations. This misclassification, while not entirely accurate, demonstrates the
            model's uncanny ability to detect inflammatory changes in the colonic region that may
            have shared features.
          </p>
          <p>
            Another illustrative case involved a patient with radiological findings consistent with
            hepatic steatosis and portal hypertension. The report detailed fatty infiltration of the
            liver, accompanied by hepatosplenomegaly and recanalized paraumbilical veins. The model,
            however, predicted cirrhosis. While not precisely correct, this prediction aligns with
            the constellation of findings often associated with chronic liver disease—highlighting
            the AI's skill at piecing together complex patterns.
          </p>

          <InsightCallout title="Inflammatory Process Ambiguity">
            The model flagged inflammatory changes suggestive of colitis, which was subsequently
            reported as diverticulitis. Both involve colonic inflammation but differ in distribution
            and associated findings, which may not always be distinguishable in early-stage imaging.
            The overlap here highlights the AI's tendency to generalize inflammatory markers,
            emphasizing the need for further clinical context.
          </InsightCallout>

          <p>
            These examples show that even when the model's predictions were not entirely correct, it
            still managed to identify relevant abnormalities within the same general category. This
            demonstrates the model's potential to highlight areas that need attention, even if the
            exact diagnosis isn't perfect. In practice, this means the AI can serve as a valuable
            tool for radiologists, prompting them to take a closer look at findings that might
            otherwise be missed.
          </p>
          <p>
            For instance, even when diverticulitis was misclassified as colitis, the model still
            identified inflammation in the correct region, which could lead to a more thorough
            evaluation and quicker diagnosis. These insights reinforce the importance of interpreting
            AI outputs alongside clinical judgment, as the AI can guide radiologists towards
            abnormalities that need their attention, even if the exact classification isn't always
            precise.
          </p>

          <h2 className={sectionHeadingClass}>Labeling Errors in Certain Cases</h2>

          <h3 className={subsectionHeadingClass}>
            The Hidden Challenges of Labeling
          </h3>
          <p>
            In some instances, the model’s high-confidence predictions were correct, but the
            corresponding labels were incorrectly marked as negative due to errors in the
            report-to-label conversion process. In these cases, it is evident that the model's
            predictions were correct, but the labels were incorrect due to understandable human
            errors during the report-to-label conversion process.
          </p>
          <p>
            For example, in one case, the model confidently predicted the presence of retroperitoneal
            hemorrhage. The report noted, "subcapsular and perinephric hematoma," consistent with the
            model's prediction, but the ground truth label incorrectly indicated that no
            retroperitoneal hematoma was present. Similarly, in another case, the model accurately
            predicted an obstructive kidney stone, and the report explicitly stated, "a 2 mm stone in
            the distal left ureter causing mild hydronephrosis," yet the label was marked negative.
          </p>

          <InsightCallout title="Report-to-Label Conversion Challenges">
            Terminology such as "trace," "mild," or "equivocal" presents challenges in
            report-to-label conversion, leading to mislabeling during AI training. These descriptors
            introduce ambiguity that may misalign with binary classification, underscoring the need
            for radiologist oversight to refine training labels and capture clinical nuance
            accurately.
          </InsightCallout>

          <p>
            While the proportion of labeling errors was less than 1%, these were overrepresented in
            the model error analysis, suggesting that a portion of the model's false positives
            stemmed from inaccuracies in the labeling process rather than issues with the model's
            performance. This overrepresentation doesn't indicate a systemic problem with the
            labeling methodology, but rather highlights the challenges in accurately labeling edge
            cases or ambiguous instances where the report is not completely explicit.
          </p>

          <h2 className={sectionHeadingClass}>
            Where AI Meets Radiology: Our Vision Forward
          </h2>
          <p>
            At a2z Radiology AI, we are driven by a simple yet powerful goal—leveraging AI to
            elevate the practice of radiology. The insights presented in this analysis reveal that
            our AI doesn't just replicate what is already known; it challenges assumptions, uncovers
            subtle findings, and adds value where it matters most. Our work underscores the essential
            partnership between AI and radiologists, where each complements the other's strengths. We
            look forward to collaborating with radiology practices across the globe to enhance
            diagnostic accuracy, reduce variability, and ultimately improve patient outcomes.
          </p>

          {/* Partnership call-to-action; the button delegates to the caller-supplied handler. */}
          <div className="bg-blue-100 dark:bg-blue-900 p-4 rounded-lg mb-8">
            <p className="font-bold text-lg mb-2">Design Partnership Opportunity</p>
            <p>
              If you're looking to elevate your radiology practice with scientifically backed AI
              tools, we’re here to collaborate. The a2z-1 model is ready to work with your
              retrospective data, providing validation insights and benchmarking your practice
              against key quality metrics in abdominal imaging. Together, we can refine insights,
              improve reporting comprehensiveness, and push the boundaries of radiology quality
              improvement.
            </p>
            <ShinyButton onClick={openCalendly} className="mt-4">
              Explore partnership opportunities
            </ShinyButton>
          </div>
        </motion.div>

        <motion.div
          className="mt-12"
          initial={{ opacity: 0 }}
          animate={{ opacity: 1 }}
          transition={{ duration: 0.6, delay: 1.8 }}
        >
          <Link to="/" className="text-primary dark:text-accent hover:underline">
            &larr; Back to Home
          </Link>
        </motion.div>
      </main>
    </div>
  );
};

// Default export: the article page component defined above.
export default HighConfidenceRadiologistMisses;
