-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
822600f
commit f8e91c2
Showing
23 changed files
with
6,424 additions
and
893 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,357 @@ | ||
<!DOCTYPE html> | ||
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head> | ||
|
||
<meta charset="utf-8"> | ||
<meta name="generator" content="quarto-1.3.450"> | ||
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes"> | ||
|
||
<meta name="dcterms.date" content="2023-10-27"> | ||
|
||
<title>goal-misgeneralization</title> | ||
<style> | ||
code{white-space: pre-wrap;} | ||
span.smallcaps{font-variant: small-caps;} | ||
div.columns{display: flex; gap: min(4vw, 1.5em);} | ||
div.column{flex: auto; overflow-x: auto;} | ||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} | ||
ul.task-list{list-style: none;} | ||
ul.task-list li input[type="checkbox"] { | ||
width: 0.8em; | ||
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */ | ||
vertical-align: middle; | ||
} | ||
</style> | ||
|
||
|
||
<script src="site_libs/quarto-nav/quarto-nav.js"></script> | ||
<script src="site_libs/clipboard/clipboard.min.js"></script> | ||
<script src="site_libs/quarto-search/autocomplete.umd.js"></script> | ||
<script src="site_libs/quarto-search/fuse.min.js"></script> | ||
<script src="site_libs/quarto-search/quarto-search.js"></script> | ||
<meta name="quarto:offset" content="./"> | ||
<link href="./res/favicon/favicon.ico" rel="icon"> | ||
<script src="site_libs/quarto-html/quarto.js"></script> | ||
<script src="site_libs/quarto-html/popper.min.js"></script> | ||
<script src="site_libs/quarto-html/tippy.umd.min.js"></script> | ||
<script src="site_libs/quarto-html/anchor.min.js"></script> | ||
<link href="site_libs/quarto-html/tippy.css" rel="stylesheet"> | ||
<link href="site_libs/quarto-html/quarto-syntax-highlighting.css" rel="stylesheet" id="quarto-text-highlighting-styles"> | ||
<script src="site_libs/bootstrap/bootstrap.min.js"></script> | ||
<link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet"> | ||
<link href="site_libs/bootstrap/bootstrap.min.css" rel="stylesheet" id="quarto-bootstrap" data-mode="light"> | ||
<script id="quarto-search-options" type="application/json">{ | ||
"location": "sidebar", | ||
"copy-button": false, | ||
"collapse-after": 3, | ||
"panel-placement": "start", | ||
"type": "textbox", | ||
"limit": 20, | ||
"language": { | ||
"search-no-results-text": "No results", | ||
"search-matching-documents-text": "matching documents", | ||
"search-copy-link-title": "Copy link to search", | ||
"search-hide-matches-text": "Hide additional matches", | ||
"search-more-match-text": "more match in this document", | ||
"search-more-matches-text": "more matches in this document", | ||
"search-clear-button-title": "Clear", | ||
"search-detached-cancel-button-title": "Cancel", | ||
"search-submit-button-title": "Submit", | ||
"search-label": "Search" | ||
} | ||
}</script> | ||
<script type="application/json" class="js-hypothesis-config"> | ||
{ | ||
"openSidebar": false, | ||
"showHighlights": "whenSidebarOpen", | ||
"theme": "clean" | ||
} | ||
</script> | ||
<script async="" src="https://hypothes.is/embed.js"></script> | ||
|
||
|
||
<link rel="stylesheet" href="res/default.css"> | ||
</head> | ||
|
||
<body class="fullcontent"> | ||
|
||
<div id="quarto-search-results"></div> | ||
<!-- content --> | ||
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article"> | ||
<!-- sidebar --> | ||
<!-- margin-sidebar --> | ||
|
||
<!-- main --> | ||
<main class="content" id="quarto-document-content"> | ||
|
||
|
||
|
||
<section id="goal-misgeneralization" class="level1"> | ||
<h1>Goal Misgeneralization</h1> | ||
<p>The range of environments in which an AI’s behavior is different from its training environment.</p> | ||
<blockquote class="blockquote"> | ||
<p>But this includes environments in which the AI acts uncapably.</p> | ||
</blockquote> | ||
<section id="goal-directedness-is-an-underdefined-concept" class="level2"> | ||
<h2 class="anchored" data-anchor-id="goal-directedness-is-an-underdefined-concept">Goal-directedness is an underdefined concept</h2> | ||
<p><a href="https://deepmindsafetyresearch.medium.com/goal-misgeneralisation-why-correct-specifications-arent-enough-for-correct-goals-cf96ebc60924">Robin Shah et al.</a> use “how easy a model can be fine tuned to some task” as a measure for the degree of that models capability for that task.<br> | ||
I don’t like this tuneableness.</p> | ||
<p><a href="https://arxiv.org/abs/2105.14111">Langosco et al</a> might have a better definition but I have to check that out still.</p> | ||
|
||
|
||
</section> | ||
</section> | ||
|
||
</main> <!-- /main --> | ||
<script id="quarto-html-after-body" type="application/javascript"> | ||
window.document.addEventListener("DOMContentLoaded", function (event) { | ||
const toggleBodyColorMode = (bsSheetEl) => { | ||
const mode = bsSheetEl.getAttribute("data-mode"); | ||
const bodyEl = window.document.querySelector("body"); | ||
if (mode === "dark") { | ||
bodyEl.classList.add("quarto-dark"); | ||
bodyEl.classList.remove("quarto-light"); | ||
} else { | ||
bodyEl.classList.add("quarto-light"); | ||
bodyEl.classList.remove("quarto-dark"); | ||
} | ||
} | ||
const toggleBodyColorPrimary = () => { | ||
const bsSheetEl = window.document.querySelector("link#quarto-bootstrap"); | ||
if (bsSheetEl) { | ||
toggleBodyColorMode(bsSheetEl); | ||
} | ||
} | ||
toggleBodyColorPrimary(); | ||
const icon = ""; | ||
const anchorJS = new window.AnchorJS(); | ||
anchorJS.options = { | ||
placement: 'right', | ||
icon: icon | ||
}; | ||
anchorJS.add('.anchored'); | ||
const isCodeAnnotation = (el) => { | ||
for (const clz of el.classList) { | ||
if (clz.startsWith('code-annotation-')) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
const clipboard = new window.ClipboardJS('.code-copy-button', { | ||
text: function(trigger) { | ||
const codeEl = trigger.previousElementSibling.cloneNode(true); | ||
for (const childEl of codeEl.children) { | ||
if (isCodeAnnotation(childEl)) { | ||
childEl.remove(); | ||
} | ||
} | ||
return codeEl.innerText; | ||
} | ||
}); | ||
clipboard.on('success', function(e) { | ||
// button target | ||
const button = e.trigger; | ||
// don't keep focus | ||
button.blur(); | ||
// flash "checked" | ||
button.classList.add('code-copy-button-checked'); | ||
var currentTitle = button.getAttribute("title"); | ||
button.setAttribute("title", "Copied!"); | ||
let tooltip; | ||
if (window.bootstrap) { | ||
button.setAttribute("data-bs-toggle", "tooltip"); | ||
button.setAttribute("data-bs-placement", "left"); | ||
button.setAttribute("data-bs-title", "Copied!"); | ||
tooltip = new bootstrap.Tooltip(button, | ||
{ trigger: "manual", | ||
customClass: "code-copy-button-tooltip", | ||
offset: [0, -8]}); | ||
tooltip.show(); | ||
} | ||
setTimeout(function() { | ||
if (tooltip) { | ||
tooltip.hide(); | ||
button.removeAttribute("data-bs-title"); | ||
button.removeAttribute("data-bs-toggle"); | ||
button.removeAttribute("data-bs-placement"); | ||
} | ||
button.setAttribute("title", currentTitle); | ||
button.classList.remove('code-copy-button-checked'); | ||
}, 1000); | ||
// clear code selection | ||
e.clearSelection(); | ||
}); | ||
function tippyHover(el, contentFn) { | ||
const config = { | ||
allowHTML: true, | ||
content: contentFn, | ||
maxWidth: 500, | ||
delay: 100, | ||
arrow: false, | ||
appendTo: function(el) { | ||
return el.parentElement; | ||
}, | ||
interactive: true, | ||
interactiveBorder: 10, | ||
theme: 'quarto', | ||
placement: 'bottom-start' | ||
}; | ||
window.tippy(el, config); | ||
} | ||
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]'); | ||
for (var i=0; i<noterefs.length; i++) { | ||
const ref = noterefs[i]; | ||
tippyHover(ref, function() { | ||
// use id or data attribute instead here | ||
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href'); | ||
try { href = new URL(href).hash; } catch {} | ||
const id = href.replace(/^#\/?/, ""); | ||
const note = window.document.getElementById(id); | ||
return note.innerHTML; | ||
}); | ||
} | ||
let selectedAnnoteEl; | ||
const selectorForAnnotation = ( cell, annotation) => { | ||
let cellAttr = 'data-code-cell="' + cell + '"'; | ||
let lineAttr = 'data-code-annotation="' + annotation + '"'; | ||
const selector = 'span[' + cellAttr + '][' + lineAttr + ']'; | ||
return selector; | ||
} | ||
const selectCodeLines = (annoteEl) => { | ||
const doc = window.document; | ||
const targetCell = annoteEl.getAttribute("data-target-cell"); | ||
const targetAnnotation = annoteEl.getAttribute("data-target-annotation"); | ||
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation)); | ||
const lines = annoteSpan.getAttribute("data-code-lines").split(","); | ||
const lineIds = lines.map((line) => { | ||
return targetCell + "-" + line; | ||
}) | ||
let top = null; | ||
let height = null; | ||
let parent = null; | ||
if (lineIds.length > 0) { | ||
//compute the position of the single el (top and bottom and make a div) | ||
const el = window.document.getElementById(lineIds[0]); | ||
top = el.offsetTop; | ||
height = el.offsetHeight; | ||
parent = el.parentElement.parentElement; | ||
if (lineIds.length > 1) { | ||
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]); | ||
const bottom = lastEl.offsetTop + lastEl.offsetHeight; | ||
height = bottom - top; | ||
} | ||
if (top !== null && height !== null && parent !== null) { | ||
// cook up a div (if necessary) and position it | ||
let div = window.document.getElementById("code-annotation-line-highlight"); | ||
if (div === null) { | ||
div = window.document.createElement("div"); | ||
div.setAttribute("id", "code-annotation-line-highlight"); | ||
div.style.position = 'absolute'; | ||
parent.appendChild(div); | ||
} | ||
div.style.top = top - 2 + "px"; | ||
div.style.height = height + 4 + "px"; | ||
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter"); | ||
if (gutterDiv === null) { | ||
gutterDiv = window.document.createElement("div"); | ||
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter"); | ||
gutterDiv.style.position = 'absolute'; | ||
const codeCell = window.document.getElementById(targetCell); | ||
const gutter = codeCell.querySelector('.code-annotation-gutter'); | ||
gutter.appendChild(gutterDiv); | ||
} | ||
gutterDiv.style.top = top - 2 + "px"; | ||
gutterDiv.style.height = height + 4 + "px"; | ||
} | ||
selectedAnnoteEl = annoteEl; | ||
} | ||
}; | ||
const unselectCodeLines = () => { | ||
const elementsIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]; | ||
elementsIds.forEach((elId) => { | ||
const div = window.document.getElementById(elId); | ||
if (div) { | ||
div.remove(); | ||
} | ||
}); | ||
selectedAnnoteEl = undefined; | ||
}; | ||
// Attach click handler to the DT | ||
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]'); | ||
for (const annoteDlNode of annoteDls) { | ||
annoteDlNode.addEventListener('click', (event) => { | ||
const clickedEl = event.target; | ||
if (clickedEl !== selectedAnnoteEl) { | ||
unselectCodeLines(); | ||
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active'); | ||
if (activeEl) { | ||
activeEl.classList.remove('code-annotation-active'); | ||
} | ||
selectCodeLines(clickedEl); | ||
clickedEl.classList.add('code-annotation-active'); | ||
} else { | ||
// Unselect the line | ||
unselectCodeLines(); | ||
clickedEl.classList.remove('code-annotation-active'); | ||
} | ||
}); | ||
} | ||
const findCites = (el) => { | ||
const parentEl = el.parentElement; | ||
if (parentEl) { | ||
const cites = parentEl.dataset.cites; | ||
if (cites) { | ||
return { | ||
el, | ||
cites: cites.split(' ') | ||
}; | ||
} else { | ||
return findCites(el.parentElement) | ||
} | ||
} else { | ||
return undefined; | ||
} | ||
}; | ||
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]'); | ||
for (var i=0; i<bibliorefs.length; i++) { | ||
const ref = bibliorefs[i]; | ||
const citeInfo = findCites(ref); | ||
if (citeInfo) { | ||
tippyHover(citeInfo.el, function() { | ||
var popup = window.document.createElement('div'); | ||
citeInfo.cites.forEach(function(cite) { | ||
var citeDiv = window.document.createElement('div'); | ||
citeDiv.classList.add('hanging-indent'); | ||
citeDiv.classList.add('csl-entry'); | ||
var biblioDiv = window.document.getElementById('ref-' + cite); | ||
if (biblioDiv) { | ||
citeDiv.innerHTML = biblioDiv.innerHTML; | ||
} | ||
popup.appendChild(citeDiv); | ||
}); | ||
return popup.innerHTML; | ||
}); | ||
} | ||
} | ||
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//); | ||
var filterRegex = new RegExp("https:\/\/www\.mousetrap\.blog"); | ||
var isInternal = (href) => { | ||
return filterRegex.test(href) || localhostRegex.test(href); | ||
} | ||
// Inspect non-navigation links and adorn them if external | ||
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item)'); | ||
for (var i=0; i<links.length; i++) { | ||
const link = links[i]; | ||
if (!isInternal(link.href)) { | ||
// default icon | ||
link.classList.add("external"); | ||
} | ||
} | ||
}); | ||
</script> | ||
</div> <!-- /content --> | ||
|
||
|
||
|
||
</body></html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.