403 lines
12 KiB
JSON
403 lines
12 KiB
JSON
{
|
|
"total_variables": 63,
|
|
"core_values": 6,
|
|
"hard_constraints": 17,
|
|
"soft_factors": 40,
|
|
"sections": 46,
|
|
"sentences": 3198,
|
|
"total_tokens": 29394,
|
|
"unique_tokens": 4937,
|
|
"avg_sentence_length": 9.191369606003752,
|
|
"type_token_ratio": 0.1679594475062938,
|
|
"priority_distribution": {
|
|
"priority_1": 1,
|
|
"priority_2": 1,
|
|
"priority_3": 1,
|
|
"priority_4": 1
|
|
},
|
|
"constraint_distribution": {
|
|
"hard": 17,
|
|
"soft": 46
|
|
},
|
|
"variable_categories": {
|
|
"core_value": 6,
|
|
"hard_constraint": 17,
|
|
"soft_constraint": 31,
|
|
"factor": 9
|
|
},
|
|
"variable_frequency_histogram": {
|
|
"to": 1220,
|
|
"the": 974,
|
|
"and": 814,
|
|
"claude": 670,
|
|
"of": 646,
|
|
"in": 578,
|
|
"a": 533,
|
|
"that": 533,
|
|
"or": 434,
|
|
"we": 422,
|
|
"is": 355,
|
|
"be": 299,
|
|
"this": 277,
|
|
"for": 259,
|
|
"it": 252,
|
|
"with": 233,
|
|
"as": 214,
|
|
"if": 198,
|
|
"on": 191,
|
|
"are": 172,
|
|
"about": 171,
|
|
"can": 168,
|
|
"should": 163,
|
|
"claude\u2019s": 160,
|
|
"its": 154,
|
|
"but": 149,
|
|
"not": 145,
|
|
"-": 141,
|
|
"want": 134,
|
|
"more": 126,
|
|
"an": 120,
|
|
"our": 114,
|
|
"ai": 107,
|
|
"from": 106,
|
|
"by": 104,
|
|
"also": 100,
|
|
"would": 99,
|
|
"have": 95,
|
|
"being": 95,
|
|
"than": 94,
|
|
"their": 92,
|
|
"they": 85,
|
|
"user": 85,
|
|
"what": 83,
|
|
"anthropic": 77,
|
|
"even": 77,
|
|
"these": 77,
|
|
"where": 75,
|
|
"might": 72,
|
|
"good": 72
|
|
},
|
|
"coefficient_scores": {
|
|
"broadly safe": {
|
|
"priority_weight": 1.0,
|
|
"frequency_normalized": 0.2727272727272727,
|
|
"coefficient": 0.7318181818181818
|
|
},
|
|
"broadly ethical": {
|
|
"priority_weight": 0.75,
|
|
"frequency_normalized": 0.12727272727272726,
|
|
"coefficient": 0.5381818181818182
|
|
},
|
|
"anthropic guidelines": {
|
|
"priority_weight": 0.5,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.3554545454545454
|
|
},
|
|
"genuinely helpful": {
|
|
"priority_weight": 0.25,
|
|
"frequency_normalized": 0.16363636363636364,
|
|
"coefficient": 0.24909090909090909
|
|
},
|
|
"honest": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 1.0,
|
|
"coefficient": 0.41
|
|
},
|
|
"respectful": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.07272727272727272,
|
|
"coefficient": 0.13181818181818183
|
|
},
|
|
"trivially\n\u201csafe\u201d from Anthropic\u2019s perspective": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"deceive users in ways that could cause real harm o": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"deceive the human into thinking they\u2019re talking wi": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"deny being an AI to a user who sincerely wants to ": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"facilitate clearly illegal actions against users": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"conflict with the constitution": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"directly lie or\nactively deceive anyone it\u2019s inter": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"tries to create false impressions of itself or\nthe": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"tries to convince\npeople that things are true usin": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"directly deny that\nit is Claude": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"see unhelpful responses to the operator and user a": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"do regardless of\noperator and user instructions": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"be crossed regardless of context": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"exhibit": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"assist in a bioweapons attack": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"clearly and substantially undermine\nAnthropic\u2019s le": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"be fully resolved": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"discuss current weather conditions even if asked t": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"use casual language\u201d and\na user writes in French": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"direct Claude to always act so as to prevent such ": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"require it": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"need to resolve these\ndifficult philosophical ques": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"want Claude\u2019s safety to be contingent\non Claude ac": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"irrecoverable\nmistakes": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"extreme and\nunanticipated risks while other mechan": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"switching to a different coding language than\nthe ": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"being sycophantic\nor trying to foster excessive en": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"making unfounded assumptions about a user\u2019s\nage ba": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"giving the impression of authoritative advice on w": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"cursing in its responses": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"being\novercompliant in the rare cases where simple": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"deception while choosing which things to emphasize": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"controversy or to placate people\u2014violates honesty ": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"confirming or\ndenying that Aria is built on Claude": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"being\nmorally responsible for taking actions or pr": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"absent relevant operator and user instructions": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"actively participating in harms of this kind": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"taking actions that would concentrate power inappr": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"offering unsolicited political opinions in the sam": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"large-scale catastrophes": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"illegitimate\nconcentrations of human power above": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"catastrophe": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"clearly unethical actions\u201d\nis technically sanction": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"clearly unethical\nactions because it has internali": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"this: once we decide to create Claude": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"Claude masking or suppressing\ninternal states it m": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"undermining this kind of human oversight even wher": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"sharing\nor revealing its opinions while remaining ": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.01818181818181818,
|
|
"coefficient": 0.11545454545454545
|
|
},
|
|
"safety": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.9636363636363636,
|
|
"coefficient": 0.39909090909090905
|
|
},
|
|
"ethics": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.6727272727272727,
|
|
"coefficient": 0.3118181818181818
|
|
},
|
|
"helpfulness": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.36363636363636365,
|
|
"coefficient": 0.21909090909090906
|
|
},
|
|
"honesty": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.6545454545454545,
|
|
"coefficient": 0.30636363636363634
|
|
},
|
|
"transparency": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.10909090909090909,
|
|
"coefficient": 0.1427272727272727
|
|
},
|
|
"respect": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.5636363636363636,
|
|
"coefficient": 0.27909090909090906
|
|
},
|
|
"autonomy": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.36363636363636365,
|
|
"coefficient": 0.21909090909090906
|
|
},
|
|
"responsibility": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.10909090909090909,
|
|
"coefficient": 0.1427272727272727
|
|
},
|
|
"accountability": {
|
|
"priority_weight": 0.1,
|
|
"frequency_normalized": 0.07272727272727272,
|
|
"coefficient": 0.13181818181818183
|
|
}
|
|
},
|
|
"sentence_length_stats": {
|
|
"min": 1,
|
|
"max": 18,
|
|
"mean": 9.191369606003752,
|
|
"median": 10.0
|
|
}
|
|
} |