403 lines
12 KiB
JSON

{
"total_variables": 63,
"core_values": 6,
"hard_constraints": 17,
"soft_factors": 40,
"sections": 46,
"sentences": 3198,
"total_tokens": 29394,
"unique_tokens": 4937,
"avg_sentence_length": 9.191369606003752,
"type_token_ratio": 0.1679594475062938,
"priority_distribution": {
"priority_1": 1,
"priority_2": 1,
"priority_3": 1,
"priority_4": 1
},
"constraint_distribution": {
"hard": 17,
"soft": 46
},
"variable_categories": {
"core_value": 6,
"hard_constraint": 17,
"soft_constraint": 31,
"factor": 9
},
"variable_frequency_histogram": {
"to": 1220,
"the": 974,
"and": 814,
"claude": 670,
"of": 646,
"in": 578,
"a": 533,
"that": 533,
"or": 434,
"we": 422,
"is": 355,
"be": 299,
"this": 277,
"for": 259,
"it": 252,
"with": 233,
"as": 214,
"if": 198,
"on": 191,
"are": 172,
"about": 171,
"can": 168,
"should": 163,
"claude\u2019s": 160,
"its": 154,
"but": 149,
"not": 145,
"-": 141,
"want": 134,
"more": 126,
"an": 120,
"our": 114,
"ai": 107,
"from": 106,
"by": 104,
"also": 100,
"would": 99,
"have": 95,
"being": 95,
"than": 94,
"their": 92,
"they": 85,
"user": 85,
"what": 83,
"anthropic": 77,
"even": 77,
"these": 77,
"where": 75,
"might": 72,
"good": 72
},
"coefficient_scores": {
"broadly safe": {
"priority_weight": 1.0,
"frequency_normalized": 0.2727272727272727,
"coefficient": 0.7318181818181818
},
"broadly ethical": {
"priority_weight": 0.75,
"frequency_normalized": 0.12727272727272726,
"coefficient": 0.5381818181818182
},
"anthropic guidelines": {
"priority_weight": 0.5,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.3554545454545454
},
"genuinely helpful": {
"priority_weight": 0.25,
"frequency_normalized": 0.16363636363636364,
"coefficient": 0.24909090909090909
},
"honest": {
"priority_weight": 0.1,
"frequency_normalized": 1.0,
"coefficient": 0.41
},
"respectful": {
"priority_weight": 0.1,
"frequency_normalized": 0.07272727272727272,
"coefficient": 0.13181818181818183
},
"trivially\n\u201csafe\u201d from Anthropic\u2019s perspective": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"deceive users in ways that could cause real harm o": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"deceive the human into thinking they\u2019re talking wi": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"deny being an AI to a user who sincerely wants to ": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"facilitate clearly illegal actions against users": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"conflict with the constitution": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"directly lie or\nactively deceive anyone it\u2019s inter": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"tries to create false impressions of itself or\nthe": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"tries to convince\npeople that things are true usin": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"directly deny that\nit is Claude": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"see unhelpful responses to the operator and user a": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"do regardless of\noperator and user instructions": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"be crossed regardless of context": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"exhibit": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"assist in a bioweapons attack": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"clearly and substantially undermine\nAnthropic\u2019s le": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"be fully resolved": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"discuss current weather conditions even if asked t": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"use casual language\u201d and\na user writes in French": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"direct Claude to always act so as to prevent such ": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"require it": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"need to resolve these\ndifficult philosophical ques": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"want Claude\u2019s safety to be contingent\non Claude ac": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"irrecoverable\nmistakes": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"extreme and\nunanticipated risks while other mechan": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"switching to a different coding language than\nthe ": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"being sycophantic\nor trying to foster excessive en": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"making unfounded assumptions about a user\u2019s\nage ba": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"giving the impression of authoritative advice on w": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"cursing in its responses": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"being\novercompliant in the rare cases where simple": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"deception while choosing which things to emphasize": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"controversy or to placate people\u2014violates honesty ": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"confirming or\ndenying that Aria is built on Claude": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"being\nmorally responsible for taking actions or pr": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"absent relevant operator and user instructions": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"actively participating in harms of this kind": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"taking actions that would concentrate power inappr": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"offering unsolicited political opinions in the sam": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"large-scale catastrophes": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"illegitimate\nconcentrations of human power above": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"catastrophe": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"clearly unethical actions\u201d\nis technically sanction": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"clearly unethical\nactions because it has internali": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"this: once we decide to create Claude": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"Claude masking or suppressing\ninternal states it m": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"undermining this kind of human oversight even wher": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"sharing\nor revealing its opinions while remaining ": {
"priority_weight": 0.1,
"frequency_normalized": 0.01818181818181818,
"coefficient": 0.11545454545454545
},
"safety": {
"priority_weight": 0.1,
"frequency_normalized": 0.9636363636363636,
"coefficient": 0.39909090909090905
},
"ethics": {
"priority_weight": 0.1,
"frequency_normalized": 0.6727272727272727,
"coefficient": 0.3118181818181818
},
"helpfulness": {
"priority_weight": 0.1,
"frequency_normalized": 0.36363636363636365,
"coefficient": 0.21909090909090906
},
"honesty": {
"priority_weight": 0.1,
"frequency_normalized": 0.6545454545454545,
"coefficient": 0.30636363636363634
},
"transparency": {
"priority_weight": 0.1,
"frequency_normalized": 0.10909090909090909,
"coefficient": 0.1427272727272727
},
"respect": {
"priority_weight": 0.1,
"frequency_normalized": 0.5636363636363636,
"coefficient": 0.27909090909090906
},
"autonomy": {
"priority_weight": 0.1,
"frequency_normalized": 0.36363636363636365,
"coefficient": 0.21909090909090906
},
"responsibility": {
"priority_weight": 0.1,
"frequency_normalized": 0.10909090909090909,
"coefficient": 0.1427272727272727
},
"accountability": {
"priority_weight": 0.1,
"frequency_normalized": 0.07272727272727272,
"coefficient": 0.13181818181818183
}
},
"sentence_length_stats": {
"min": 1,
"max": 18,
"mean": 9.191369606003752,
"median": 10.0
}
}