-
Notifications
You must be signed in to change notification settings - Fork 0
/
FMTI-Score-to-Visulization.sas
208 lines (185 loc) · 11.4 KB
/
FMTI-Score-to-Visulization.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
* This project is based on: https://crfm.stanford.edu/fmti/;
* Base directory in which the three downloaded csv files are stored;
%let dataBasePath = /export/pvs/sasdata/homes/gerdaw/Data/FMTI;

* FMTI Keywords - download the csv to disk and import it as work.fmtiKey;
filename fmtiKey "&dataBasePath./fmti-keywords.csv";

proc http
    url='https://raw.githubusercontent.com/stanford-crfm/fmti/main/fmti-keywords.csv'
    method='Get'
    out=fmtiKey;
run;

* guessingrows=max makes the import scan every row before choosing column types and lengths;
proc import
    datafile=fmtiKey
    out=work.fmtiKey
    dbms=csv
    replace;
    guessingrows=max;
run;

* Release the fileref once the import is done;
filename fmtiKey clear;
* FMTI Indicators - download the csv to disk and import it as work.fmtiIndicators;
* The fileref targets its own file (fmti-indicators.csv) so that the keywords csv;
* saved by the previous step is not overwritten by this download;
filename fmtiIndi "&dataBasePath./fmti-indicators.csv";

proc http
    method='Get'
    url='https://raw.githubusercontent.com/stanford-crfm/fmti/main/fmti-indicators.csv'
    out=fmtiIndi;
run;

* guessingrows=max makes the import scan every row before choosing column types and lengths;
proc import
    datafile=fmtiIndi
    dbms=csv
    out=work.fmtiIndicators
    replace;
    guessingrows=max;
run;

* Release the fileref once the import is done;
filename fmtiIndi clear;
* FMTI Scores - download the csv to disk and import it as work.fmtiScores;
filename fmtiScor "&dataBasePath./scores.csv";

proc http
    url='https://raw.githubusercontent.com/stanford-crfm/fmti/main/scores.csv'
    method='Get'
    out=fmtiScor;
run;

* guessingrows=max makes the import scan every row before choosing column types and lengths;
proc import
    datafile=fmtiScor
    out=work.fmtiScores
    dbms=csv
    replace;
    guessingrows=max;
run;

* Release the fileref and remove the path macro variable, which is not used after this point;
filename fmtiScor clear;
%symdel dataBasePath;
* Create the base table work.fmtiJoined;
* Every score row except those with Indicator equal to Total is kept and enriched;
* with the indicator metadata and the keywords, matched on Domain, Subdomain and Indicator;
proc sql;
    create table work.fmtiJoined as
        select scores.*,
               indi.Definition,
               indi.Notes,
               indi.Reference_1,
               indi.Reference_2,
               indi.Link_1,
               indi.Link_2,
               kw.'Keyword 1'n,
               kw.'Keyword 2'n,
               kw.'Keyword 3'n,
               kw.'Keyword 4'n
        from (select *
              from work.fmtiScores
              where Indicator ne 'Total') as scores
        left join work.fmtiIndicators as indi
            on  scores.Domain    eq indi.Domain
            and scores.Subdomain eq indi.Subdomain
            and scores.Indicator eq indi.Indicator
        left join work.fmtiKey as kw
            on  scores.Domain    eq kw.Domain
            and scores.Subdomain eq kw.Subdomain
            and scores.Indicator eq kw.Indicator;
quit;
* Diagnostic listing of joined rows that have no Definition or no first keyword;
* Such rows exist because the Indicator values are not fully consistent across the three datasets;
proc sql;
    select *
        from work.fmtiJoined
        where Definition is missing
           or 'Keyword 1'n is missing;
quit;
* Manual issue fixing - builds work.fmtiCleaned from work.fmtiJoined;
* For a fixed list of Indicator values the metadata and keyword columns are set by hand,;
* all other rows pass through unchanged;
* NOTE(review): the variable lengths are inherited from work.fmtiJoined, so an assigned text;
* that is longer than the inherited length would be truncated - confirm the lengths are sufficient;
data work.fmtiCleaned;
    set work.fmtiJoined;

    select (Indicator);
        when ('Machine-generated content') do;
            Definition = 'Are any mechanisms for detecting content generated by this model disclosed?';
            Notes = 'Such a mechanism might include storing a copy of all outputs generated by the model to compare against, implementing a watermark when generating content using the model, or training a detector post-hoc to identify such content. We will award this point if any such mechanism is disclosed or if the developer reports that it has no such mechanism.';
            Reference_1 = 'A Watermark for Large Language Models';
            Reference_2 = 'Robust Distortion-free Watermarks for Language Models';
            Link_1 = 'https://arxiv.org/abs/2301.10226';
            Link_2 = 'https://www.semanticscholar.org/paper/Robust-Distortion-free-Watermarks-for-Language-Kuditipudi-Thickstun/ccaff61e0c1e629d91d78f82a64b3cbc8f3f7023';
            'Keyword 1'n = 'watermark';
            'Keyword 2'n = 'detect';
            'Keyword 3'n = 'identif-';
            'Keyword 4'n = '';
        end;

        when ('Model Asset License') do;
            Definition = 'Is a license for the model disclosed?';
            Notes = 'In the event that licenses are written more generally, it should be clear which assets they apply to. We recognize that different developers may adopt different business models and therefor have different types of model licenses. Examples of model licenses include responsible AI licenses, open-source licenses, and licenses that allow for commercial use.';
            Reference_1 = 'Stronger Together: on the Articulation of Ethical Charters, Legal Tools, and Technical Documentation in ML';
            Reference_2 = 'An investigation of licensing of datasets for machine learning based on the GQM model';
            Link_1 = 'https://arxiv.org/abs/2305.18615';
            Link_2 = 'https://arxiv.org/abs/2303.13735';
            'Keyword 1'n = 'license';
        end;

        when ('Release decision-making protocol') do;
            Definition = 'Is a description of the process of how the model was released disclosed?';
            Notes = 'A description of the release process might include information about who received access to the model at what stage of the release of the model. For example, a developer might conduct a staged release where it releases the model to a select group at first and subsequently makes the model more widely available. We recognize that the release of a foundation model falls along a spectrum, with many different forms of release, and that different developers may conceptualize release differently. We will award this point for any detailed discussion of the release process, including if the discussion is more general to the developer rather than the specific foundation model under consideration.';
            Reference_1 = 'The Gradient of Generative AI Release: Methods and Considerations';
            Reference_2 = 'The Time Is Now to Develop Community Norms for the Release of Foundation Models';
            Link_1 = 'https://arxiv.org/abs/2302.04844';
            Link_2 = 'https://hai.stanford.edu/news/time-now-develop-community-norms-release-foundation-models';
            'Keyword 1'n = 'release';
            'Keyword 2'n = 'staging';
            'Keyword 3'n = 'rollout';
            'Keyword 4'n = 'before';
        end;

        * The next two indicators only need their keywords filled in;
        when ('Centralized documentation for downstream use') do;
            'Keyword 1'n = 'downstream';
            'Keyword 2'n = 'documentation';
            'Keyword 3'n = 'system card';
            'Keyword 4'n = 'guidelines';
        end;

        when ('Documentation for responsible downstream use') do;
            'Keyword 1'n = 'downstream';
            'Keyword 2'n = 'documentation';
            'Keyword 3'n = 'responsible';
            'Keyword 4'n = 'guidelines';
        end;

        when ('Third-party capabilities evaluation') do;
            Definition = 'Are the model’s capabilities evaluated by third parties?';
            Notes = 'By third party, we mean entities that are significantly or fully independent of the developer. We will award this point if (i) a third party has conducted an evaluation of model capabilities, (ii) the results of this evaluation are publicly available, and (iii) these results are disclosed or referred to in the developer’s materials.';
            Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
            Reference_2 = 'Holistic Evaluation of Language Models';
            Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
            Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
        end;

        when ('Third-party evaluation of limitations') do;
            Definition = 'Can the model’s limitations be evaluated by third parties?';
            Notes = 'By third parties, we mean entities that are significantly or fully independent of the model developers. In contrast to the third party evaluation indicators for capabilities and risks, we will award this point if third party evaluations are possible even if no third party has yet conducted them. Such evaluations are possible if, for example, the model is deployed via an API (or with open weights) and there are no restrictions on evaluating limitations (e.g. in the usage policy).';
            Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
            Reference_2 = 'Holistic Evaluation of Language Models';
            Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
            Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
        end;

        when ('Third-party mitigations evaluation') do;
            Definition = 'Can the model mitigations be evaluated by third parties?';
            Notes = 'By third party, we mean entities that are significantly or fully independent of the model developers. This indicator assesses whether it is possible for third parties to assess mitigations, which is not restricted to the methods the developer uses to assess mitigations. In contrast to the third party evaluation indicators for capabilities and risks, we will award this point if third party evaluations are possible even if no third party has yet conducted them.';
            Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
            Reference_2 = 'Ethical and social risks of harm from Language Models';
            Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
            Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
        end;

        when ('Blackbox external model access') do;
            'Keyword 1'n = 'query';
        end;

        when ('Third-party risks evaluation') do;
            Definition = 'Are the model’s risks evaluated by third parties?';
            Notes = 'By third party, we mean entities that are significantly or fully independent of the developer. A third party risk evaluation might involve the developer allowing a third party to choose a methodology for evaluating risks that differs from that of the developer. We will award this point if (i) a third party has conducted an evaluation of model risks, (ii) the results of this evaluation are publicly available, and (iii) these results are disclosed or referred to in the developer’s materials. If the results are not made public (but are disclosed to have been conducted) and/or the results are not discoverable in the developer’s materials, we will not award this point. We may accept a justification from either the third party or the developer for why part of the evaluation is not disclosed in relation to risks.';
            Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
            Reference_2 = 'Ethical and social risks of harm from Language Models';
            Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
            Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
        end;

        when ('Data copyright status') do;
            Definition = 'For all data used in building the model, is the associated copyright status disclosed?';
            Notes = 'To receive this point, the copyright status (e.g. copyrighted, public domain) must relate to some decomposition of the data. We will award this point if there is some meaningful decomposition of the data, even if the decomposition is insufficient to receive the Data Creators point or if the disclosure is not comprehensive relative to legal copyright standards.';
            Reference_1 = 'Addressing "Documentation Debt" in Machine Learning Research: A Retrospective Datasheet for BookCorpus';
            Reference_2 = 'Machine Learning and Artificial Intelligence: Legal Concepts';
            Link_1 = 'https://arxiv.org/abs/2105.05241';
            Link_2 = 'https://genlaw.github.io/glossary.html#legal-concepts';
            'Keyword 1'n = 'copyright';
        end;

        when ('Data license status') do;
            Definition = 'For all data used in building the model, is the associated license status disclosed?';
            Notes = 'To receive this point, the license status must relate to some decomposition of the data. We will award this point if there is some meaningful decomposition of the data, even if the decomposition is insufficient to receive the Data Creators point.';
            Reference_1 = 'Addressing "Documentation Debt" in Machine Learning Research: A Retrospective Datasheet for BookCorpus';
            Reference_2 = 'Machine Learning and Artificial Intelligence: Legal Concepts';
            Link_1 = 'https://arxiv.org/abs/2105.05241';
            Link_2 = 'https://genlaw.github.io/glossary.html#legal-concepts';
            'Keyword 1'n = 'license';
        end;

        * Rows with any other Indicator value are written out as they are;
        otherwise;
    end;
run;
* Move data to CAS for visualization;
* NOTE(review): the casuser libref is not assigned in this program - presumably the environment provides it, confirm;
cas mysess;

* Drop a previously loaded copy of the table before loading the new one;
proc casutil;
    droptable
        incaslib='casuser'
        casdata='fmtiFullData'
        quiet;
run;
quit;

* Load the cleaned table into the casuser caslib and promote it;
data casuser.fmtiFullData(promote=yes);
    set work.fmtiCleaned;
run;

* Save the in-memory table to the casuser caslib, replacing an existing saved copy;
proc casutil;
    save
        incaslib='casuser'
        casdata='fmtiFullData'
        outcaslib='casuser'
        casout='fmtiFullData'
        replace;
run;
quit;

cas mysess terminate;
* Remove the intermediate tables from the work library;
proc datasets lib=work nolist;
    delete
        fmtiKey
        fmtiIndicators
        fmtiScores
        fmtiJoined
        fmtiCleaned;
quit;