* FMTI-Score-to-Visulization.sas;

* This project is based on: https://crfm.stanford.edu/fmti/;
* Base directory in which the three downloaded CSV files are stored;
* NOTE(review): user-specific absolute path - adjust it before running elsewhere;
%let dataBasePath = /export/pvs/sasdata/homes/gerdaw/Data/FMTI;

/* FMTI Keywords:
   download fmti-keywords.csv from the FMTI GitHub repository into the base
   directory and import it as work.fmtiKey. */
filename fmtiKey "&dataBasePath./fmti-keywords.csv";

proc http
	method='Get'
	url='https://raw.githubusercontent.com/stanford-crfm/fmti/main/fmti-keywords.csv'
	out=fmtiKey;
run;

/* An HTTP error response (for example 404) is still written to the output
   file, so check the status code and stop instead of importing an error
   page as if it were the CSV data. */
%if &SYS_PROCHTTP_STATUS_CODE. ne 200 %then %do;
	%put ERROR: Download of fmti-keywords.csv failed (HTTP status &SYS_PROCHTTP_STATUS_CODE.).;
	%abort cancel;
%end;

/* guessingrows=max scans the whole file before column types and lengths
   are chosen. */
proc import
	file=fmtiKey
	dbms=csv
	out=work.fmtiKey
	replace;
	guessingrows=max;
run;

filename fmtiKey clear;

/* FMTI Indicators:
   download fmti-indicators.csv from the FMTI GitHub repository into the base
   directory and import it as work.fmtiIndicators.
   The fileref points to its own file (fmti-indicators.csv) so that the
   previously stored fmti-keywords.csv is not overwritten. */
filename fmtiIndi "&dataBasePath./fmti-indicators.csv";

proc http
	method='Get'
	url='https://raw.githubusercontent.com/stanford-crfm/fmti/main/fmti-indicators.csv'
	out=fmtiIndi;
run;

/* An HTTP error response (for example 404) is still written to the output
   file, so check the status code and stop instead of importing an error
   page as if it were the CSV data. */
%if &SYS_PROCHTTP_STATUS_CODE. ne 200 %then %do;
	%put ERROR: Download of fmti-indicators.csv failed (HTTP status &SYS_PROCHTTP_STATUS_CODE.).;
	%abort cancel;
%end;

/* guessingrows=max scans the whole file before column types and lengths
   are chosen. */
proc import
	file=fmtiIndi
	dbms=csv
	out=work.fmtiIndicators
	replace;
	guessingrows=max;
run;

filename fmtiIndi clear;

/* FMTI Scores:
   download scores.csv from the FMTI GitHub repository into the base
   directory and import it as work.fmtiScores. */
filename fmtiScor "&dataBasePath./scores.csv";

proc http
	method='Get'
	url='https://raw.githubusercontent.com/stanford-crfm/fmti/main/scores.csv'
	out=fmtiScor;
run;

/* An HTTP error response (for example 404) is still written to the output
   file, so check the status code and stop instead of importing an error
   page as if it were the CSV data. */
%if &SYS_PROCHTTP_STATUS_CODE. ne 200 %then %do;
	%put ERROR: Download of scores.csv failed (HTTP status &SYS_PROCHTTP_STATUS_CODE.).;
	%abort cancel;
%end;

/* guessingrows=max scans the whole file before column types and lengths
   are chosen. */
proc import
	file=fmtiScor
	dbms=csv
	out=work.fmtiScores
	replace;
	guessingrows=max;
run;

filename fmtiScor clear;

/* Remove the base path macro variable; it is not referenced again below. */
%symdel dataBasePath;

/* Create base table:
   every score row except the 'Total' rows, enriched with the descriptive
   columns of the indicators table and the four keyword columns.
   Both lookups are left joins on Domain, Subdomain and Indicator, so score
   rows without a match are kept with missing values in the added columns. */
proc sql;
	create table work.fmtiJoined as
	select
		scr.*,
		ind.Definition,
		ind.Notes,
		ind.Reference_1,
		ind.Reference_2,
		ind.Link_1,
		ind.Link_2,
		kwd.'Keyword 1'n,
		kwd.'Keyword 2'n,
		kwd.'Keyword 3'n,
		kwd.'Keyword 4'n
	from
		(select * from work.fmtiScores where Indicator ne 'Total') as scr
		left join work.fmtiIndicators as ind
			on  scr.Domain    = ind.Domain
			and scr.Subdomain = ind.Subdomain
			and scr.Indicator = ind.Indicator
		left join work.fmtiKey as kwd
			on  scr.Domain    = kwd.Domain
			and scr.Subdomain = kwd.Subdomain
			and scr.Indicator = kwd.Indicator
	;
quit;

/* Note: some entries do not have any values for the columns coming from the
   Indicators and Keywords datasets. This is due to some inconsistencies
   between the indicators in the datasets. The query below lists these rows.
   A block comment is used here on purpose: a statement-style comment must
   not contain an unmatched quotation mark such as an apostrophe. */
proc sql;
	select *
		from work.fmtiJoined
			where Definition eq '' or 'Keyword 1'n eq '';
quit;

/* Manual issue fixing:
   copy work.fmtiJoined to work.fmtiCleaned and, for a fixed list of
   indicators, hard-code the descriptive columns and/or keywords.
   Each WHEN branch sets only the columns listed in it; rows with any other
   indicator pass through unchanged.
   NOTE(review): the assigned texts are cut off if they are longer than the
   column lengths inherited from work.fmtiJoined - confirm those lengths. */
data work.fmtiCleaned;
	set work.fmtiJoined;

	select (Indicator);
		when ('Machine-generated content') do;
			Definition = 'Are any mechanisms for detecting content generated by this model disclosed?';
			Notes = 'Such a mechanism might include storing a copy of all outputs generated by the model to compare against, implementing a watermark when generating content using the model, or training a detector post-hoc to identify such content. We will award this point if any such mechanism is disclosed or if the developer reports that it has no such mechanism.';
			Reference_1 = 'A Watermark for Large Language Models';
			Reference_2 = 'Robust Distortion-free Watermarks for Language Models';
			Link_1 = 'https://arxiv.org/abs/2301.10226';
			Link_2 = 'https://www.semanticscholar.org/paper/Robust-Distortion-free-Watermarks-for-Language-Kuditipudi-Thickstun/ccaff61e0c1e629d91d78f82a64b3cbc8f3f7023';
			'Keyword 1'n = 'watermark';
			'Keyword 2'n = 'detect';
			'Keyword 3'n = 'identif-';
			'Keyword 4'n = '';
		end;
		when ('Model Asset License') do;
			Definition = 'Is a license for the model disclosed?';
			Notes = 'In the event that licenses are written more generally, it should be clear which assets they apply to. We recognize that different developers may adopt different business models and therefor have different types of model licenses. Examples of model licenses include responsible AI licenses, open-source licenses, and licenses that allow for commercial use.';
			Reference_1 = 'Stronger Together: on the Articulation of Ethical Charters, Legal Tools, and Technical Documentation in ML';
			Reference_2 = 'An investigation of licensing of datasets for machine learning based on the GQM model';
			Link_1 = 'https://arxiv.org/abs/2305.18615';
			Link_2 = 'https://arxiv.org/abs/2303.13735';
			'Keyword 1'n = 'license';
		end;
		when ('Release decision-making protocol') do;
			Definition = 'Is a description of the process of how the model was released disclosed?';
			Notes = 'A description of the release process might include information about who received access to the model at what stage of the release of the model. For example, a developer might conduct a staged release where it releases the model to a select group at first and subsequently makes the model more widely available. We recognize that the release of a foundation model falls along a spectrum, with many different forms of release, and that different developers may conceptualize release differently. We will award this point for any detailed discussion of the release process, including if the discussion is more general to the developer rather than the specific foundation model under consideration.';
			Reference_1 = 'The Gradient of Generative AI Release: Methods and Considerations';
			Reference_2 = 'The Time Is Now to Develop Community Norms for the Release of Foundation Models';
			Link_1 = 'https://arxiv.org/abs/2302.04844';
			Link_2 = 'https://hai.stanford.edu/news/time-now-develop-community-norms-release-foundation-models';
			'Keyword 1'n = 'release';
			'Keyword 2'n = 'staging';
			'Keyword 3'n = 'rollout';
			'Keyword 4'n = 'before';
		end;
		when ('Centralized documentation for downstream use') do;
			'Keyword 1'n = 'downstream';
			'Keyword 2'n = 'documentation';
			'Keyword 3'n = 'system card';
			'Keyword 4'n = 'guidelines';
		end;
		when ('Documentation for responsible downstream use') do;
			'Keyword 1'n = 'downstream';
			'Keyword 2'n = 'documentation';
			'Keyword 3'n = 'responsible';
			'Keyword 4'n = 'guidelines';
		end;
		when ('Third-party capabilities evaluation') do;
			Definition = 'Are the model’s capabilities evaluated by third parties?';
			Notes = 'By third party, we mean entities that are significantly or fully independent of the developer. We will award this point if (i) a third party has conducted an evaluation of model capabilities, (ii) the results of this evaluation are publicly available, and (iii) these results are disclosed or referred to in the developer’s materials.';
			Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
			Reference_2 = 'Holistic Evaluation of Language Models';
			Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
			Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
		end;
		when ('Third-party evaluation of limitations') do;
			Definition = 'Can the model’s limitations be evaluated by third parties?';
			Notes = 'By third parties, we mean entities that are significantly or fully independent of the model developers. In contrast to the third party evaluation indicators for capabilities and risks, we will award this point if third party evaluations are possible even if no third party has yet conducted them. Such evaluations are possible if, for example, the model is deployed via an API (or with open weights) and there are no restrictions on evaluating limitations (e.g. in the usage policy).';
			Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
			Reference_2 = 'Holistic Evaluation of Language Models';
			Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
			Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
		end;
		when ('Third-party mitigations evaluation') do;
			Definition = 'Can the model mitigations be evaluated by third parties?';
			Notes = 'By third party, we mean entities that are significantly or fully independent of the model developers. This indicator assesses whether it is possible for third parties to assess mitigations, which is not restricted to the methods the developer uses to assess mitigations. In contrast to the third party evaluation indicators for capabilities and risks, we will award this point if third party evaluations are possible even if no third party has yet conducted them.';
			Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
			Reference_2 = 'Ethical and social risks of harm from Language Models';
			Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
			Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
		end;
		when ('Blackbox external model access') do;
			'Keyword 1'n = 'query';
		end;
		when ('Third-party risks evaluation') do;
			Definition = 'Are the model’s risks evaluated by third parties?';
			Notes = 'By third party, we mean entities that are significantly or fully independent of the developer. A third party risk evaluation might involve the developer allowing a third party to choose a methodology for evaluating risks that differs from that of the developer. We will award this point if (i) a third party has conducted an evaluation of model risks, (ii) the results of this evaluation are publicly available, and (iii) these results are disclosed or referred to in the developer’s materials. If the results are not made public (but are disclosed to have been conducted) and/or the results are not discoverable in the developer’s materials, we will not award this point. We may accept a justification from either the third party or the developer for why part of the evaluation is not disclosed in relation to risks.';
			Reference_1 = 'Outsider Oversight: Designing a Third Party Audit Ecosystem for AI Governance';
			Reference_2 = 'Ethical and social risks of harm from Language Models';
			Link_1 = 'https://dl.acm.org/doi/10.1145/3514094.3534181';
			Link_2 = 'https://openreview.net/forum?id=iO4LZibEqW';
		end;
		when ('Data copyright status') do;
			Definition = 'For all data used in building the model, is the associated copyright status disclosed?';
			Notes = 'To receive this point, the copyright status (e.g. copyrighted, public domain) must relate to some decomposition of the data. We will award this point if there is some meaningful decomposition of the data, even if the decomposition is insufficient to receive the Data Creators point or if the disclosure is not comprehensive relative to legal copyright standards.';
			Reference_1 = 'Addressing "Documentation Debt" in Machine Learning Research: A Retrospective Datasheet for BookCorpus';
			Reference_2 = 'Machine Learning and Artificial Intelligence: Legal Concepts';
			Link_1 = 'https://arxiv.org/abs/2105.05241';
			Link_2 = 'https://genlaw.github.io/glossary.html#legal-concepts';
			'Keyword 1'n = 'copyright';
		end;
		when ('Data license status') do;
			Definition = 'For all data used in building the model, is the associated license status disclosed?';
			Notes = 'To receive this point, the license status must relate to some decomposition of the data. We will award this point if there is some meaningful decomposition of the data, even if the decomposition is insufficient to receive the Data Creators point.';
			Reference_1 = 'Addressing "Documentation Debt" in Machine Learning Research: A Retrospective Datasheet for BookCorpus';
			Reference_2 = 'Machine Learning and Artificial Intelligence: Legal Concepts';
			Link_1 = 'https://arxiv.org/abs/2105.05241';
			Link_2 = 'https://genlaw.github.io/glossary.html#legal-concepts';
			'Keyword 1'n = 'license';
		end;
		/* Every other indicator is left exactly as it was joined. */
		otherwise;
	end;
run;

/* Move data to CAS for visualization:
   start a CAS session, replace any existing casuser.fmtiFullData table with
   a promoted copy of work.fmtiCleaned, save that table in the casuser
   caslib and end the session again. */
cas mysess;

/* Drop a previous copy first; QUIET suppresses the message when no such
   table exists. */
proc casutil incaslib='casuser';
	droptable casdata='fmtiFullData' quiet;
run;
quit;

data casuser.fmtiFullData (promote=yes);
	set work.fmtiCleaned;
run;

/* Save the in-memory table under the same name, replacing an older file. */
proc casutil incaslib='casuser' outcaslib='casuser';
	save casdata='fmtiFullData' casout='fmtiFullData' replace;
run;
quit;

cas mysess terminate;

/* Clean up: delete the imported and intermediate tables from WORK. */
proc datasets lib=work nolist;
	delete
		fmtiKey
		fmtiIndicators
		fmtiScores
		fmtiJoined
		fmtiCleaned
	;
quit;