mlcommons · pranayasinghcsmpl · Aug 14, 2024 · Aug 14, 2024 · Aug 19, 2024 · Aug 20, 2024
@@ -0,0 +1,259 @@
+from huggingface_hub import HfApi, snapshot_download, ModelCardData, ModelCard
+from typing import List, Union
+from GANDLF import version
+from pathlib import Path
+from GANDLF.utils import get_git_hash
+
+# Template for the README.md model card written by push_to_model_hub, which passes
+# it to ModelCard.from_template as ``template_str``. The ``{{ name | default(...) }}``
+# placeholders fall back to their default text whenever no value is supplied; in this
+# module only ``card_data`` (the metadata block between the ``---`` fences) is provided.
+readme_template = """
+---
+# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
+# Doc / guide: https://huggingface.co/docs/hub/model-cards
+{{ card_data }}
+---
+
+# Model Card for {{ model_id | default("Model ID", true) }}
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+{{ model_summary | default("", true) }}
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+{{ model_description | default("", true) }}
+
+- **Developed by:** {{ developers | default("[More Information Needed]", true)}}
+- **Primary Organization:** {{ primary_org | default("[More Information Needed]", true)}}
+- **Commercial use Policy:** {{ commercial_policy | default("[More Information Needed]", true)}}
+- **Funded by [optional]:** {{ funded_by | default("[More Information Needed]", true)}}
+- **Shared by [optional]:** {{ shared_by | default("[More Information Needed]", true)}}
+- **Model type:** {{ model_type | default("[More Information Needed]", true)}}
+- **Language(s) (NLP):** {{ language | default("[More Information Needed]", true)}}
+- **License:** {{ license | default("[More Information Needed]", true)}}
+- **Finetuned from model [optional]:** {{ base_model | default("[More Information Needed]", true)}}
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** {{ repo | default("[More Information Needed]", true)}}
+- **Paper [optional]:** {{ paper | default("[More Information Needed]", true)}}
+- **Demo [optional]:** {{ demo | default("[More Information Needed]", true)}}
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+{{ direct_use | default("[More Information Needed]", true)}}
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+{{ downstream_use | default("[More Information Needed]", true)}}
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+{{ out_of_scope_use | default("[More Information Needed]", true)}}
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+{{ bias_risks_limitations | default("[More Information Needed]", true)}}
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+{{ bias_recommendations | default("Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.", true)}}
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+{{ get_started_code | default("[More Information Needed]", true)}}
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+{{ training_data | default("[More Information Needed]", true)}}
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+{{ preprocessing | default("[More Information Needed]", true)}}
+
+
+#### Training Hyperparameters
+
+- **Training regime:** {{ training_regime | default("[More Information Needed]", true)}} <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+{{ speeds_sizes_times | default("[More Information Needed]", true)}}
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+{{ testing_data | default("[More Information Needed]", true)}}
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+{{ testing_factors | default("[More Information Needed]", true)}}
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+{{ testing_metrics | default("[More Information Needed]", true)}}
+
+### Results
+
+{{ results | default("[More Information Needed]", true)}}
+
+#### Summary
+
+{{ results_summary | default("", true) }}
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+{{ model_examination | default("[More Information Needed]", true)}}
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+{{ model_specs | default("[More Information Needed]", true)}}
+
+### Compute Infrastructure
+
+{{ compute_infrastructure | default("[More Information Needed]", true)}}
+
+#### Hardware
+
+{{ hardware_requirements | default("[More Information Needed]", true)}}
+
+#### Software
+
+{{ software | default("[More Information Needed]", true)}}
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+{{ citation_bibtex | default("[More Information Needed]", true)}}
+
+**APA:**
+
+{{ citation_apa | default("[More Information Needed]", true)}}
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+{{ glossary | default("[More Information Needed]", true)}}
+
+## More Information [optional]
+
+{{ more_information | default("[More Information Needed]", true)}}
+
+## Model Card Authors [optional]
+
+{{ model_card_authors | default("[More Information Needed]", true)}}
+
+## Model Card Contact
+
+{{ model_card_contact | default("[More Information Needed]", true)}}"""
+
+
+def push_to_model_hub(
+    repo_id: str,
+    folder_path: str,
+    path_in_repo: Union[str, None] = None,
+    commit_message: Union[str, None] = None,
+    commit_description: Union[str, None] = None,
+    token: Union[str, None] = None,
+    repo_type: Union[str, None] = None,
+    revision: Union[str, None] = None,
+    allow_patterns: Union[List[str], str, None] = None,
+    ignore_patterns: Union[List[str], str, None] = None,
+    delete_patterns: Union[List[str], str, None] = None,
+):
+    api = HfApi(token=token)
+
+    api.create_repo(repo_id, exist_ok=True)
+
+    tags = ["GaNDLFv" + version]
+
+    git_hash = get_git_hash()
+
+    if not git_hash == "None":
+        tags += [git_hash]
+
+    card_data = ModelCardData(library_name="GaNDLF", tags=tags)
+    card = ModelCard.from_template(card_data, template_str=readme_template)
+
+    card.save(Path(folder_path, "README.md"))
+
+    api.upload_folder(
+        repo_id=repo_id,
+        token=token,
+        folder_path=folder_path,
+        path_in_repo=path_in_repo,
+        commit_message=commit_message,
+        commit_description=commit_description,
+        repo_type=repo_type,
+        revision=revision,
+        allow_patterns=allow_patterns,
+        ignore_patterns=ignore_patterns,
+        delete_patterns=delete_patterns,
+    )
+
+
+def download_from_hub(
+    repo_id: str,
+    revision: Union[str, None] = None,
+    cache_dir: Union[str, None] = None,
+    local_dir: Union[str, None] = None,
+    force_download: bool = False,
+    token: Union[str, None] = None,
+):
+    snapshot_download(
+        repo_id=repo_id,
+        revision=revision,
+        cache_dir=cache_dir,
+        local_dir=local_dir,
+        force_download=force_download,
+        token=token,
+    )
@@ -0,0 +1,125 @@
+import click
+from GANDLF.entrypoints import append_copyright_to_help
+from GANDLF.cli.huggingface_hub_handler import push_to_model_hub, download_from_hub
+
+
+@click.command()
+@click.option(
+    "--upload/--download",
+    "-u/-d",
+    required=True,
+    help="Upload or download to/from a Huggingface Repo",
+)
+@click.option(
+    "--repo-id",
+    "-rid",
+    required=True,
+    help="Downloading/Uploading: A user or an organization name and a repo name separated by a /",
+)
+@click.option(
+    "--token",
+    "-tk",
+    help="Downloading/Uploading: A token to be used for the download/upload",
+)
+@click.option(
+    "--revision",
+    "-rv",
+    help="Downloading/Uploading: git revision id which can be a branch name, a tag, or a commit hash",
+)
+@click.option(
+    "--cache-dir",
+    "-cdir",
+    help="Downloading: path to the folder where cached files are stored",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+)
+@click.option(
+    "--local-dir",
+    "-ldir",
+    help="Downloading: if provided, the downloaded file will be placed under this directory",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+)
+@click.option(
+    "--force-download",
+    "-fd",
+    is_flag=True,
+    help="Downloading: Whether the file should be downloaded even if it already exists in the local cache",
+)
+@click.option(
+    "--folder-path",
+    "-fp",
+    help="Uploading: Path to the folder to upload on the local file system",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+)
+@click.option(
+    "--path-in-repo",
+    "-pir",
+    help="Uploading: Relative path of the directory in the repo. Will default to the root folder of the repository",
+)
+@click.option(
+    "--commit-message",
+    "-cr",
+    help='Uploading: The summary / title / first line of the generated commit. Defaults to: f"Upload {path_in_repo} with huggingface_hub"',
+)
+@click.option(
+    "--commit-description",
+    "-cd",
+    help="Uploading: The description of the generated commit",
+)
+@click.option(
+    "--repo-type",
+    "-rt",
+    help='Uploading: Set to "dataset" or "space" if uploading to a dataset or space, "model" if uploading to a model. Default is model',
+)
+@click.option(
+    "--allow-patterns",
+    "-ap",
+    help="Uploading: If provided, only files matching at least one pattern are uploaded.",
+)
+@click.option(
+    "--ignore-patterns",
+    "-ip",
+    help="Uploading: If provided, files matching any of the patterns are not uploaded.",
+)
+@click.option(
+    "--delete-patterns",
+    "-dp",
+    help="Uploading: If provided, remote files matching any of the patterns will be deleted from the repo while committing new files. This is useful if you don't know which files have already been uploaded.",
+)
+@append_copyright_to_help
+def new_way(
+    upload: bool,
+    repo_id: str,
+    token: str,
+    revision: str,
+    cache_dir: str,
+    local_dir: str,
+    force_download: bool,
+    folder_path: str,
+    path_in_repo: str,
+    commit_message: str,
+    commit_description: str,
+    repo_type: str,
+    allow_patterns: str,
+    ignore_patterns: str,
+    delete_patterns: str,
+):
+    """Manages model transfers to and from the Hugging Face Hub"""
+
+    if upload:
+        push_to_model_hub(
+            repo_id,
+            folder_path,
+            path_in_repo,
+            commit_message,
+            commit_description,
+            token,
+            repo_type,
+            revision,
+            allow_patterns,
+            ignore_patterns,
+            delete_patterns,
+        )
+    else:
+        download_from_hub(
+            repo_id, revision, cache_dir, local_dir, force_download, token
+        )
@@ -12,6 +12,7 @@
 from GANDLF.entrypoints.generate_metrics import new_way as generate_metrics_command
 from GANDLF.entrypoints.debug_info import new_way as debug_info_command
 from GANDLF.entrypoints.split_csv import new_way as split_csv_command
+from GANDLF.entrypoints.hf_hub_integration import new_way as hf_command
 
 
 cli_subcommands = {
@@ -29,4 +30,5 @@
     "generate-metrics": generate_metrics_command,
     "debug-info": debug_info_command,
     "split-csv": split_csv_command,
+    "hf": hf_command,
 }