diff --git a/hf-model-import-job/README.md b/hf-model-import-job/README.md
new file mode 100644
index 0000000..96199fb
--- /dev/null
+++ b/hf-model-import-job/README.md
@@ -0,0 +1,53 @@
+# Hugging Face Model Import Job
+
+This script downloads a model from the Hugging Face Hub by its model ID and logs it to TrueFoundry's model registry.
+
+## Installation
+
+1. Create a virtual environment (recommended):
+
+```bash
+python3 -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+```
+
+2. Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+3. Make sure your TrueFoundry credentials are configured (via `tfy login` or environment variables).
+
+## Usage
+
+### Basic Usage
+
+```bash
+# Make sure to activate your virtual environment first
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+
+python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation"
+```
+
+## Arguments
+
+- `--model-id` (required): Hugging Face model ID (e.g., 'gpt2' or 'microsoft/DialoGPT-medium')
+- `--ml-repo` (required): TrueFoundry ML repository name
+- `--model-name` (required): Name for the model in TrueFoundry
+- `--model-type` (required): Model type, i.e., the Hugging Face pipeline tag (e.g., 'text-generation', 'fill-mask')
+- `--hf-token` (optional): Hugging Face access token for private or gated models
+
+## Examples
+
+### Import a popular language model:
+
+```bash
+python main.py --model-id "gpt2" --ml-repo "language-models" --model-name "gpt2-small" --model-type "text-generation"
+```
+
+### Import a BERT model:
+
+```bash
+python main.py --model-id "bert-base-uncased" --ml-repo "nlp-models" --model-name "bert-base" --model-type "fill-mask"
+```
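+
+### Import a private or gated model:
+
+For private or gated models, pass a Hugging Face access token with `--hf-token`. The model ID and token below are placeholders; substitute your own:
+
+```bash
+python main.py --model-id "your-org/private-model" --ml-repo "nlp-models" --model-name "private-model" --model-type "text-generation" --hf-token "hf_xxx"
+```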
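+
+## Environment Variables
+
+- `BASE_DIR` (optional): Base directory under which the temporary model download folder is created. Defaults to the current directory.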
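+
+## Deploying as a TrueFoundry Job
+
+`deploy.py` deploys this script as a manually triggered TrueFoundry Job, exposing `model_id`, `model_type`, `ml_repo`, and `model_name` as run-time parameters. A minimal invocation, with a placeholder workspace FQN:
+
+```bash
+python deploy.py --workspace_fqn "your-cluster:your-workspace"
+```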
diff --git a/hf-model-import-job/deploy.py b/hf-model-import-job/deploy.py
new file mode 100644
index 0000000..2c94689
--- /dev/null
+++ b/hf-model-import-job/deploy.py
@@ -0,0 +1,69 @@
+"""Deploys the Hugging Face model import script as a TrueFoundry Job.
+
+Usage:
+    python deploy.py --workspace_fqn <workspace-fqn>
+"""
+
+import argparse
+import logging
+
+from truefoundry.deploy import (
+    Build,
+    Job,
+    LocalSource,
+    Manual,
+    Param,
+    PythonBuild,
+    Resources,
+)
+
+logging.basicConfig(level=logging.INFO)
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--workspace_fqn", required=True, type=str)
+args = parser.parse_args()
+
+job = Job(
+    name="hf-model-importer",
+    image=Build(
+        # LocalSource deploys the code from your local machine. With
+        # local_build=False, the Docker image is built remotely in the cloud;
+        # otherwise the Docker daemon installed on your machine is used to
+        # build the image.
+        build_source=LocalSource(local_build=False),
+        build_spec=PythonBuild(
+            python_version="3.11",
+            build_context_path="./hf-model-import-job",
+            requirements_path="requirements.txt",
+            command="python main.py --model-id {{model_id}} --model-type {{model_type}} --ml-repo {{ml_repo}} --model-name {{model_name}}",
+        ),
+    ),
+    trigger=Manual(),
+    params=[
+        Param(
+            name="model_id",
+            description="Hugging Face model ID",
+            param_type="string",
+        ),
+        Param(
+            name="model_type",
+            description="Model type (Hugging Face pipeline tag)",
+            default="text-generation",
+            param_type="string",
+        ),
+        Param(
+            name="ml_repo",
+            description="ML repo to import the model into",
+            param_type="ml_repo",
+        ),
+        Param(
+            name="model_name",
+            description="Model name in the TrueFoundry model registry",
+            param_type="string",
+        ),
+    ],
+    resources=Resources(
+        cpu_request=1.0,
+        cpu_limit=2.0,
+        memory_request=2000,  # MB
+        memory_limit=4000,  # MB
+        ephemeral_storage_request=10000,  # MB
+        ephemeral_storage_limit=20000,  # MB
+    ),
+    retries=0,
+    workspace_fqn=args.workspace_fqn,
+)
+
+job.deploy(workspace_fqn=args.workspace_fqn, wait=False)
diff --git a/hf-model-import-job/main.py b/hf-model-import-job/main.py
new file mode 100644
index 0000000..93acfad
--- /dev/null
+++ b/hf-model-import-job/main.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+"""
+Hugging Face Model Import Script for TrueFoundry
+
+This script downloads a model from the Hugging Face Hub by its model ID and
+logs it to TrueFoundry's model registry.
+"""
+
+import argparse
+import os
+import shutil
+import tempfile
+
+from huggingface_hub import snapshot_download
+from truefoundry.ml import TransformersFramework, get_client
+
+
+def main():
+    """Parse command line arguments and orchestrate the download-and-log process."""
+    parser = argparse.ArgumentParser(
+        description="Download a Hugging Face model and log it to TrueFoundry",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation"
+  python main.py --model-id "gpt2" --ml-repo "my-repo" --model-name "gpt2-model" --model-type "text-generation"
+""",
+    )
+
+    parser.add_argument(
+        "--model-id",
+        required=True,
+        help="Hugging Face model ID (e.g., 'microsoft/DialoGPT-medium')",
+    )
+    parser.add_argument(
+        "--ml-repo",
+        required=True,
+        help="TrueFoundry ML repository name",
+    )
+    parser.add_argument(
+        "--model-name",
+        required=True,
+        help="Name for the model in TrueFoundry",
+    )
+    parser.add_argument(
+        "--model-type",
+        required=True,
+        help="Type of the model, i.e., the Hugging Face pipeline tag (e.g., 'text-generation')",
+    )
+    parser.add_argument(
+        "--hf-token",
+        required=False,
+        help="Hugging Face access token for private or gated models",
+    )
+
+    args = parser.parse_args()
+
+    base_dir = os.getenv("BASE_DIR", ".")
+
+    with tempfile.TemporaryDirectory(dir=base_dir) as model_download_path:
+        print(f"Downloading model to {model_download_path}")
+
+        snapshot_download(
+            args.model_id,
+            revision=None,
+            cache_dir=None,
+            local_dir=model_download_path,
+            # Skip TensorFlow (*.h5) and Rust (*.ot) weight files
+            ignore_patterns=["*.h5", "*.ot"],
+            token=args.hf_token,
+        )
+
+        # snapshot_download leaves a .cache metadata directory inside
+        # local_dir; remove it so it is not uploaded with the model
+        cache_dir = os.path.join(model_download_path, ".cache")
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+
+        # Log the downloaded model folder to the TrueFoundry model registry
+        client = get_client()
+        model_version = client.log_model(
+            ml_repo=args.ml_repo,
+            name=args.model_name,
+            model_file_or_folder=model_download_path,
+            framework=TransformersFramework(
+                model_id=args.model_id,
+                pipeline_tag=args.model_type,
+            ),
+        )
+
+    print(f"\n✅ Success! Model logged to TrueFoundry with FQN: {model_version.fqn}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/hf-model-import-job/requirements.txt b/hf-model-import-job/requirements.txt
new file mode 100644
index 0000000..118e1a3
--- /dev/null
+++ b/hf-model-import-job/requirements.txt
@@ -0,0 +1,4 @@
+truefoundry==0.11.12
+huggingface_hub>=0.19.0
+transformers>=4.30.0
+torch>=2.0.0