diff --git a/examples/model-conversion/Makefile b/examples/model-conversion/Makefile index ac7a4147297c5..f0867cfe46c3a 100644 --- a/examples/model-conversion/Makefile +++ b/examples/model-conversion/Makefile @@ -118,13 +118,17 @@ embedding-convert-model: embedding-run-original-model: $(call validate_embedding_model_path,embedding-run-original-model) - @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/embedding/run-original-model.py + @EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \ + ./scripts/embedding/run-original-model.py \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") embedding-run-converted-model: - @CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/embedding/run-converted-model.sh ${CONVERTED_EMBEDDING_MODEL} + @./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") embedding-verify-logits: embedding-run-original-model embedding-run-converted-model - @./scripts/embedding/compare-embeddings-logits.sh + @./scripts/embedding/compare-embeddings-logits.sh \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") embedding-inspect-original-model: $(call validate_embedding_model_path,embedding-inspect-original-model) @@ -156,7 +160,8 @@ embedding-quantize-model: $(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL) embedding-run-quantized-model: - @./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL} + @./scripts/embedding/run-converted-model.sh $(QUANTIZED_EMBEDDING_MODEL) \ + $(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)") ### ### Perplexity targets/recipes diff --git a/examples/model-conversion/logits.cpp b/examples/model-conversion/logits.cpp index ddc5e9005f9e0..6dc334189f4be 100644 --- a/examples/model-conversion/logits.cpp +++ b/examples/model-conversion/logits.cpp @@ -151,6 +151,35 @@ int main(int argc, char ** argv) { logits = llama_get_embeddings(ctx); n_logits = llama_model_n_embd(model) * batch.n_tokens; type = "-embeddings"; + + const int n_embd = llama_model_n_embd(model); + const int n_embd_count = batch.n_tokens; + + printf("Embedding dimension: %d\n", n_embd); + printf("\n"); + + // Print embeddings in the specified format + for (int j = 0; j < n_embd_count; j++) { + printf("embedding %d: ", j); + + // Print first 3 values + for (int i = 0; i < 3 && i < n_embd; i++) { + printf("%9.6f ", logits[j * n_embd + i]); + } + + printf(" ... 
"); + + // Print last 3 values + for (int i = n_embd - 3; i < n_embd; i++) { + if (i >= 0) { + printf("%9.6f ", logits[j * n_embd + i]); + } + } + + printf("\n"); + } + printf("\n"); + printf("Embeddings size: %d\n", n_logits); } else { logits = llama_get_logits_ith(ctx, batch.n_tokens - 1); @@ -183,22 +212,23 @@ int main(int argc, char ** argv) { return 1; } for (int i = 0; i < n_logits; i++) { - fprintf(f, "%d: %.6f\n", i, logits[i]); // Added index and changed format + fprintf(f, "%d: %.6f\n", i, logits[i]); } fclose(f); - // Print first and last 10 logits for quick verification - printf("First 10 logits: "); - for (int i = 0; i < 10 && i < n_logits; i++) { - printf("%.6f ", logits[i]); - } - printf("\n"); + if (!embedding_mode) { + printf("First 10 logits: "); + for (int i = 0; i < 10 && i < n_logits; i++) { + printf("%.6f ", logits[i]); + } + printf("\n"); - printf("Last 10 logits: "); - for (int i = n_logits - 10; i < n_logits; i++) { - if (i >= 0) printf("%.6f ", logits[i]); + printf("Last 10 logits: "); + for (int i = n_logits - 10; i < n_logits; i++) { + if (i >= 0) printf("%.6f ", logits[i]); + } + printf("\n\n"); } - printf("\n\n"); printf("Logits saved to %s\n", bin_filename); printf("Logits saved to %s\n", txt_filename); diff --git a/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh b/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh index 1401dcb43ee92..c48af3075c62f 100755 --- a/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh +++ b/examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh @@ -2,8 +2,37 @@ set -e -MODEL_PATH="${1:-"$EMBEDDING_MODEL_PATH"}" -MODEL_NAME="${2:-$(basename "$MODEL_PATH")}" +# Parse command line arguments +MODEL_PATH="" +MODEL_NAME="" +PROMPTS_FILE="" + +# First argument is always model path +if [ $# -gt 0 ] && [[ "$1" != --* ]]; then + MODEL_PATH="$1" + shift +fi + +# Parse remaining arguments +while [[ $# -gt 0 ]]; do + case $1 in + --prompts-file|-pf) + PROMPTS_FILE="$2" + shift 2 + ;; + *) + # If MODEL_NAME not set and this isn't a flag, use as model name + if [ -z "$MODEL_NAME" ] && [[ "$1" != --* ]]; then + MODEL_NAME="$1" + fi + shift + ;; + esac +done + +# Set defaults +MODEL_PATH="${MODEL_PATH:-"$EMBEDDING_MODEL_PATH"}" +MODEL_NAME="${MODEL_NAME:-$(basename "$MODEL_PATH")}" if [ -t 0 ]; then CPP_EMBEDDINGS="data/llamacpp-${MODEL_NAME}-embeddings.bin" @@ -35,8 +64,18 @@ with open('$TEMP_FILE', 'wb') as f: trap "rm -f $TEMP_FILE" EXIT fi -python scripts/utils/semantic_check.py --model-path $MODEL_PATH \ +# Build the semantic_check.py command +SEMANTIC_CMD="python scripts/utils/semantic_check.py --model-path $MODEL_PATH \ --python-embeddings data/pytorch-${MODEL_NAME}-embeddings.bin \ - --cpp-embeddings $CPP_EMBEDDINGS \ - --prompt "Hello world today" + --cpp-embeddings $CPP_EMBEDDINGS" + +# Add prompts file if specified, otherwise use default prompt +if [ -n "$PROMPTS_FILE" ]; then + SEMANTIC_CMD="$SEMANTIC_CMD --prompts-file \"$PROMPTS_FILE\"" +else + SEMANTIC_CMD="$SEMANTIC_CMD --prompt \"Hello world today\"" +fi + +# Execute the command +eval $SEMANTIC_CMD diff --git a/examples/model-conversion/scripts/embedding/run-converted-model.sh b/examples/model-conversion/scripts/embedding/run-converted-model.sh index 24b28106275df..f3e2676632070 100755 --- a/examples/model-conversion/scripts/embedding/run-converted-model.sh +++ b/examples/model-conversion/scripts/embedding/run-converted-model.sh @@ -2,8 +2,27 @@ set -e -# First try command line argument, then 
environment variable, then file -CONVERTED_MODEL="${1:-"$CONVERTED_EMBEDDING_MODEL"}" +# Parse command line arguments +CONVERTED_MODEL="" +PROMPTS_FILE="" + +while [[ $# -gt 0 ]]; do + case $1 in + -p|--prompts-file) + PROMPTS_FILE="$2" + shift 2 + ;; + *) + if [ -z "$CONVERTED_MODEL" ]; then + CONVERTED_MODEL="$1" + fi + shift + ;; + esac +done + +# First try command line argument, then environment variable +CONVERTED_MODEL="${CONVERTED_MODEL:-"$CONVERTED_EMBEDDING_MODEL"}" # Final check if we have a model path if [ -z "$CONVERTED_MODEL" ]; then @@ -13,8 +32,19 @@ if [ -z "$CONVERTED_MODEL" ]; then exit 1 fi +# Read prompt from file or use default +if [ -n "$PROMPTS_FILE" ]; then + if [ ! -f "$PROMPTS_FILE" ]; then + echo "Error: Prompts file '$PROMPTS_FILE' not found" >&2 + exit 1 + fi + PROMPT=$(cat "$PROMPTS_FILE") +else + PROMPT="Hello world today" +fi + echo $CONVERTED_MODEL cmake --build ../../build --target llama-logits -j8 - -../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "Hello world today" +# TODO: update logits.cpp to accept a --file/-f option for the prompt +../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "$PROMPT" diff --git a/examples/model-conversion/scripts/embedding/run-original-model.py b/examples/model-conversion/scripts/embedding/run-original-model.py index b9db0b893f13a..4a3e162413fa6 100755 --- a/examples/model-conversion/scripts/embedding/run-original-model.py +++ b/examples/model-conversion/scripts/embedding/run-original-model.py @@ -13,14 +13,37 @@ parser = argparse.ArgumentParser(description='Process model with specified path') parser.add_argument('--model-path', '-m', help='Path to the model') +parser.add_argument('--prompts-file', '-p', help='Path to file containing prompts (one per line)') args = parser.parse_args() +def read_prompt_from_file(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read().strip() + except FileNotFoundError: + print(f"Error: Prompts file '{file_path}' not found") + exit(1) + except Exception as e: + print(f"Error reading prompts file: {e}") + exit(1) + model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path) if model_path is None: parser.error("Model path must be specified either via --model-path argument or EMBEDDING_MODEL_PATH environment variable") tokenizer = AutoTokenizer.from_pretrained(model_path) +config = AutoConfig.from_pretrained(model_path) + +# This can be used to override the sliding window size for manual testing. This +# can be useful to verify the sliding window attention mask in the original model +# and compare it with the converted .gguf model. 
+if hasattr(config, 'sliding_window'): + original_sliding_window = config.sliding_window + #original_sliding_window = 6 + print(f"Modified sliding window: {original_sliding_window} -> {config.sliding_window}") + +print(f"Using unreleased model: {unreleased_model_name}") if unreleased_model_name: model_name_lower = unreleased_model_name.lower() unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}" @@ -29,19 +52,28 @@ try: model_class = getattr(importlib.import_module(unreleased_module_path), class_name) - model = model_class.from_pretrained(model_path) # Note: from_pretrained, not fromPretrained + model = model_class.from_pretrained(model_path, config=config) except (ImportError, AttributeError) as e: print(f"Failed to import or load model: {e}") exit(1) else: - model = AutoModel.from_pretrained(model_path) + model = AutoModel.from_pretrained(model_path, config=config) print(f"Model class: {type(model)}") -#print(f"Model file: {type(model).__module__}") -config = AutoConfig.from_pretrained(model_path) +print(f"Model file: {type(model).__module__}") + +# Verify the model is using the correct sliding window +if hasattr(model.config, 'sliding_window'): + print(f"Model's sliding_window: {model.config.sliding_window}") +else: + print("Model config does not have sliding_window attribute") model_name = os.path.basename(model_path) -texts = [ "Hello world today" ] +if args.prompts_file: + prompt_text = read_prompt_from_file(args.prompts_file) + texts = [prompt_text] +else: + texts = ["Hello world today"] encoded = tokenizer( texts, diff --git a/examples/model-conversion/scripts/utils/inspect-org-model.py b/examples/model-conversion/scripts/utils/inspect-org-model.py index ea14947fd2ef8..bc6f45a5fb7d0 100755 --- a/examples/model-conversion/scripts/utils/inspect-org-model.py +++ b/examples/model-conversion/scripts/utils/inspect-org-model.py @@ -40,7 +40,7 @@ file_path = os.path.join(model_path, file_name) print(f"\n--- From {file_name} ---") - with safe_open(file_path, framework="pt") as f: # type: ignore + with safe_open(file_path, framework="pt") as f: for tensor_name in sorted(tensor_names): tensor = f.get_tensor(tensor_name) print(f"- {tensor_name} : shape = {tensor.shape}, dtype = {tensor.dtype}") @@ -49,7 +49,7 @@ # Single file model (original behavior) print("Single-file model detected") - with safe_open(single_file_path, framework="pt") as f: # type: ignore + with safe_open(single_file_path, framework="pt") as f: keys = f.keys() print("Tensors in model:") for key in sorted(keys): diff --git a/examples/model-conversion/scripts/utils/semantic_check.py b/examples/model-conversion/scripts/utils/semantic_check.py index d2110480974e7..7fd417bceaa8b 100644 --- a/examples/model-conversion/scripts/utils/semantic_check.py +++ b/examples/model-conversion/scripts/utils/semantic_check.py @@ -101,6 +101,17 @@ def test_single_prompt_similarity(python_emb, cpp_emb, tokens, prompt): 'rms_diff': np.sqrt(np.mean(diff_matrix**2)) } +def read_prompt_from_file(file_path): + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read().strip() + except FileNotFoundError: + print(f"Error: Prompts file '{file_path}' not found") + exit(1) + except Exception as e: + print(f"Error reading prompts file: {e}") + exit(1) + def main(): parser = argparse.ArgumentParser(description='Test semantic similarity between Python and llama.cpp embeddings') parser.add_argument('--model-path', '-m', required=True, help='Path to the original Python model') @@ -108,14 +119,20 @@ def 
main(): parser.add_argument('--cpp-embeddings', '-ce', help='Path to llama.cpp embeddings "logits" binary file') parser.add_argument('--causal', '-c', default=False, help='if the model is causal (default: false)', action='store_true') parser.add_argument('--prompt', '-p', default='Hello world today', help='Test prompt') + parser.add_argument('--prompts-file', '-pf', help='Path to file containing prompts') args = parser.parse_args() + if args.prompts_file: + prompt = read_prompt_from_file(args.prompts_file) + else: + prompt = args.prompt + print("Semantic Similarity Test Between Python and llama.cpp Embedding Models") print("=" * 70) # Single prompt detailed comparison - print(f"\nTesting with prompt: '{args.prompt}'") + print(f"\nTesting with prompt: '{prompt}'") # Load the python model to get configuration information and also to load the tokenizer. print("Loading model and tokenizer using AutoTokenizer:", args.model_path) @@ -144,7 +161,7 @@ def main(): else: model = AutoModel.from_pretrained(args.model_path) - encoded = tokenizer(args.prompt, return_tensors="pt") + encoded = tokenizer(prompt, return_tensors="pt") tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0]) n_tokens = len(tokens) print(f"n_tokens: {n_tokens}"); @@ -155,7 +172,7 @@ def main(): python_embeddings = load_embeddings_from_file(args.python_embeddings, n_tokens, model.config.hidden_size) # Run comparison - results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, args.prompt) + results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, prompt) # Summary print(f"\n=== SUMMARY ===")
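
Example of how the PROMPTS_FILE plumbing added above is meant to be driven end to end. This is a minimal sketch, not part of the diff: the model paths and the prompts file name are illustrative, and it assumes the commands are run from examples/model-conversion with the variables the Makefile targets expect. Note that both run-original-model.py and run-converted-model.sh read the whole file content as a single prompt.

    # Write the test prompt to a file (the file name is arbitrary)
    printf 'Hello world today\n' > prompts.txt

    # Run the original model, the converted model, and the embeddings comparison,
    # all with the same prompts file
    make embedding-verify-logits \
        EMBEDDING_MODEL_PATH=/path/to/original-model \
        CONVERTED_EMBEDDING_MODEL=/path/to/converted-model.gguf \
        PROMPTS_FILE=prompts.txt

If PROMPTS_FILE is not set, each target falls back to the previous hard-coded "Hello world today" prompt, so existing invocations keep working unchanged.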