13 changes: 9 additions & 4 deletions examples/model-conversion/Makefile
@@ -118,13 +118,17 @@ embedding-convert-model:

embedding-run-original-model:
$(call validate_embedding_model_path,embedding-run-original-model)
@EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/embedding/run-original-model.py
@EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
./scripts/embedding/run-original-model.py \
$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")

embedding-run-converted-model:
@CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/embedding/run-converted-model.sh ${CONVERTED_EMBEDDING_MODEL}
@./scripts/embedding/run-converted-model.sh $(CONVERTED_EMBEDDING_MODEL) \
$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")

embedding-verify-logits: embedding-run-original-model embedding-run-converted-model
@./scripts/embedding/compare-embeddings-logits.sh
@./scripts/embedding/compare-embeddings-logits.sh \
$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")

embedding-inspect-original-model:
$(call validate_embedding_model_path,embedding-inspect-original-model)
@@ -156,7 +160,8 @@ embedding-quantize-model:
$(call quantize_model,$(CONVERTED_EMBEDDING_MODEL),QUANTIZED_EMBEDDING_MODEL)

embedding-run-quantized-model:
@./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL}
@./scripts/embedding/run-converted-model.sh $(QUANTIZED_EMBEDDING_MODEL) \
$(if $(PROMPTS_FILE),--prompts-file "$(PROMPTS_FILE)")

###
### Perplexity targets/recipes
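For reference, a minimal usage sketch of the new PROMPTS_FILE hook added above — the model paths and the prompts.txt name are placeholders, not part of the PR:

# Hypothetical invocation; any text file works as the prompt source.
export EMBEDDING_MODEL_PATH=/path/to/original/model
export CONVERTED_EMBEDDING_MODEL=/path/to/converted/model.gguf
make embedding-verify-logits PROMPTS_FILE=prompts.txt

When PROMPTS_FILE is unset, each $(if ...) guard expands to nothing and the targets behave exactly as before.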
52 changes: 41 additions & 11 deletions examples/model-conversion/logits.cpp
@@ -151,6 +151,35 @@ int main(int argc, char ** argv) {
logits = llama_get_embeddings(ctx);
n_logits = llama_model_n_embd(model) * batch.n_tokens;
type = "-embeddings";

const int n_embd = llama_model_n_embd(model);
const int n_embd_count = batch.n_tokens;

printf("Embedding dimension: %d\n", n_embd);
printf("\n");

// Print embeddings in the specified format
for (int j = 0; j < n_embd_count; j++) {
printf("embedding %d: ", j);

// Print first 3 values
for (int i = 0; i < 3 && i < n_embd; i++) {
printf("%9.6f ", logits[j * n_embd + i]);
}

printf(" ... ");

// Print last 3 values
for (int i = n_embd - 3; i < n_embd; i++) {
if (i >= 0) {
printf("%9.6f ", logits[j * n_embd + i]);
}
}

printf("\n");
}
printf("\n");

printf("Embeddings size: %d\n", n_logits);
} else {
logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
@@ -183,22 +212,23 @@ int main(int argc, char ** argv) {
return 1;
}
for (int i = 0; i < n_logits; i++) {
fprintf(f, "%d: %.6f\n", i, logits[i]); // Added index and changed format
fprintf(f, "%d: %.6f\n", i, logits[i]);
}
fclose(f);

// Print first and last 10 logits for quick verification
printf("First 10 logits: ");
for (int i = 0; i < 10 && i < n_logits; i++) {
printf("%.6f ", logits[i]);
}
printf("\n");
if (!embedding_mode) {
printf("First 10 logits: ");
for (int i = 0; i < 10 && i < n_logits; i++) {
printf("%.6f ", logits[i]);
}
printf("\n");

printf("Last 10 logits: ");
for (int i = n_logits - 10; i < n_logits; i++) {
if (i >= 0) printf("%.6f ", logits[i]);
printf("Last 10 logits: ");
for (int i = n_logits - 10; i < n_logits; i++) {
if (i >= 0) printf("%.6f ", logits[i]);
}
printf("\n\n");
}
printf("\n\n");

printf("Logits saved to %s\n", bin_filename);
printf("Logits saved to %s\n", txt_filename);
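The preview above mirrors what lands in the binary dump. As a sanity check, the raw file can be eyeballed with od — a sketch assuming the dump is raw float32 and follows the comparison scripts' default naming, neither of which is shown in this hunk:

# Hypothetical check: print the first 12 float32 values of the dump.
od -An -f -N 48 data/llamacpp-mymodel-embeddings.bin

The leading values should match the "embedding 0:" preview printed by llama-logits.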
examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh
@@ -2,8 +2,37 @@

set -e

MODEL_PATH="${1:-"$EMBEDDING_MODEL_PATH"}"
MODEL_NAME="${2:-$(basename "$MODEL_PATH")}"
# Parse command line arguments
MODEL_PATH=""
MODEL_NAME=""
PROMPTS_FILE=""

# First argument is always model path
if [ $# -gt 0 ] && [[ "$1" != --* ]]; then
MODEL_PATH="$1"
shift
fi

# Parse remaining arguments
while [[ $# -gt 0 ]]; do
case $1 in
--prompts-file|-pf)
PROMPTS_FILE="$2"
shift 2
;;
*)
# If MODEL_NAME not set and this isn't a flag, use as model name
if [ -z "$MODEL_NAME" ] && [[ "$1" != --* ]]; then
MODEL_NAME="$1"
fi
shift
;;
esac
done

# Set defaults
MODEL_PATH="${MODEL_PATH:-"$EMBEDDING_MODEL_PATH"}"
MODEL_NAME="${MODEL_NAME:-$(basename "$MODEL_PATH")}"

if [ -t 0 ]; then
CPP_EMBEDDINGS="data/llamacpp-${MODEL_NAME}-embeddings.bin"
@@ -35,8 +64,18 @@ with open('$TEMP_FILE', 'wb') as f:
trap "rm -f $TEMP_FILE" EXIT
fi

python scripts/utils/semantic_check.py --model-path $MODEL_PATH \
# Build the semantic_check.py command
SEMANTIC_CMD="python scripts/utils/semantic_check.py --model-path $MODEL_PATH \
--python-embeddings data/pytorch-${MODEL_NAME}-embeddings.bin \
--cpp-embeddings $CPP_EMBEDDINGS \
--prompt "Hello world today"
--cpp-embeddings $CPP_EMBEDDINGS"

# Add prompts file if specified, otherwise use default prompt
if [ -n "$PROMPTS_FILE" ]; then
SEMANTIC_CMD="$SEMANTIC_CMD --prompts-file \"$PROMPTS_FILE\""
else
SEMANTIC_CMD="$SEMANTIC_CMD --prompt \"Hello world today\""
fi

# Execute the command
eval $SEMANTIC_CMD
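A hedged example of invoking the script directly, mirroring the Makefile target (model path and prompts file are placeholders):

# Hypothetical direct run; the first positional argument is the model path.
./scripts/embedding/compare-embeddings-logits.sh /path/to/original/model \
    --prompts-file prompts.txt

Building SEMANTIC_CMD as a string and eval-ing it works for the paths quoted above, though a bash array (cmd=(python ...); "${cmd[@]}") would be the more robust pattern if arguments ever contain spaces or shell metacharacters.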

examples/model-conversion/scripts/embedding/run-converted-model.sh
@@ -2,8 +2,27 @@

set -e

# First try command line argument, then environment variable, then file
CONVERTED_MODEL="${1:-"$CONVERTED_EMBEDDING_MODEL"}"
# Parse command line arguments
CONVERTED_MODEL=""
PROMPTS_FILE=""

while [[ $# -gt 0 ]]; do
case $1 in
-p|--prompts-file)
PROMPTS_FILE="$2"
shift 2
;;
*)
if [ -z "$CONVERTED_MODEL" ]; then
CONVERTED_MODEL="$1"
fi
shift
;;
esac
done

# First try command line argument, then environment variable
CONVERTED_MODEL="${CONVERTED_MODEL:-"$CONVERTED_EMBEDDING_MODEL"}"

# Final check if we have a model path
if [ -z "$CONVERTED_MODEL" ]; then
@@ -13,8 +32,19 @@ if [ -z "$CONVERTED_MODEL" ]; then
exit 1
fi

# Read prompt from file or use default
if [ -n "$PROMPTS_FILE" ]; then
if [ ! -f "$PROMPTS_FILE" ]; then
echo "Error: Prompts file '$PROMPTS_FILE' not found" >&2
exit 1
fi
PROMPT=$(cat "$PROMPTS_FILE")
else
PROMPT="Hello world today"
fi

echo $CONVERTED_MODEL

cmake --build ../../build --target llama-logits -j8

../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "Hello world today"
# TODO: update logits.cpp to accept a --file/-f option for the prompt
../../build/bin/llama-logits -m "$CONVERTED_MODEL" -embd-mode "$PROMPT"
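A usage sketch under the same assumptions (placeholder paths):

# Hypothetical run; the model can come from the positional argument
# or from CONVERTED_EMBEDDING_MODEL in the environment.
CONVERTED_EMBEDDING_MODEL=models/mymodel.gguf \
    ./scripts/embedding/run-converted-model.sh --prompts-file prompts.txt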
examples/model-conversion/scripts/embedding/run-original-model.py
@@ -13,14 +13,37 @@

parser = argparse.ArgumentParser(description='Process model with specified path')
parser.add_argument('--model-path', '-m', help='Path to the model')
parser.add_argument('--prompts-file', '-p', help='Path to a file containing the prompt text')
args = parser.parse_args()

def read_prompt_from_file(file_path):
try:
with open(file_path, 'r', encoding='utf-8') as f:
return f.read().strip()
except FileNotFoundError:
print(f"Error: Prompts file '{file_path}' not found")
exit(1)
except Exception as e:
print(f"Error reading prompts file: {e}")
exit(1)

model_path = os.environ.get('EMBEDDING_MODEL_PATH', args.model_path)
if model_path is None:
parser.error("Model path must be specified either via --model-path argument or EMBEDDING_MODEL_PATH environment variable")

tokenizer = AutoTokenizer.from_pretrained(model_path)

config = AutoConfig.from_pretrained(model_path)

# This can be used to override the sliding window size for manual testing,
# which is useful for verifying the sliding window attention mask of the
# original model against the converted .gguf model.
if hasattr(config, 'sliding_window'):
original_sliding_window = config.sliding_window
#config.sliding_window = 6
print(f"Sliding window: {original_sliding_window} -> {config.sliding_window}")

print(f"Using unreleased model: {unreleased_model_name}")
if unreleased_model_name:
model_name_lower = unreleased_model_name.lower()
unreleased_module_path = f"transformers.models.{model_name_lower}.modular_{model_name_lower}"
@@ -29,19 +52,28 @@

try:
model_class = getattr(importlib.import_module(unreleased_module_path), class_name)
model = model_class.from_pretrained(model_path) # Note: from_pretrained, not fromPretrained
model = model_class.from_pretrained(model_path, config=config)
except (ImportError, AttributeError) as e:
print(f"Failed to import or load model: {e}")
exit(1)
else:
model = AutoModel.from_pretrained(model_path)
model = AutoModel.from_pretrained(model_path, config=config)
print(f"Model class: {type(model)}")
#print(f"Model file: {type(model).__module__}")
config = AutoConfig.from_pretrained(model_path)
print(f"Model file: {type(model).__module__}")

# Verify the model is using the correct sliding window
if hasattr(model.config, 'sliding_window'):
print(f"Model's sliding_window: {model.config.sliding_window}")
else:
print("Model config does not have sliding_window attribute")

model_name = os.path.basename(model_path)

texts = [ "Hello world today" ]
if args.prompts_file:
prompt_text = read_prompt_from_file(args.prompts_file)
texts = [prompt_text]
else:
texts = ["Hello world today"]

encoded = tokenizer(
texts,
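Note that read_prompt_from_file() returns the entire file contents as a single prompt string, so a multi-line file becomes one prompt. A sketch of direct use (paths hypothetical):

# Hypothetical run; the whole file body is used as one prompt.
printf 'The quick brown fox jumps over the lazy dog' > prompts.txt
EMBEDDING_MODEL_PATH=/path/to/original/model \
    ./scripts/embedding/run-original-model.py --prompts-file prompts.txt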
(separate file — name not captured; a safetensors tensor-inspection script)
@@ -40,7 +40,7 @@
file_path = os.path.join(model_path, file_name)
print(f"\n--- From {file_name} ---")

with safe_open(file_path, framework="pt") as f: # type: ignore
with safe_open(file_path, framework="pt") as f:
for tensor_name in sorted(tensor_names):
tensor = f.get_tensor(tensor_name)
print(f"- {tensor_name} : shape = {tensor.shape}, dtype = {tensor.dtype}")
@@ -49,7 +49,7 @@
# Single file model (original behavior)
print("Single-file model detected")

with safe_open(single_file_path, framework="pt") as f: # type: ignore
with safe_open(single_file_path, framework="pt") as f:
keys = f.keys()
print("Tensors in model:")
for key in sorted(keys):
23 changes: 20 additions & 3 deletions examples/model-conversion/scripts/utils/semantic_check.py
@@ -101,21 +101,38 @@ def test_single_prompt_similarity(python_emb, cpp_emb, tokens, prompt):
'rms_diff': np.sqrt(np.mean(diff_matrix**2))
}

def read_prompt_from_file(file_path):
try:
with open(file_path, 'r', encoding='utf-8') as f:
return f.read().strip()
except FileNotFoundError:
print(f"Error: Prompts file '{file_path}' not found")
exit(1)
except Exception as e:
print(f"Error reading prompts file: {e}")
exit(1)

def main():
parser = argparse.ArgumentParser(description='Test semantic similarity between Python and llama.cpp embeddings')
parser.add_argument('--model-path', '-m', required=True, help='Path to the original Python model')
parser.add_argument('--python-embeddings', '-pe', help='Path to pytorch embeddings "logits" binary file')
parser.add_argument('--cpp-embeddings', '-ce', help='Path to llama.cpp embeddings "logits" binary file')
parser.add_argument('--causal', '-c', default=False, help='if the model is causal (default: false)', action='store_true')
parser.add_argument('--prompt', '-p', default='Hello world today', help='Test prompt')
parser.add_argument('--prompts-file', '-pf', help='Path to file containing prompts')

args = parser.parse_args()

if args.prompts_file:
prompt = read_prompt_from_file(args.prompts_file)
else:
prompt = args.prompt

print("Semantic Similarity Test Between Python and llama.cpp Embedding Models")
print("=" * 70)

# Single prompt detailed comparison
print(f"\nTesting with prompt: '{args.prompt}'")
print(f"\nTesting with prompt: '{prompt}'")

# Load the python model to get configuration information and also to load the tokenizer.
print("Loading model and tokenizer using AutoTokenizer:", args.model_path)
@@ -144,7 +161,7 @@ def main():
else:
model = AutoModel.from_pretrained(args.model_path)

encoded = tokenizer(args.prompt, return_tensors="pt")
encoded = tokenizer(prompt, return_tensors="pt")
tokens = tokenizer.convert_ids_to_tokens(encoded['input_ids'][0])
n_tokens = len(tokens)
print(f"n_tokens: {n_tokens}");
@@ -155,7 +172,7 @@ def main():
python_embeddings = load_embeddings_from_file(args.python_embeddings, n_tokens, model.config.hidden_size)

# Run comparison
results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, args.prompt)
results = test_single_prompt_similarity(python_embeddings, llamacpp_embeddings, tokens, prompt)

# Summary
print(f"\n=== SUMMARY ===")
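For completeness, a standalone invocation sketch using the flags defined above (file names are placeholders following the compare script's naming pattern):

# Hypothetical direct run of the checker with a prompts file.
python scripts/utils/semantic_check.py \
    --model-path /path/to/original/model \
    --python-embeddings data/pytorch-mymodel-embeddings.bin \
    --cpp-embeddings data/llamacpp-mymodel-embeddings.bin \
    --prompts-file prompts.txt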