Skip to content

Commit e4b7346

Browse files
committed
Add README
1 parent d81ce04 commit e4b7346

File tree

7 files changed

+457
-548
lines changed

7 files changed

+457
-548
lines changed

LICENSE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
MIT License
1+
The MIT License (MIT)
22

3-
Copyright (c) 2023-2024 The ggml authors
3+
Copyright (c) 2018 Max Strübing
44

55
Permission is hereby granted, free of charge, to any person obtaining a copy
66
of this software and associated documentation files (the "Software"), to deal

README.md

Lines changed: 407 additions & 537 deletions
Large diffs are not rendered by default.

common/arg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3480,7 +3480,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
34803480
[](common_params & params, const std::string & value) { params.diffusion_llada.cfg_scale = std::stof(value); }
34813481
).set_examples({ LLAMA_EXAMPLE_DIFFUSION_LLADA }));
34823482
add_opt(common_arg(
3483-
{ "--diffusion-remasking-alg" }, "N",
3483+
{ "--diffusion-alg" }, "N",
34843484
string_format("remasking algorithm: 0=LOW_CONFIDENCE, 1=RANDOM (default: %d)", params.diffusion_llada.remasking),
34853485
[](common_params & params, int value) { params.diffusion_llada.remasking = value; }
34863486
).set_examples({ LLAMA_EXAMPLE_DIFFUSION_LLADA }));

convert_hf_to_gguf.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,15 +2946,15 @@ def set_gguf_parameters(self):
29462946
self.gguf_writer.add_rope_dimension_count(rope_dim)
29472947

29482948
# Set context length for LLaDA
2949-
context_length = self.hparams.get("max_sequence_length")
2949+
context_length = self.hparams.get("max_sequence_length", 4096)
29502950
self.gguf_writer.add_context_length(context_length)
29512951

29522952
# Set embedding length (dimension size)
2953-
embedding_length = self.hparams.get("d_model")
2953+
embedding_length = self.hparams.get("d_model", 4096)
29542954
self.gguf_writer.add_embedding_length(embedding_length)
29552955

29562956
# Set feed forward length (MLP hidden size)
2957-
feed_forward_length = self.hparams.get("mlp_hidden_size")
2957+
feed_forward_length = self.hparams.get("mlp_hidden_size", 12288)
29582958
self.gguf_writer.add_feed_forward_length(feed_forward_length)
29592959

29602960
# Set RoPE parameters

examples/diffusion/README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Diffusion Text Generation Examples
2+
3+
This directory contains implementations for diffusion-based text generation using two different model architectures: **Dream** and **LLaDA-8B**. Both models use iterative denoising processes to generate text, but employ different sampling strategies and algorithms.
4+
5+
## Supported Models
6+
7+
### 1. Dream Model (`llama-diffusion-dream-cli`)
8+
9+
- https://huggingface.co/Dream-org/Dream-v0-Base-7B
10+
- Original PR - https://github.com/ggml-org/llama.cpp/pull/14644
11+
12+
The Dream model supports four different sampling algorithms controlled by the `--diffusion-alg` parameter:
13+
14+
1. **ORIGIN (0)** - Original diffusion algorithm
15+
- Uses probability transfer based on timestep ratios
16+
- Default algorithm with standard confidence-based token selection
17+
18+
2. **MASKGIT_PLUS (1)** - Enhanced MaskGIT sampling
19+
- Improved version of the MaskGIT algorithm
20+
21+
3. **TOPK_MARGIN (2)** - Top-K margin-based sampling
22+
- Confidence calculated as the margin between top-1 and top-2 probabilities
23+
24+
4. **ENTROPY (3)** - Entropy-based sampling (recommended)
25+
- Uses entropy calculation for confidence estimation
26+
27+
### 2. LLaDA-8B Model (`llama-diffusion-llada-cli`)
28+
29+
- https://huggingface.co/GSAI-ML/LLaDA-8B-Instruct
30+
31+
#### LLaDA Model Remasking Strategies
32+
33+
The LLaDA model uses two remasking approaches controlled by the `--diffusion-alg` parameter:
34+
35+
1. **REMASKING_LOW_CONFIDENCE (0)** - Default strategy
36+
- Remasks tokens with lowest confidence scores
37+
- Uses softmax probabilities to determine confidence
38+
39+
2. **REMASKING_RANDOM (1)** - Random remasking

examples/diffusion/diffusion-llada-cli.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ int main(int argc, char ** argv) {
489489
//clear screen and move cursor to top-left
490490
LOG_INF("\033[2J\033[H");
491491
}
492-
492+
493493
output_tokens.erase(output_tokens.begin(), output_tokens.begin() + n_input);
494494
std::string output_data = common_detokenize(vocab, output_tokens, false);
495495
LOG_INF("\n%s\n", output_data.c_str());
@@ -502,4 +502,4 @@ int main(int argc, char ** argv) {
502502
llama_backend_free();
503503

504504
return 0;
505-
}
505+
}

src/llama-model.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8024,7 +8024,7 @@ struct llm_build_dream : public llm_graph_context {
80248024
};
80258025

80268026
struct llm_build_llada : public llm_graph_context {
8027-
llm_build_llada(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) :
8027+
llm_build_llada(const llama_model & model, const llm_graph_params & params) :
80288028
llm_graph_context(params) {
80298029
// LLaDA is similar to LLaMA but uses non-causal attention for diffusion
80308030
const int64_t n_embd_head = hparams.n_embd_head_v;
@@ -8077,7 +8077,7 @@ struct llm_build_llada : public llm_graph_context {
80778077
cb(Kcur, "Kcur", il);
80788078
cb(Vcur, "Vcur", il);
80798079

8080-
cur = build_attn(inp_attn, gf, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, nullptr, nullptr,
8080+
cur = build_attn(inp_attn, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, nullptr, nullptr,
80818081
1.0f / sqrtf(float(n_embd_head)), il);
80828082
}
80838083

@@ -17337,7 +17337,7 @@ ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const {
1733717337
break;
1733817338
case LLM_ARCH_LLADA:
1733917339
{
17340-
llm = std::make_unique<llm_build_llada>(*this, params, gf);
17340+
llm = std::make_unique<llm_build_llada>(*this, params);
1734117341
}
1734217342
break;
1734317343
case LLM_ARCH_QWEN2VL:

0 commit comments

Comments
 (0)