From 95e816f2bca48de32167ce6243e6770dee23923d Mon Sep 17 00:00:00 2001 From: Joao Gante Date: Mon, 16 Sep 2024 09:44:57 +0100 Subject: [PATCH 01/50] Cohere: update RoPE structure (#33408) --- .../models/cohere/configuration_cohere.py | 43 +++++ .../models/cohere/modeling_cohere.py | 170 ++++++++++++++---- src/transformers/models/dbrx/modeling_dbrx.py | 2 +- .../models/gemma/modeling_gemma.py | 2 +- .../models/granite/modeling_granite.py | 2 +- .../models/llama/configuration_llama.py | 2 +- .../models/llama/modeling_llama.py | 2 +- .../models/mistral/modeling_mistral.py | 2 +- .../models/mixtral/modeling_mixtral.py | 2 +- src/transformers/models/olmo/modeling_olmo.py | 2 +- .../models/olmoe/modeling_olmoe.py | 2 +- .../models/persimmon/modeling_persimmon.py | 2 +- src/transformers/models/phi/modeling_phi.py | 2 +- src/transformers/models/phi3/modeling_phi3.py | 2 +- .../models/qwen2/modeling_qwen2.py | 2 +- .../models/qwen2_moe/modeling_qwen2_moe.py | 2 +- .../models/stablelm/modeling_stablelm.py | 2 +- .../models/starcoder2/modeling_starcoder2.py | 2 +- 18 files changed, 190 insertions(+), 55 deletions(-) diff --git a/src/transformers/models/cohere/configuration_cohere.py b/src/transformers/models/cohere/configuration_cohere.py index 73973bfad60b93..3c1237e5113789 100644 --- a/src/transformers/models/cohere/configuration_cohere.py +++ b/src/transformers/models/cohere/configuration_cohere.py @@ -20,6 +20,7 @@ """Cohere model configuration""" from ...configuration_utils import PretrainedConfig +from ...modeling_rope_utils import rope_config_validation from ...utils import logging @@ -79,6 +80,43 @@ class CohereConfig(PretrainedConfig): Whether to tie weight embeddings rope_theta (`float`, *optional*, defaults to 10000.0): The base period of the RoPE embeddings. + rope_scaling (`Dict`, *optional*): + Dictionary containing the scaling configuration for the RoPE embeddings. 
NOTE: if you apply new rope type + and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value + accordingly. + Expected contents: + `rope_type` (`str`): + The sub-variant of RoPE to use. Can be one of ['default', 'linear', 'dynamic', 'yarn', 'longrope', + 'llama3'], with 'default' being the original RoPE implementation. + `factor` (`float`, *optional*): + Used with all rope types except 'default'. The scaling factor to apply to the RoPE embeddings. In + most scaling types, a `factor` of x will enable the model to handle sequences of length x * + original maximum pre-trained length. + `original_max_position_embeddings` (`int`, *optional*): + Used with 'dynamic', 'longrope' and 'llama3'. The original max position embeddings used during + pretraining. + `attention_factor` (`float`, *optional*): + Used with 'yarn' and 'longrope'. The scaling factor to be applied on the attention + computation. If unspecified, it defaults to value recommended by the implementation, using the + `factor` field to infer the suggested value. + `beta_fast` (`float`, *optional*): + Only used with 'yarn'. Parameter to set the boundary for extrapolation (only) in the linear + ramp function. If unspecified, it defaults to 32. + `beta_slow` (`float`, *optional*): + Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear + ramp function. If unspecified, it defaults to 1. + `short_factor` (`List[float]`, *optional*): + Only used with 'longrope'. The scaling factor to be applied to short contexts (< + `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden + size divided by the number of attention heads divided by 2 + `long_factor` (`List[float]`, *optional*): + Only used with 'longrope'. The scaling factor to be applied to long contexts (> + `original_max_position_embeddings`). 
Must be a list of numbers with the same length as the hidden + size divided by the number of attention heads divided by 2 + `low_freq_factor` (`float`, *optional*): + Only used with 'llama3'. Scaling factor applied to low frequency components of the RoPE + `high_freq_factor` (`float`, *optional*): + Only used with 'llama3'. Scaling factor applied to high frequency components of the RoPE attention_bias (`bool`, defaults to `False`, *optional*, defaults to `False`): Whether to use a bias in the query, key, value and output projection layers during self-attention. attention_dropout (`float`, *optional*, defaults to 0.0): @@ -121,6 +159,7 @@ def __init__( eos_token_id=255001, tie_word_embeddings=True, rope_theta=10000.0, + rope_scaling=None, attention_bias=False, attention_dropout=0.0, use_qk_norm=False, @@ -144,10 +183,14 @@ def __init__( self.layer_norm_eps = layer_norm_eps self.use_cache = use_cache self.rope_theta = rope_theta + self.rope_scaling = rope_scaling self.attention_bias = attention_bias self.attention_dropout = attention_dropout self.use_qk_norm = use_qk_norm + # Validate the correctness of rotary position embeddings parameters + rope_config_validation(self) + super().__init__( pad_token_id=pad_token_id, bos_token_id=bos_token_id, diff --git a/src/transformers/models/cohere/modeling_cohere.py b/src/transformers/models/cohere/modeling_cohere.py index 4010d9ec3a4327..ae84a9ec2d1a43 100644 --- a/src/transformers/models/cohere/modeling_cohere.py +++ b/src/transformers/models/cohere/modeling_cohere.py @@ -37,6 +37,7 @@ BaseModelOutputWithPast, CausalLMOutputWithPast, ) +from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS from ...modeling_utils import PreTrainedModel from ...pytorch_utils import ALL_LAYERNORM_LAYERS from ...utils import ( @@ -135,35 +136,97 @@ def forward(self, hidden_states): class CohereRotaryEmbedding(nn.Module): - def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None, scaling_factor=1.0): + # Note: the forward 
pass of this RoPE is slightly different from Llama's, resulting in different `sin`/`cos` for + # the same parameterization. The differences are highlighted with a comment. + + def __init__( + self, + dim=None, + max_position_embeddings=2048, + base=10000, + device=None, + scaling_factor=1.0, + rope_type="default", + config: Optional[CohereConfig] = None, + ): super().__init__() - self.scaling_factor = scaling_factor - self.dim = dim - self.max_position_embeddings = max_position_embeddings - self.base = base - inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.int64).float().to(device) / self.dim)) + # TODO (joao): remove the `if` below, only used for BC + self.rope_kwargs = {} + if config is None: + logger.warning_once( + "`CohereRotaryEmbedding` can now be fully parameterized by passing the model config through the " + "`config` argument. All other arguments will be removed in v4.46" + ) + self.rope_kwargs = { + "rope_type": rope_type, + "factor": scaling_factor, + "dim": dim, + "base": base, + "max_position_embeddings": max_position_embeddings, + } + self.rope_type = rope_type + self.max_seq_len_cached = max_position_embeddings + self.original_max_seq_len = max_position_embeddings + else: + # BC: "rope_type" was originally "type" + if config.rope_scaling is not None: + self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type")) + else: + self.rope_type = "default" + self.max_seq_len_cached = config.max_position_embeddings + self.original_max_seq_len = config.max_position_embeddings + + self.config = config + self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] + + inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device, **self.rope_kwargs) self.register_buffer("inv_freq", inv_freq, persistent=False) + self.original_inv_freq = self.inv_freq + + def _dynamic_frequency_update(self, position_ids, device): + """ + dynamic RoPE layers should recompute `inv_freq` in the following situations: + 1 - 
growing beyond the cached sequence length (allow scaling) + 2 - the current sequence length is in the original scale (avoid losing precision with small sequences) + """ + seq_len = torch.max(position_ids) + 1 + if seq_len > self.max_seq_len_cached: # growth + inv_freq, self.attention_scaling = self.rope_init_fn( + self.config, device, seq_len=seq_len, **self.rope_kwargs + ) + self.register_buffer("inv_freq", inv_freq, persistent=False) # TODO joao: may break with compilation + self.max_seq_len_cached = seq_len + + if seq_len < self.original_max_seq_len and self.max_seq_len_cached > self.original_max_seq_len: # reset + self.register_buffer("inv_freq", self.original_inv_freq, persistent=False) + self.max_seq_len_cached = self.original_max_seq_len @torch.no_grad() def forward(self, x, position_ids): - # x: [bs, num_attention_heads, seq_len, head_size] + if "dynamic" in self.rope_type: + self._dynamic_frequency_update(position_ids, device=x.device) + + # Core RoPE block inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1) position_ids_expanded = position_ids[:, None, :].float() - - # Force float32 since bfloat16 loses precision on long contexts - # See https://github.com/huggingface/transformers/pull/29285 + # Force float32 (see https://github.com/huggingface/transformers/pull/29285) device_type = x.device.type device_type = device_type if isinstance(device_type, str) and device_type != "mps" else "cpu" with torch.autocast(device_type=device_type, enabled=False): freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) - emb = torch.repeat_interleave(freqs, 2, dim=-1) + emb = torch.repeat_interleave(freqs, 2, dim=-1) # This line differs from Llama's implementation cos = emb.cos() sin = emb.sin() - return cos, sin + + # Advanced RoPE types (e.g. 
yarn) apply a post-processing scaling factor, equivalent to scaling attention + cos = cos * self.attention_scaling + sin = sin * self.attention_scaling + + return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) def rotate_half(x): - # Split and rotate + # Split and rotate. Note that this function is different from e.g. Llama. x1 = x[..., ::2] x2 = x[..., 1::2] rot_x = torch.stack([-x2, x1], dim=-1).flatten(-2) @@ -272,17 +335,10 @@ def __init__(self, config: CohereConfig, layer_idx: Optional[int] = None): self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias) self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias) self.o_proj = nn.Linear(self.hidden_size, self.hidden_size, bias=config.attention_bias) - self._init_rope() - # Ignore copy - def _init_rope(self): - self.rotary_emb = CohereRotaryEmbedding( - self.head_dim, - max_position_embeddings=self.max_position_embeddings, - base=self.rope_theta, - ) + # TODO (joao): remove in v4.46 (RoPE is computed in the model, not in the decoder layers) + self.rotary_emb = CohereRotaryEmbedding(config=self.config) - # Ignore copy def forward( self, hidden_states: torch.Tensor, @@ -292,6 +348,7 @@ def forward( output_attentions: bool = False, use_cache: bool = False, cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46 **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: bsz, q_len, _ = hidden_states.size() @@ -310,7 +367,16 @@ def forward( key_states = key_states.transpose(1, 2) value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - cos, sin = self.rotary_emb(value_states, position_ids) + if position_embeddings is None: + logger.warning_once( + "The attention layers in this model are transitioning from computing the RoPE 
embeddings internally " + "through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed " + "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.46 `position_ids` will be " + "removed and `position_embeddings` will be mandatory." + ) + cos, sin = self.rotary_emb(value_states, position_ids) + else: + cos, sin = position_embeddings query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if past_key_value is not None: @@ -350,8 +416,7 @@ def forward( return attn_output, attn_weights, past_key_value -# copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 with Llama->Cohere -# TODO(joao): add me back asap :) +# Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2 with Llama->Cohere class CohereFlashAttention2(CohereAttention): """ Cohere flash attention module. This module inherits from `CohereAttention` as the weights of the module stays @@ -377,6 +442,7 @@ def forward( output_attentions: bool = False, use_cache: bool = False, cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46 **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if isinstance(past_key_value, StaticCache): @@ -402,7 +468,16 @@ def forward( key_states = key_states.transpose(1, 2) value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - cos, sin = self.rotary_emb(value_states, position_ids) + if position_embeddings is None: + logger.warning_once( + "The attention layers in this model are transitioning from computing the RoPE embeddings internally " + "through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed " + "`position_embeddings` (Tuple of tensors, containing cos and sin). 
In v4.46 `position_ids` will be " + "removed and `position_embeddings` will be mandatory." + ) + cos, sin = self.rotary_emb(value_states, position_ids) + else: + cos, sin = position_embeddings query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if past_key_value is not None: @@ -418,7 +493,6 @@ def forward( dropout_rate = self.attention_dropout if self.training else 0.0 - # Ignore copy # In PEFT, usually we cast the layer norms in float32 for training stability reasons # therefore the input hidden states gets silently casted in float32. Hence, we need # cast them back in the correct dtype just to be sure everything works as expected. @@ -465,8 +539,6 @@ def forward( return attn_output, attn_weights, past_key_value -# copied from transformers.models.llama.modeling_llama.LlamaSdpaAttention Llama->Cohere -# TODO(joao): add me back asap :) class CohereSdpaAttention(CohereAttention): """ Cohere attention module using torch.nn.functional.scaled_dot_product_attention. This module inherits from @@ -474,7 +546,6 @@ class CohereSdpaAttention(CohereAttention): SDPA API. """ - # Ignore copy def forward( self, hidden_states: torch.Tensor, @@ -484,6 +555,7 @@ def forward( output_attentions: bool = False, use_cache: bool = False, cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46 ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: if output_attentions: # TODO: Improve this warning with e.g. `model.config.attn_implementation = "manual"` once this is implemented. 
@@ -517,7 +589,16 @@ def forward( key_states = key_states.transpose(1, 2) value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2) - cos, sin = self.rotary_emb(value_states, position_ids) + if position_embeddings is None: + logger.warning_once( + "The attention layers in this model are transitioning from computing the RoPE embeddings internally " + "through `position_ids` (2D tensor with the indexes of the tokens), to using externally computed " + "`position_embeddings` (Tuple of tensors, containing cos and sin). In v4.46 `position_ids` will be " + "removed and `position_embeddings` will be mandatory." + ) + cos, sin = self.rotary_emb(value_states, position_ids) + else: + cos, sin = position_embeddings query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) if past_key_value is not None: @@ -587,6 +668,7 @@ def forward( output_attentions: Optional[bool] = False, use_cache: Optional[bool] = False, cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, # will become mandatory in v4.46 ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]: """ Args: @@ -601,6 +683,11 @@ def forward( If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see `past_key_values`). past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*): + Indices depicting the position of the input sequence tokens in the sequence + position_embeddings (`Tuple[torch.FloatTensor, torch.FloatTensor]`, *optional*): + Tuple containing the cosine and sine positional embeddings of shape `(batch_size, seq_len, head_dim)`, + with `head_dim` being the embedding dimension of each attention head. 
""" residual = hidden_states @@ -615,6 +702,7 @@ def forward( output_attentions=output_attentions, use_cache=use_cache, cache_position=cache_position, + position_embeddings=position_embeddings, ) # Fully Connected @@ -755,8 +843,7 @@ def _init_weights(self, module): "The bare Cohere Model outputting raw hidden-states without any specific head on top.", COHERE_START_DOCSTRING, ) -# copied from transformers.models.llama.modeling_llama.LlamaModel with Llama->Cohere -# TODO(joao): add me back asap :) +# Copied from transformers.models.llama.modeling_llama.LlamaModel with Llama->Cohere, LLAMA->COHERE class CohereModel(CoherePreTrainedModel): """ Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`CohereDecoderLayer`] @@ -776,6 +863,7 @@ def __init__(self, config: CohereConfig): [CohereDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)] ) self.norm = CohereLayerNorm(hidden_size=(config.hidden_size), eps=config.layer_norm_eps) + self.rotary_emb = CohereRotaryEmbedding(config=config) self.gradient_checkpointing = False # Initialize weights and apply final processing @@ -787,14 +875,13 @@ def get_input_embeddings(self): def set_input_embeddings(self, value): self.embed_tokens = value - # Ignore copy @add_start_docstrings_to_model_forward(COHERE_INPUTS_DOCSTRING) def forward( self, input_ids: torch.LongTensor = None, attention_mask: Optional[torch.Tensor] = None, position_ids: Optional[torch.LongTensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, + past_key_values: Optional[Union[Cache, List[torch.FloatTensor]]] = None, inputs_embeds: Optional[torch.FloatTensor] = None, use_cache: Optional[bool] = None, output_attentions: Optional[bool] = None, @@ -823,30 +910,33 @@ def forward( if inputs_embeds is None: inputs_embeds = self.embed_tokens(input_ids) - past_seen_tokens = 0 return_legacy_cache = False if ( use_cache and not isinstance(past_key_values, Cache) and not self.training ): # kept 
for BC (non `Cache` `past_key_values` inputs) return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) + logger.warning_once( + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " + "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" + ) if cache_position is None: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 cache_position = torch.arange( past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device ) - if position_ids is None: position_ids = cache_position.unsqueeze(0) causal_mask = self._update_causal_mask( attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions ) - - # embed positions hidden_states = inputs_embeds + # create position embeddings to be shared across the decoder layers + position_embeddings = self.rotary_emb(hidden_states, position_ids) + # decoder layers all_hidden_states = () if output_hidden_states else None all_self_attns = () if output_attentions else None @@ -866,6 +956,7 @@ def forward( output_attentions, use_cache, cache_position, + position_embeddings, ) else: layer_outputs = decoder_layer( @@ -876,6 +967,7 @@ def forward( output_attentions=output_attentions, use_cache=use_cache, cache_position=cache_position, + position_embeddings=position_embeddings, ) hidden_states = layer_outputs[0] diff --git a/src/transformers/models/dbrx/modeling_dbrx.py b/src/transformers/models/dbrx/modeling_dbrx.py index 8db9f6e8b7d09f..43bac44ba1be20 100644 --- a/src/transformers/models/dbrx/modeling_dbrx.py +++ b/src/transformers/models/dbrx/modeling_dbrx.py @@ -1066,7 +1066,7 @@ def forward( return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is 
deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py index 085751cd9bc039..b14e0a4b3d8ca5 100644 --- a/src/transformers/models/gemma/modeling_gemma.py +++ b/src/transformers/models/gemma/modeling_gemma.py @@ -862,7 +862,7 @@ def forward( return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/granite/modeling_granite.py b/src/transformers/models/granite/modeling_granite.py index ff10b6e6d875f9..876f5ed2a7c8da 100644 --- a/src/transformers/models/granite/modeling_granite.py +++ b/src/transformers/models/granite/modeling_granite.py @@ -839,7 +839,7 @@ def forward( return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/llama/configuration_llama.py b/src/transformers/models/llama/configuration_llama.py index 435f0091e06e70..a3667e06534564 100644 --- a/src/transformers/models/llama/configuration_llama.py +++ b/src/transformers/models/llama/configuration_llama.py @@ -192,7 +192,7 @@ def __init__( self.mlp_bias = mlp_bias self.head_dim = head_dim if head_dim is not None else self.hidden_size // self.num_attention_heads # Validate the correctness of rotary position embeddings parameters - # BC: if there is a 'type' field, move it to 'rope_type'. + # BC: if there is a 'type' field, copy it to 'rope_type'. if self.rope_scaling is not None and "type" in self.rope_scaling: self.rope_scaling["rope_type"] = self.rope_scaling["type"] rope_config_validation(self) diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py index 9a1d6c0749f932..c7017832b9324c 100644 --- a/src/transformers/models/llama/modeling_llama.py +++ b/src/transformers/models/llama/modeling_llama.py @@ -951,7 +951,7 @@ def forward( return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py index c43418182c3881..ffe16b27203301 100644 --- a/src/transformers/models/mistral/modeling_mistral.py +++ b/src/transformers/models/mistral/modeling_mistral.py @@ -767,7 +767,7 @@ def forward( past_key_values = DynamicCache.from_legacy_cache(past_key_values) return_legacy_cache = True logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/mixtral/modeling_mixtral.py b/src/transformers/models/mixtral/modeling_mixtral.py index 2e23d06699087e..c7062e75b1085c 100644 --- a/src/transformers/models/mixtral/modeling_mixtral.py +++ b/src/transformers/models/mixtral/modeling_mixtral.py @@ -1023,7 +1023,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py index 007e69570e7821..b4bda8e2db5251 100644 --- a/src/transformers/models/olmo/modeling_olmo.py +++ b/src/transformers/models/olmo/modeling_olmo.py @@ -873,7 +873,7 @@ def forward( return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/olmoe/modeling_olmoe.py b/src/transformers/models/olmoe/modeling_olmoe.py index a53f1eeda61196..a33338365312db 100644 --- a/src/transformers/models/olmoe/modeling_olmoe.py +++ b/src/transformers/models/olmoe/modeling_olmoe.py @@ -1012,7 +1012,7 @@ def forward( return_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py index 9fab09bdcc7877..ccaa2c7fd29aae 100644 --- a/src/transformers/models/persimmon/modeling_persimmon.py +++ b/src/transformers/models/persimmon/modeling_persimmon.py @@ -690,7 +690,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/phi/modeling_phi.py b/src/transformers/models/phi/modeling_phi.py index 0d8be04af20d5c..648d1653a3b503 100644 --- a/src/transformers/models/phi/modeling_phi.py +++ b/src/transformers/models/phi/modeling_phi.py @@ -981,7 +981,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/phi3/modeling_phi3.py b/src/transformers/models/phi3/modeling_phi3.py index 273b6a8f505e79..ec395679ae6207 100644 --- a/src/transformers/models/phi3/modeling_phi3.py +++ b/src/transformers/models/phi3/modeling_phi3.py @@ -1008,7 +1008,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py index 030c74b034b794..d0ea8ef0e376e0 100644 --- a/src/transformers/models/qwen2/modeling_qwen2.py +++ b/src/transformers/models/qwen2/modeling_qwen2.py @@ -920,7 +920,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py index b196ed72a49b23..6f483e50cde065 100644 --- a/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py +++ b/src/transformers/models/qwen2_moe/modeling_qwen2_moe.py @@ -1084,7 +1084,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/stablelm/modeling_stablelm.py b/src/transformers/models/stablelm/modeling_stablelm.py index 27d0c856a61bd6..d91c0832ed33da 100755 --- a/src/transformers/models/stablelm/modeling_stablelm.py +++ b/src/transformers/models/stablelm/modeling_stablelm.py @@ -965,7 +965,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. 
" "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) diff --git a/src/transformers/models/starcoder2/modeling_starcoder2.py b/src/transformers/models/starcoder2/modeling_starcoder2.py index c359c07c69c0b8..0be37c4e1fb91c 100644 --- a/src/transformers/models/starcoder2/modeling_starcoder2.py +++ b/src/transformers/models/starcoder2/modeling_starcoder2.py @@ -894,7 +894,7 @@ def forward( use_legacy_cache = True past_key_values = DynamicCache.from_legacy_cache(past_key_values) logger.warning_once( - "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. " + "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.46. " "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/internal/generation_utils#transformers.Cache)" ) From 5ce0a113b5bc9dd8dbb92dd866772d79847d9a92 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:07:59 +0200 Subject: [PATCH 02/50] Fix SSH workflow (#33451) * fix * update --------- Co-authored-by: ydshieh --- .github/workflows/ssh-runner.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ssh-runner.yml b/.github/workflows/ssh-runner.yml index b433abb484fac4..db649876f60492 100644 --- a/.github/workflows/ssh-runner.yml +++ b/.github/workflows/ssh-runner.yml @@ -58,8 +58,19 @@ jobs: #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step shell: bash run: | - if [ "${{ secrets[format('{0}_{1}', github.actor, 'SLACK_ID')] }}" != "" ]; then - echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', github.actor, 'SLACK_ID')] }}" >> $GITHUB_ENV + echo "${{ github.actor }}" + github_actor=${{ github.actor }} + 
github_actor=${github_actor/'-'/'_'} + echo "$github_actor" + echo "github_actor=$github_actor" >> $GITHUB_ENV + + - name: Store Slack infos + #because the SSH can be enabled dynamically if the workflow failed, so we need to store slack infos to be able to retrieve them during the waitforssh step + shell: bash + run: | + echo "${{ env.github_actor }}" + if [ "${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" != "" ]; then + echo "SLACKCHANNEL=${{ secrets[format('{0}_{1}', env.github_actor, 'SLACK_ID')] }}" >> $GITHUB_ENV else echo "SLACKCHANNEL=${{ secrets.SLACK_CIFEEDBACK_CHANNEL }}" >> $GITHUB_ENV fi From ce62a41880b5b70a304d068eb58f55894a5a7af8 Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 16 Sep 2024 13:08:31 +0200 Subject: [PATCH 03/50] Add keypoint-detection task guide (#33274) --------- Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/_toctree.yml | 2 + docs/source/en/tasks/keypoint_detection.md | 154 +++++++++++++++++++++ 2 files changed, 156 insertions(+) create mode 100644 docs/source/en/tasks/keypoint_detection.md diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 235ea81a7f1ea6..7eff2a38302669 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -81,6 +81,8 @@ title: Image Feature Extraction - local: tasks/mask_generation title: Mask Generation + - local: tasks/keypoint_detection + title: Keypoint Detection - local: tasks/knowledge_distillation_for_image_classification title: Knowledge Distillation for Computer Vision title: Computer Vision diff --git a/docs/source/en/tasks/keypoint_detection.md b/docs/source/en/tasks/keypoint_detection.md new file mode 100644 index 00000000000000..a0ec71a5c22000 --- /dev/null +++ b/docs/source/en/tasks/keypoint_detection.md @@ -0,0 +1,154 @@ + + +# Keypoint Detection + +[[open-in-colab]] + +Keypoint detection identifies and locates specific points of interest within an image. 
These keypoints, also known as landmarks, represent meaningful features of objects, such as facial features or object parts. These models take an image input and return the following outputs: + +- **Keypoints and Scores**: Points of interest and their confidence scores. +- **Descriptors**: A representation of the image region surrounding each keypoint, capturing its texture, gradient, orientation and other properties. + +In this guide, we will show how to extract keypoints from images. + +For this tutorial, we will use [SuperPoint](./model_doc/superpoint.md), a foundation model for keypoint detection. + +```python +from transformers import AutoImageProcessor, SuperPointForKeypointDetection +processor = AutoImageProcessor.from_pretrained("magic-leap-community/superpoint") +model = SuperPointForKeypointDetection.from_pretrained("magic-leap-community/superpoint") +``` + +Let's test the model on the images below. + +
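+ <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg" alt="Bee"> + <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png" alt="Cats"> +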
+ + +```python +import torch +from PIL import Image +import requests +import cv2 + + +url_image_1 = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg" +image_1 = Image.open(requests.get(url_image_1, stream=True).raw) +url_image_2 = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png" +image_2 = Image.open(requests.get(url_image_2, stream=True).raw) + +images = [image_1, image_2] +``` + +We can now process our inputs and infer. + +```python +inputs = processor(images,return_tensors="pt").to(model.device, model.dtype) +outputs = model(**inputs) +``` + +The model output has relative keypoints, descriptors, masks and scores for each item in the batch. The mask highlights areas of the image where keypoints are present. + +```python +SuperPointKeypointDescriptionOutput(loss=None, keypoints=tensor([[[0.0437, 0.0167], + [0.0688, 0.0167], + [0.0172, 0.0188], + ..., + [0.5984, 0.9812], + [0.6953, 0.9812]]]), + scores=tensor([[0.0056, 0.0053, 0.0079, ..., 0.0125, 0.0539, 0.0377], + [0.0206, 0.0058, 0.0065, ..., 0.0000, 0.0000, 0.0000]], + grad_fn=), descriptors=tensor([[[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357], + [-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357], + [-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357], + ...], + grad_fn=), mask=tensor([[1, 1, 1, ..., 1, 1, 1], + [1, 1, 1, ..., 0, 0, 0]], dtype=torch.int32), hidden_states=None) +``` + +To plot actual keypoints in the image, we need to postprocess the output. To do so, we have to pass the actual image sizes to `post_process_keypoint_detection` along with outputs. + +```python +image_sizes = [(image.size[1], image.size[0]) for image in images] +outputs = processor.post_process_keypoint_detection(outputs, image_sizes) +``` + +The outputs are now a list of dictionaries where each dictionary is a processed output of keypoints, scores and descriptors. 
+ +```python +[{'keypoints': tensor([[ 226, 57], + [ 356, 57], + [ 89, 64], + ..., + [3604, 3391]], dtype=torch.int32), + 'scores': tensor([0.0056, 0.0053, ...], grad_fn=), + 'descriptors': tensor([[-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357], + [-0.0807, 0.0114, -0.1210, ..., -0.1122, 0.0899, 0.0357]], + grad_fn=)}, + {'keypoints': tensor([[ 46, 6], + [ 78, 6], + [422, 6], + [206, 404]], dtype=torch.int32), + 'scores': tensor([0.0206, 0.0058, 0.0065, 0.0053, 0.0070, ...,grad_fn=), + 'descriptors': tensor([[-0.0525, 0.0726, 0.0270, ..., 0.0389, -0.0189, -0.0211], + [-0.0525, 0.0726, 0.0270, ..., 0.0389, -0.0189, -0.0211]}] +``` + +We can use these to plot the keypoints. + +```python +import matplotlib.pyplot as plt +import torch + +for i in range(len(images)): + keypoints = outputs[i]["keypoints"] + scores = outputs[i]["scores"] + descriptors = outputs[i]["descriptors"] + keypoints = outputs[i]["keypoints"].detach().numpy() + scores = outputs[i]["scores"].detach().numpy() + image = images[i] + image_width, image_height = image.size + + plt.axis('off') + plt.imshow(image) + plt.scatter( + keypoints[:, 0], + keypoints[:, 1], + s=scores * 100, + c='cyan', + alpha=0.4 + ) + plt.show() +``` + +Below you can see the outputs. + +
+ Bee + Cats +
+ From 2f62146f0e916c3e6752b59d34853be6df0506f2 Mon Sep 17 00:00:00 2001 From: Yoni Gozlan <74535834+yonigozlan@users.noreply.github.com> Date: Mon, 16 Sep 2024 11:26:26 -0400 Subject: [PATCH 04/50] Uniformize kwargs for LLaVa processor and update docs (#32858) * Uniformize kwargs for LlaVa and update docs * Change order of processor inputs in docstring * Improve BC support for reversed images and text inputs * cleanup llava processor call docstring * Add encoded inputs as valid text inputs in reverse input check, add deprecation version in warning * Put function check reversed images text outside base processor class * Refactor _validate_images_text_input_order * Add ProcessingUtilTester * fix processing and test_processing --- .../models/llava/modeling_llava.py | 2 +- .../models/llava/processing_llava.py | 73 ++++++++++--------- tests/models/llava/test_modeling_llava.py | 20 ++--- tests/models/llava/test_processor_llava.py | 57 ++++++++++++++- 4 files changed, 104 insertions(+), 48 deletions(-) diff --git a/src/transformers/models/llava/modeling_llava.py b/src/transformers/models/llava/modeling_llava.py index 9ad19ccee72228..eb1c55341b0784 100644 --- a/src/transformers/models/llava/modeling_llava.py +++ b/src/transformers/models/llava/modeling_llava.py @@ -405,7 +405,7 @@ def forward( >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg" >>> image = Image.open(requests.get(url, stream=True).raw) - >>> inputs = processor(text=prompt, images=image, return_tensors="pt") + >>> inputs = processor(images=image, text=prompt, return_tensors="pt") >>> # Generate >>> generate_ids = model.generate(**inputs, max_new_tokens=15) diff --git a/src/transformers/models/llava/processing_llava.py b/src/transformers/models/llava/processing_llava.py index 678724ae95be41..28a9410e6cbf0b 100644 --- a/src/transformers/models/llava/processing_llava.py +++ b/src/transformers/models/llava/processing_llava.py @@ -16,18 +16,33 @@ Processor class for Llava. 
""" -from typing import List, Optional, Union +import sys +from typing import List, Union from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput, get_image_size, to_numpy_array -from ...processing_utils import ProcessorMixin -from ...tokenization_utils_base import PaddingStrategy, PreTokenizedInput, TextInput, TruncationStrategy -from ...utils import TensorType, logging +from ...processing_utils import ProcessingKwargs, ProcessorMixin, _validate_images_text_input_order +from ...tokenization_utils_base import PreTokenizedInput, TextInput +from ...utils import logging +if sys.version_info >= (3, 11): + from typing import Unpack +else: + from typing_extensions import Unpack + logger = logging.get_logger(__name__) +class LlavaProcessorKwargs(ProcessingKwargs, total=False): + _defaults = { + "text_kwargs": { + "padding": False, + }, + "images_kwargs": {}, + } + + class LlavaProcessor(ProcessorMixin): r""" Constructs a Llava processor which wraps a Llava image processor and a Llava tokenizer into a single processor. @@ -73,12 +88,11 @@ def __init__( def __call__( self, - text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, images: ImageInput = None, - padding: Union[bool, str, PaddingStrategy] = False, - truncation: Union[bool, str, TruncationStrategy] = None, - max_length=None, - return_tensors: Optional[Union[str, TensorType]] = TensorType.PYTORCH, + text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None, + audio=None, + videos=None, + **kwargs: Unpack[LlavaProcessorKwargs], ) -> BatchFeature: """ Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text` @@ -88,29 +102,15 @@ def __call__( of the above two methods for more information. 
Args: + images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`): + The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch + tensor. Both channels-first and channels-last formats are supported. text (`str`, `List[str]`, `List[List[str]]`): The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set `is_split_into_words=True` (to lift the ambiguity with a batch of sequences). - images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`): - The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch - tensor. Both channels-first and channels-last formats are supported. - padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `False`): - Select a strategy to pad the returned sequences (according to the model's padding side and padding - index) among: - - `True` or `'longest'`: Pad to the longest sequence in the batch (or no padding if only a single - sequence if provided). - - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum - acceptable input length for the model if that argument is not provided. - - `False` or `'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of different - lengths). - max_length (`int`, *optional*): - Maximum length of the returned list and optionally padding length (see above). - truncation (`bool`, *optional*): - Activates truncation to cut input sequences longer than `max_length` to `max_length`. return_tensors (`str` or [`~utils.TensorType`], *optional*): If set, will return tensors of a particular framework. Acceptable values are: - - `'tf'`: Return TensorFlow `tf.constant` objects. 
- `'pt'`: Return PyTorch `torch.Tensor` objects. - `'np'`: Return NumPy `np.ndarray` objects. @@ -125,8 +125,19 @@ def __call__( `None`). - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`. """ + if images is None and text is None: + raise ValueError("You have to specify at least one of `images` or `text`.") + + # check if images and text inputs are reversed for BC + images, text = _validate_images_text_input_order(images, text) + + output_kwargs = self._merge_kwargs( + LlavaProcessorKwargs, + tokenizer_init_kwargs=self.tokenizer.init_kwargs, + **kwargs, + ) if images is not None: - image_inputs = self.image_processor(images, return_tensors=return_tensors) + image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"]) else: image_inputs = {} @@ -158,13 +169,7 @@ def __call__( "Using processors without these attributes in the config is deprecated and will throw an error in v4.47." ) - text_inputs = self.tokenizer( - prompt_strings, - return_tensors=return_tensors, - padding=padding, - truncation=truncation, - max_length=max_length, - ) + text_inputs = self.tokenizer(prompt_strings, **output_kwargs["text_kwargs"]) return BatchFeature(data={**text_inputs, **image_inputs}) # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Llama diff --git a/tests/models/llava/test_modeling_llava.py b/tests/models/llava/test_modeling_llava.py index 5c05480ffa6dbb..305fc9e9a84cdb 100644 --- a/tests/models/llava/test_modeling_llava.py +++ b/tests/models/llava/test_modeling_llava.py @@ -274,7 +274,7 @@ def test_small_model_integration_test(self): prompt = "\nUSER: What are the things I should be cautious about when I visit this place?\nASSISTANT:" image_file = "https://llava-vl.github.io/static/images/view.jpg" raw_image = Image.open(requests.get(image_file, stream=True).raw) - inputs = self.processor(prompt, raw_image, return_tensors="pt") + inputs = self.processor(images=raw_image, 
text=prompt, return_tensors="pt") EXPECTED_INPUT_IDS = torch.tensor([[1, 32000, 28705, 13, 11123, 28747, 1824, 460, 272, 1722,315, 1023, 347, 13831, 925, 684, 739, 315, 3251, 456,1633, 28804, 13, 4816, 8048, 12738, 28747]]) # fmt: skip self.assertTrue(torch.equal(inputs["input_ids"], EXPECTED_INPUT_IDS)) @@ -299,7 +299,7 @@ def test_small_model_integration_test_llama_single(self): prompt = "USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT:" image_file = "https://llava-vl.github.io/static/images/view.jpg" raw_image = Image.open(requests.get(image_file, stream=True).raw) - inputs = processor(prompt, raw_image, return_tensors="pt").to(torch_device, torch.float16) + inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(torch_device, torch.float16) output = model.generate(**inputs, max_new_tokens=900, do_sample=False) EXPECTED_DECODED_TEXT = "USER: \nWhat are the things I should be cautious about when I visit this place? ASSISTANT: When visiting this place, which is a pier or dock extending over a body of water, there are a few things to be cautious about. First, be aware of the weather conditions, as sudden changes in weather can make the pier unsafe to walk on. Second, be mindful of the water depth and any potential hazards, such as submerged rocks or debris, that could cause accidents or injuries. Additionally, be cautious of the tides and currents, as they can change rapidly and pose a risk to swimmers or those who venture too close to the edge of the pier. Finally, be respectful of the environment and other visitors, and follow any posted rules or guidelines for the area." 
# fmt: skip @@ -325,7 +325,7 @@ def test_small_model_integration_test_llama_batched(self): image1 = Image.open(requests.get("https://llava-vl.github.io/static/images/view.jpg", stream=True).raw) image2 = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw) - inputs = processor(prompts, images=[image1, image2], return_tensors="pt", padding=True) + inputs = processor(images=[image1, image2], text=prompts, return_tensors="pt", padding=True) output = model.generate(**inputs, max_new_tokens=20) @@ -349,7 +349,7 @@ def test_small_model_integration_test_batch(self): image1 = Image.open(requests.get("https://llava-vl.github.io/static/images/view.jpg", stream=True).raw) image2 = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw) - inputs = self.processor(prompts, images=[image1, image2], return_tensors="pt", padding=True) + inputs = self.processor(images=[image1, image2], text=prompts, return_tensors="pt", padding=True) output = model.generate(**inputs, max_new_tokens=20) @@ -381,7 +381,7 @@ def test_small_model_integration_test_llama_batched_regression(self): image1 = Image.open(requests.get("https://llava-vl.github.io/static/images/view.jpg", stream=True).raw) image2 = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw) - inputs = processor(prompts, images=[image1, image2, image1], return_tensors="pt", padding=True) + inputs = processor(images=[image1, image2, image1], text=prompts, return_tensors="pt", padding=True) output = model.generate(**inputs, max_new_tokens=20) @@ -409,8 +409,8 @@ def test_batched_generation(self): image2 = Image.open(requests.get(url2, stream=True).raw) inputs = processor( - text=[prompt1, prompt2, prompt3], images=[image1, image2, image1, image2], + text=[prompt1, prompt2, prompt3], return_tensors="pt", padding=True, ).to(torch_device) @@ -444,7 +444,7 @@ def test_llava_index_error_bug(self): image_file 
= "http://images.cocodataset.org/val2017/000000039769.jpg" raw_image = Image.open(requests.get(image_file, stream=True).raw) - inputs = processor(prompt, raw_image, return_tensors="pt").to(torch_device, torch.float16) + inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(torch_device, torch.float16) # Make sure that `generate` works _ = model.generate(**inputs, max_new_tokens=20) @@ -510,7 +510,7 @@ def test_generation_no_images(self): processor = AutoProcessor.from_pretrained(model_id) # Prepare inputs with no images - inputs = processor("Hello, I am", return_tensors="pt").to(torch_device) + inputs = processor(text="Hello, I am", return_tensors="pt").to(torch_device) # Make sure that `generate` works _ = model.generate(**inputs, max_new_tokens=20) @@ -554,13 +554,13 @@ def test_expansion_in_processing(self): # check processing with expansion of inputs processor.vision_feature_select_strategy = "default" processor.patch_size = 14 - inputs_expanded = processor(prompt, raw_image, return_tensors="pt").to(torch_device, torch.float16) + inputs_expanded = processor(images=raw_image, text=prompt, return_tensors="pt").to(torch_device, torch.float16) self.assertTrue(inputs_expanded.input_ids.shape[-1] == 593) # check processing without expansion of inputs (legacy behavior) processor.vision_feature_select_strategy = None processor.patch_size = None - inputs = processor(prompt, raw_image, return_tensors="pt").to(torch_device, torch.float16) + inputs = processor(images=raw_image, text=prompt, return_tensors="pt").to(torch_device, torch.float16) self.assertTrue(inputs.input_ids.shape[-1] == 18) # generate exactly 20 tokens diff --git a/tests/models/llava/test_processor_llava.py b/tests/models/llava/test_processor_llava.py index 54c1b4674cbcef..5b05a8b92ea513 100644 --- a/tests/models/llava/test_processor_llava.py +++ b/tests/models/llava/test_processor_llava.py @@ -11,18 +11,43 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +import shutil +import tempfile import unittest -from transformers.testing_utils import require_vision +from transformers import AutoProcessor, AutoTokenizer, LlamaTokenizerFast, LlavaProcessor +from transformers.testing_utils import require_torch, require_vision from transformers.utils import is_vision_available +from ...test_processing_common import ProcessorTesterMixin + if is_vision_available(): - from transformers import AutoTokenizer, LlavaProcessor + from transformers import CLIPImageProcessor @require_vision -class LlavaProcessorTest(unittest.TestCase): +class LlavaProcessorTest(ProcessorTesterMixin, unittest.TestCase): + processor_class = LlavaProcessor + + def setUp(self): + self.tmpdirname = tempfile.mkdtemp() + image_processor = CLIPImageProcessor(do_center_crop=False) + tokenizer = LlamaTokenizerFast.from_pretrained("huggyllama/llama-7b") + + processor = LlavaProcessor(image_processor=image_processor, tokenizer=tokenizer) + + processor.save_pretrained(self.tmpdirname) + + def get_tokenizer(self, **kwargs): + return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).tokenizer + + def get_image_processor(self, **kwargs): + return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor + + def tearDown(self): + shutil.rmtree(self.tmpdirname) + def test_can_load_various_tokenizers(self): for checkpoint in ["Intel/llava-gemma-2b", "llava-hf/llava-1.5-7b-hf"]: processor = LlavaProcessor.from_pretrained(checkpoint) @@ -45,3 +70,29 @@ def test_chat_template(self): formatted_prompt = processor.apply_chat_template(messages, add_generation_prompt=True) self.assertEqual(expected_prompt, formatted_prompt) + + @require_torch + @require_vision + def test_unstructured_kwargs_batched(self): + if "image_processor" not in self.processor_class.attributes: + self.skipTest(f"image_processor attribute not present in {self.processor_class}") + 
image_processor = self.get_component("image_processor") + tokenizer = self.get_component("tokenizer") + + processor = self.processor_class(tokenizer=tokenizer, image_processor=image_processor) + self.skip_processor_without_typed_kwargs(processor) + + input_str = ["lower newer", "upper older longer string"] + image_input = self.prepare_image_inputs() * 2 + inputs = processor( + images=image_input, + text=input_str, + return_tensors="pt", + size={"height": 214, "width": 214}, + padding="longest", + max_length=76, + ) + + self.assertEqual(inputs["pixel_values"].shape[2], 214) + + self.assertEqual(len(inputs["input_ids"][0]), 5) From c7a91f5adf976e0517c4a7f1506fb0c24f353053 Mon Sep 17 00:00:00 2001 From: Sergio Paniego Blanco Date: Mon, 16 Sep 2024 18:52:27 +0200 Subject: [PATCH 05/50] `Agents, supercharged - Multi-agents, External tools, and more` docs typo fixed (#33478) * Typo fixed in Agents, supercharged --- docs/source/en/agents_advanced.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/en/agents_advanced.md b/docs/source/en/agents_advanced.md index e7469a310c4102..399eeb9b70eb20 100644 --- a/docs/source/en/agents_advanced.md +++ b/docs/source/en/agents_advanced.md @@ -34,7 +34,7 @@ You can easily build hierarchical multi-agent systems with `transformers.agents` To do so, encapsulate the agent in a [`ManagedAgent`] object. This object needs arguments `agent`, `name`, and a `description`, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools. 
-Here's an example of making an agent that managed a specitif web search agent using our [`DuckDuckGoSearchTool`]: +Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]: ```py from transformers.agents import ReactCodeAgent, HfApiEngine, DuckDuckGoSearchTool, ManagedAgent From c2d05897bf4e8b34773838accaddd66028bc148d Mon Sep 17 00:00:00 2001 From: Ahmed Almaghz <53489256+AhmedAlmaghz@users.noreply.github.com> Date: Mon, 16 Sep 2024 20:02:03 +0300 Subject: [PATCH 06/50] [i18n-ar] Add File : `docs/source/ar/_toctree.yml` (#32696) * Update ar lang build_documentation.yml * Update ar lang build_pr_documentation.yml * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/pipeline_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update 
docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/autoclass_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/preprocessing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md 
Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/training.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/run_scripts.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/accelerate.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/accelerate.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/accelerate.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/accelerate.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/accelerate.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/accelerate.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Create _config.py * Update _toctree.yml * Update _toctree.yml * Update docs/source/ar/peft.md 
Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/peft.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update _toctree.yml * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/model_sharing.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/conversations.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update 
docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update 
docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/agents.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed 
<554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update docs/source/ar/llm_tutorial.md Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> * Update llm_tutorial.md * Update _toctree.yml * Update autoclass_tutorial.md * Update autoclass_tutorial.md * Update preprocessing.md * Update glossary.md * Update run_scripts.md * Update run_scripts.md * Update 
run_scripts.md --------- Co-authored-by: Abdullah Mohammed <554032+abodacs@users.noreply.github.com> --- .github/workflows/build_documentation.yml | 2 +- .github/workflows/build_pr_documentation.yml | 2 +- docs/source/ar/_config.py | 14 + docs/source/ar/_toctree.yml | 892 +++++++++++++++++++ docs/source/ar/accelerate.md | 120 +++ docs/source/ar/agents.md | 539 +++++++++++ docs/source/ar/autoclass_tutorial.md | 167 ++++ docs/source/ar/conversations.md | 204 +++++ docs/source/ar/glossary.md | 446 ++++++++++ docs/source/ar/index.md | 342 +++++++ docs/source/ar/installation.md | 246 +++++ docs/source/ar/llm_tutorial.md | 248 ++++++ docs/source/ar/model_sharing.md | 223 +++++ docs/source/ar/peft.md | 250 ++++++ docs/source/ar/pipeline_tutorial.md | 315 +++++++ docs/source/ar/preprocessing.md | 521 +++++++++++ docs/source/ar/quicktour.md | 543 +++++++++++ docs/source/ar/run_scripts.md | 351 ++++++++ docs/source/ar/training.md | 412 +++++++++ 19 files changed, 5835 insertions(+), 2 deletions(-) create mode 100644 docs/source/ar/_config.py create mode 100644 docs/source/ar/_toctree.yml create mode 100644 docs/source/ar/accelerate.md create mode 100644 docs/source/ar/agents.md create mode 100644 docs/source/ar/autoclass_tutorial.md create mode 100644 docs/source/ar/conversations.md create mode 100644 docs/source/ar/glossary.md create mode 100644 docs/source/ar/index.md create mode 100644 docs/source/ar/installation.md create mode 100644 docs/source/ar/llm_tutorial.md create mode 100644 docs/source/ar/model_sharing.md create mode 100644 docs/source/ar/peft.md create mode 100644 docs/source/ar/pipeline_tutorial.md create mode 100644 docs/source/ar/preprocessing.md create mode 100644 docs/source/ar/quicktour.md create mode 100644 docs/source/ar/run_scripts.md create mode 100644 docs/source/ar/training.md diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml index e3e3b5f2df37f1..b25567fb092a14 100644 --- 
a/.github/workflows/build_documentation.yml +++ b/.github/workflows/build_documentation.yml @@ -15,7 +15,7 @@ jobs: commit_sha: ${{ github.sha }} package: transformers notebook_folder: transformers_doc - languages: de en es fr hi it ko pt tr zh ja te + languages: ar de en es fr hi it ko pt tr zh ja te custom_container: huggingface/transformers-doc-builder secrets: token: ${{ secrets.HUGGINGFACE_PUSH }} diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml index c8d073ea34688f..f698f860b2f93c 100644 --- a/.github/workflows/build_pr_documentation.yml +++ b/.github/workflows/build_pr_documentation.yml @@ -14,5 +14,5 @@ jobs: commit_sha: ${{ github.event.pull_request.head.sha }} pr_number: ${{ github.event.number }} package: transformers - languages: de en es fr hi it ko pt tr zh ja te + languages: ar de en es fr hi it ko pt tr zh ja te custom_container: huggingface/transformers-doc-builder diff --git a/docs/source/ar/_config.py b/docs/source/ar/_config.py new file mode 100644 index 00000000000000..f49e4e4731965a --- /dev/null +++ b/docs/source/ar/_config.py @@ -0,0 +1,14 @@ +# docstyle-ignore +INSTALL_CONTENT = """ +# Transformers installation +! pip install transformers datasets evaluate accelerate +# To install from source instead of the last release, comment the command above and uncomment the following one. +# ! 
pip install git+https://github.com/huggingface/transformers.git +""" + +notebook_first_cells = [{"type": "code", "content": INSTALL_CONTENT}] +black_avoid_patterns = { + "{processor_class}": "FakeProcessorClass", + "{model_class}": "FakeModelClass", + "{object_class}": "FakeObjectClass", +} diff --git a/docs/source/ar/_toctree.yml b/docs/source/ar/_toctree.yml new file mode 100644 index 00000000000000..39e0ae14e19c29 --- /dev/null +++ b/docs/source/ar/_toctree.yml @@ -0,0 +1,892 @@ +- sections: + - local: index + title: 🤗 المحولات + - local: quicktour + title: جولة سريعة + - local: installation + title: التثبيت + title: البدء +- sections: + - local: pipeline_tutorial + title: تشغيل الاستنتاج باستخدام خطوط الأنابيب + - local: autoclass_tutorial + title: كتابة تعليمات برمجية متكيفة باستخدام AutoClass + - local: preprocessing + title: معالجة البيانات مسبقًا + - local: training + title: ضبط نموذج مسبق التدريب + - local: run_scripts + title: التدريب باستخدام نص برمجي + - local: accelerate + title: إعداد تدريب موزع باستخدام 🤗 Accelerate + - local: peft + title: تحميل النماذج المخصصة وتدريبها باستخدام 🤗 PEFT + - local: model_sharing + title: مشاركة نموذجك + - local: agents + title: الوكلاء + - local: llm_tutorial + title: التوليد باستخدام LLMs + - local: conversations + title: الدردشة مع المحولات + title: البرامج التعليمية +# - sections: +# - isExpanded: false +# sections: +# - local: tasks/sequence_classification +# title: تصنيف النصوص +# - local: tasks/token_classification +# title: تصنيف الرموز +# - local: tasks/question_answering +# title: الإجابة على الأسئلة +# - local: tasks/language_modeling +# title: نمذجة اللغة السببية +# - local: tasks/masked_language_modeling +# title: نمذجة اللغة المقنعة +# - local: tasks/translation +# title: الترجمة +# - local: tasks/summarization +# title: التلخيص +# - local: tasks/multiple_choice +# title: الاختيار المتعدد +# title: معالجة اللغات الطبيعية +# - isExpanded: false +# sections: +# - local: tasks/audio_classification +# title:
تصنيف الصوت +# - local: tasks/asr +# title: التعرف التلقائي على الكلام +# title: الصوت +# - isExpanded: false +# sections: +# - local: tasks/image_classification +# title: تصنيف الصور +# - local: tasks/semantic_segmentation +# title: تجزئة الصور +# - local: tasks/video_classification +# title: تصنيف الفيديو +# - local: tasks/object_detection +# title: اكتشاف الأشياء +# - local: tasks/zero_shot_object_detection +# title: اكتشاف الأشياء بدون تدريب +# - local: tasks/zero_shot_image_classification +# title: تصنيف الصور بدون تدريب +# - local: tasks/monocular_depth_estimation +# title: تقدير العمق +# - local: tasks/image_to_image +# title: صورة إلى صورة +# - local: tasks/image_feature_extraction +# title: استخراج ميزات الصورة +# - local: tasks/mask_generation +# title: توليد القناع +# - local: tasks/knowledge_distillation_for_image_classification +# title: التقليل المعرفي للرؤية الحاسوبية +# title: الرؤية الحاسوبية +# - isExpanded: false +# sections: +# - local: tasks/image_captioning +# title: وصف الصور Image captioning +# - local: tasks/document_question_answering +# title: الإجابة على أسئلة المستندات +# - local: tasks/visual_question_answering +# title: الإجابة على الأسئلة المرئية +# - local: tasks/text-to-speech +# title: تحويل النص إلى كلام +# title: المتعددة الوسائط +# - isExpanded: false +# sections: +# - local: generation_strategies +# title: تخصيص استراتيجية التوليد +# - local: kv_cache +# title: أفضل الممارسات للتوليد باستخدام ذاكرة التخزين المؤقت +# title: التوليد +# - isExpanded: false +# sections: +# - local: tasks/idefics +# title: مهام الصور مع IDEFICS +# - local: tasks/prompting +# title: دليل إرشادي لمحفزات النماذج اللغوية الكبيرة +# title: الإرشاد +# title: أدلة المهام +# - sections: +# - local: fast_tokenizers +# title: استخدم برامج التجزئة السريعة من 🤗 Tokenizers +# - local: multilingual +# title: تشغيل الاستنتاج باستخدام نماذج متعددة اللغات +# - local: create_a_model +# title: استخدام واجهات برمجة التطبيقات الخاصة بالنموذج +# - local: custom_models 
+# title: مشاركة نموذج مخصص +# - local: chat_templating +# title: قوالب لنماذج الدردشة +# - local: trainer +# title: المدرب +# - local: sagemaker +# title: تشغيل التدريب على Amazon SageMaker +# - local: serialization +# title: التصدير إلى ONNX +# - local: tflite +# title: التصدير إلى TFLite +# - local: torchscript +# title: التصدير إلى TorchScript +# - local: benchmarks +# title: المعايير +# - local: notebooks +# title: دفاتر الملاحظات مع الأمثلة +# - local: community +# title: موارد المجتمع +# - local: troubleshooting +# title: استكشاف الأخطاء وإصلاحها +# - local: gguf +# title: التوافق مع ملفات GGUF +# title: أدلة المطورين +# - sections: +# - local: quantization/overview +# title: نظرة عامة +# - local: quantization/bitsandbytes +# title: bitsandbytes +# - local: quantization/gptq +# title: GPTQ +# - local: quantization/awq +# title: AWQ +# - local: quantization/aqlm +# title: AQLM +# - local: quantization/quanto +# title: Quanto +# - local: quantization/eetq +# title: EETQ +# - local: quantization/hqq +# title: HQQ +# - local: quantization/optimum +# title: Optimum +# - local: quantization/contribute +# title: المساهمة بطريقة جديدة للتكميم +# title: أساليب التكميم +# - sections: +# - local: performance +# title: الأداء-نظرة عامة +# - local: llm_optims +# title: تحسين الاستدلال LLM +# - sections: +# - local: perf_train_gpu_one +# title: استخدام عدة وحدات معالجة رسوميات (GPUs) بشكل متوازٍ +# - local: perf_train_gpu_many +# title: وحدات معالجة الرسومات (GPU) متعددة والتوازي +# - local: fsdp +# title: Fully Sharded Data Parallel +# - local: deepspeed +# title: DeepSpeed +# - local: perf_train_cpu +# title: التدريب الفعال على وحدة المعالجة المركزية (CPU) +# - local: perf_train_cpu_many +# title: التدريب الموزع لوحدة المعالجة المركزية (CPU) +# - local: perf_train_tpu_tf +# title: التدريب على (TPU) باستخدام TensorFlow +# - local: perf_train_special +# title: تدريب PyTorch على Apple silicon +# - local: perf_hardware +# title: الأجهزة المخصصة للتدريب +# - local: hpo_train 
+# title: البحث عن المعاملات المثلى باستخدام واجهة برمجة تطبيقات المدرب +# title: تقنيات التدريب الفعال +# - sections: +# - local: perf_infer_cpu +# title: الاستدلال على وحدة المعالجة المركزية (CPU) +# - local: perf_infer_gpu_one +# title: الاستدلال على وحدة معالجة الرسومات (GPU) +# title: تحسين الاستدلال +# - local: big_models +# title: إنشاء نموذج كبير +# - local: debugging +# title: تصحيح الأخطاء البرمجية +# - local: tf_xla +# title: تكامل XLA لنماذج TensorFlow +# - local: perf_torch_compile +# title: تحسين الاستدلال باستخدام `torch.compile()` +# title: الأداء وقابلية التوسع +# - sections: +# - local: contributing +# title: كيفية المساهمة في 🤗 المحولات؟ +# - local: add_new_model +# title: كيفية إضافة نموذج إلى 🤗 المحولات؟ +# - local: add_new_pipeline +# title: كيفية إضافة خط أنابيب إلى 🤗 المحولات؟ +# - local: testing +# title: الاختبار +# - local: pr_checks +# title: التحقق من طلب السحب +# title: المساهمة +- sections: + # - local: philosophy + # title: الفلسفة + - local: glossary + title: قاموس المصطلحات (قائمة الكلمات) + # - local: task_summary + # title: ما الذي يمكن أن تفعله 🤗 المحولات + # - local: tasks_explained + # title: كيف تحل المحولات المهام + # - local: model_summary + # title: عائلة نماذج المحول + # - local: tokenizer_summary + # title: ملخص برنامج مقسم النصوص (tokenizers) + # - local: attention + # title: الانتباه Attention + # - local: pad_truncation + # title: الحشو والتقليم + # - local: bertology + # title: BERTology + # - local: perplexity + # title: حيرة النماذج ذات الطول الثابت + # - local: pipeline_webserver + # title: خطوط الأنابيب للاستدلال على خادم الويب + # - local: model_memory_anatomy + # title: تشريح تدريب النموذج + # - local: llm_tutorial_optimization + # title: الاستفادة القصوى من LLMs + title: أطر مفاهيمية +# - sections: +# - sections: +# - local: main_classes/agent +# title: الوكلاء والأدوات +# - local: model_doc/auto +# title: فئات يتم إنشاؤها ديناميكيًا +# - local: main_classes/backbones +# title: العمود الفقري +# - local:
main_classes/callback +# title: عمليات الاسترجاع +# - local: main_classes/configuration +# title: التكوين +# - local: main_classes/data_collator +# title: مجمع البيانات +# - local: main_classes/keras_callbacks +# title: استدعاءات Keras +# - local: main_classes/logging +# title: التسجيل +# - local: main_classes/model +# title: النماذج +# - local: main_classes/text_generation +# title: توليد النصوص +# - local: main_classes/onnx +# title: ONNX +# - local: main_classes/optimizer_schedules +# title: التحسين +# - local: main_classes/output +# title: مخرجات النموذج +# - local: main_classes/pipelines +# title: خطوط الأنابيب +# - local: main_classes/processors +# title: المعالجات +# - local: main_classes/quantization +# title: التكميم +# - local: main_classes/tokenizer +# title: برنامج مقسم النصوص +# - local: main_classes/trainer +# title: المدرب +# - local: main_classes/deepspeed +# title: DeepSpeed +# - local: main_classes/feature_extractor +# title: مستخرج الميزات +# - local: main_classes/image_processor +# title: معالج الصور +# title: الفئات الرئيسية +# - sections: +# - isExpanded: false +# sections: +# - local: model_doc/albert +# title: ALBERT +# - local: model_doc/bart +# title: BART +# - local: model_doc/barthez +# title: BARThez +# - local: model_doc/bartpho +# title: BARTpho +# - local: model_doc/bert +# title: BERT +# - local: model_doc/bert-generation +# title: BertGeneration +# - local: model_doc/bert-japanese +# title: BertJapanese +# - local: model_doc/bertweet +# title: Bertweet +# - local: model_doc/big_bird +# title: BigBird +# - local: model_doc/bigbird_pegasus +# title: BigBirdPegasus +# - local: model_doc/biogpt +# title: BioGpt +# - local: model_doc/blenderbot +# title: Blenderbot +# - local: model_doc/blenderbot-small +# title: Blenderbot Small +# - local: model_doc/bloom +# title: BLOOM +# - local: model_doc/bort +# title: BORT +# - local: model_doc/byt5 +# title: ByT5 +# - local: model_doc/camembert +# title: CamemBERT +# - local: model_doc/canine 
+# title: CANINE +# - local: model_doc/codegen +# title: CodeGen +# - local: model_doc/code_llama +# title: CodeLlama +# - local: model_doc/cohere +# title: Cohere +# - local: model_doc/convbert +# title: ConvBERT +# - local: model_doc/cpm +# title: CPM +# - local: model_doc/cpmant +# title: CPMANT +# - local: model_doc/ctrl +# title: CTRL +# - local: model_doc/dbrx +# title: DBRX +# - local: model_doc/deberta +# title: DeBERTa +# - local: model_doc/deberta-v2 +# title: DeBERTa-v2 +# - local: model_doc/dialogpt +# title: DialoGPT +# - local: model_doc/distilbert +# title: DistilBERT +# - local: model_doc/dpr +# title: DPR +# - local: model_doc/electra +# title: ELECTRA +# - local: model_doc/encoder-decoder +# title: Encoder Decoder Models +# - local: model_doc/ernie +# title: ERNIE +# - local: model_doc/ernie_m +# title: ErnieM +# - local: model_doc/esm +# title: ESM +# - local: model_doc/falcon +# title: Falcon +# - local: model_doc/fastspeech2_conformer +# title: FastSpeech2Conformer +# - local: model_doc/flan-t5 +# title: FLAN-T5 +# - local: model_doc/flan-ul2 +# title: FLAN-UL2 +# - local: model_doc/flaubert +# title: FlauBERT +# - local: model_doc/fnet +# title: FNet +# - local: model_doc/fsmt +# title: FSMT +# - local: model_doc/funnel +# title: Funnel Transformer +# - local: model_doc/fuyu +# title: Fuyu +# - local: model_doc/gemma +# title: Gemma +# - local: model_doc/openai-gpt +# title: GPT +# - local: model_doc/gpt_neo +# title: GPT Neo +# - local: model_doc/gpt_neox +# title: GPT NeoX +# - local: model_doc/gpt_neox_japanese +# title: GPT NeoX Japanese +# - local: model_doc/gptj +# title: GPT-J +# - local: model_doc/gpt2 +# title: GPT2 +# - local: model_doc/gpt_bigcode +# title: GPTBigCode +# - local: model_doc/gptsan-japanese +# title: GPTSAN Japanese +# - local: model_doc/gpt-sw3 +# title: GPTSw3 +# - local: model_doc/herbert +# title: HerBERT +# - local: model_doc/ibert +# title: I-BERT +# - local: model_doc/jamba +# title: Jamba +# - local: 
model_doc/jetmoe +# title: JetMoe +# - local: model_doc/jukebox +# title: Jukebox +# - local: model_doc/led +# title: LED +# - local: model_doc/llama +# title: LLaMA +# - local: model_doc/llama2 +# title: Llama2 +# - local: model_doc/llama3 +# title: Llama3 +# - local: model_doc/longformer +# title: Longformer +# - local: model_doc/longt5 +# title: LongT5 +# - local: model_doc/luke +# title: LUKE +# - local: model_doc/m2m_100 +# title: M2M100 +# - local: model_doc/madlad-400 +# title: MADLAD-400 +# - local: model_doc/mamba +# title: Mamba +# - local: model_doc/marian +# title: MarianMT +# - local: model_doc/markuplm +# title: MarkupLM +# - local: model_doc/mbart +# title: MBart and MBart-50 +# - local: model_doc/mega +# title: MEGA +# - local: model_doc/megatron-bert +# title: MegatronBERT +# - local: model_doc/megatron_gpt2 +# title: MegatronGPT2 +# - local: model_doc/mistral +# title: Mistral +# - local: model_doc/mixtral +# title: Mixtral +# - local: model_doc/mluke +# title: mLUKE +# - local: model_doc/mobilebert +# title: MobileBERT +# - local: model_doc/mpnet +# title: MPNet +# - local: model_doc/mpt +# title: MPT +# - local: model_doc/mra +# title: MRA +# - local: model_doc/mt5 +# title: MT5 +# - local: model_doc/mvp +# title: MVP +# - local: model_doc/nezha +# title: NEZHA +# - local: model_doc/nllb +# title: NLLB +# - local: model_doc/nllb-moe +# title: NLLB-MoE +# - local: model_doc/nystromformer +# title: Nyströmformer +# - local: model_doc/olmo +# title: OLMo +# - local: model_doc/open-llama +# title: Open-Llama +# - local: model_doc/opt +# title: OPT +# - local: model_doc/pegasus +# title: Pegasus +# - local: model_doc/pegasus_x +# title: PEGASUS-X +# - local: model_doc/persimmon +# title: Persimmon +# - local: model_doc/phi +# title: Phi +# - local: model_doc/phi3 +# title: Phi-3 +# - local: model_doc/phobert +# title: PhoBERT +# - local: model_doc/plbart +# title: PLBart +# - local: model_doc/prophetnet +# title: ProphetNet +# - local: 
model_doc/qdqbert +# title: QDQBert +# - local: model_doc/qwen2 +# title: Qwen2 +# - local: model_doc/qwen2_moe +# title: Qwen2MoE +# - local: model_doc/rag +# title: RAG +# - local: model_doc/realm +# title: REALM +# - local: model_doc/recurrent_gemma +# title: RecurrentGemma +# - local: model_doc/reformer +# title: Reformer +# - local: model_doc/rembert +# title: RemBERT +# - local: model_doc/retribert +# title: RetriBERT +# - local: model_doc/roberta +# title: RoBERTa +# - local: model_doc/roberta-prelayernorm +# title: RoBERTa-PreLayerNorm +# - local: model_doc/roc_bert +# title: RoCBert +# - local: model_doc/roformer +# title: RoFormer +# - local: model_doc/rwkv +# title: RWKV +# - local: model_doc/splinter +# title: Splinter +# - local: model_doc/squeezebert +# title: SqueezeBERT +# - local: model_doc/stablelm +# title: StableLm +# - local: model_doc/starcoder2 +# title: Starcoder2 +# - local: model_doc/switch_transformers +# title: SwitchTransformers +# - local: model_doc/t5 +# title: T5 +# - local: model_doc/t5v1.1 +# title: T5v1.1 +# - local: model_doc/tapex +# title: TAPEX +# - local: model_doc/transfo-xl +# title: Transformer XL +# - local: model_doc/ul2 +# title: UL2 +# - local: model_doc/umt5 +# title: UMT5 +# - local: model_doc/xmod +# title: X-MOD +# - local: model_doc/xglm +# title: XGLM +# - local: model_doc/xlm +# title: XLM +# - local: model_doc/xlm-prophetnet +# title: XLM-ProphetNet +# - local: model_doc/xlm-roberta +# title: XLM-RoBERTa +# - local: model_doc/xlm-roberta-xl +# title: XLM-RoBERTa-XL +# - local: model_doc/xlm-v +# title: XLM-V +# - local: model_doc/xlnet +# title: XLNet +# - local: model_doc/yoso +# title: YOSO +# title: Text models +# - isExpanded: false +# sections: +# - local: model_doc/beit +# title: BEiT +# - local: model_doc/bit +# title: BiT +# - local: model_doc/conditional_detr +# title: Conditional DETR +# - local: model_doc/convnext +# title: ConvNeXT +# - local: model_doc/convnextv2 +# title: ConvNeXTV2 +# - local: 
model_doc/cvt +# title: CVT +# - local: model_doc/deformable_detr +# title: Deformable DETR +# - local: model_doc/deit +# title: DeiT +# - local: model_doc/depth_anything +# title: Depth Anything +# - local: model_doc/deta +# title: DETA +# - local: model_doc/detr +# title: DETR +# - local: model_doc/dinat +# title: DiNAT +# - local: model_doc/dinov2 +# title: DINOV2 +# - local: model_doc/dit +# title: DiT +# - local: model_doc/dpt +# title: DPT +# - local: model_doc/efficientformer +# title: EfficientFormer +# - local: model_doc/efficientnet +# title: EfficientNet +# - local: model_doc/focalnet +# title: FocalNet +# - local: model_doc/glpn +# title: GLPN +# - local: model_doc/imagegpt +# title: ImageGPT +# - local: model_doc/levit +# title: LeViT +# - local: model_doc/mask2former +# title: Mask2Former +# - local: model_doc/maskformer +# title: MaskFormer +# - local: model_doc/mobilenet_v1 +# title: MobileNetV1 +# - local: model_doc/mobilenet_v2 +# title: MobileNetV2 +# - local: model_doc/mobilevit +# title: MobileViT +# - local: model_doc/mobilevitv2 +# title: MobileViTV2 +# - local: model_doc/nat +# title: NAT +# - local: model_doc/poolformer +# title: PoolFormer +# - local: model_doc/pvt +# title: Pyramid Vision Transformer (PVT) +# - local: model_doc/pvt_v2 +# title: Pyramid Vision Transformer v2 (PVTv2) +# - local: model_doc/regnet +# title: RegNet +# - local: model_doc/resnet +# title: ResNet +# - local: model_doc/segformer +# title: SegFormer +# - local: model_doc/seggpt +# title: SegGpt +# - local: model_doc/superpoint +# title: SuperPoint +# - local: model_doc/swiftformer +# title: SwiftFormer +# - local: model_doc/swin +# title: Swin Transformer +# - local: model_doc/swinv2 +# title: Swin Transformer V2 +# - local: model_doc/swin2sr +# title: Swin2SR +# - local: model_doc/table-transformer +# title: Table Transformer +# - local: model_doc/upernet +# title: UperNet +# - local: model_doc/van +# title: VAN +# - local: model_doc/vit +# title: Vision 
Transformer (ViT) +# - local: model_doc/vit_hybrid +# title: ViT Hybrid +# - local: model_doc/vitdet +# title: ViTDet +# - local: model_doc/vit_mae +# title: ViTMAE +# - local: model_doc/vitmatte +# title: ViTMatte +# - local: model_doc/vit_msn +# title: ViTMSN +# - local: model_doc/yolos +# title: YOLOS +# title: Vision models +# - isExpanded: false +# sections: +# - local: model_doc/audio-spectrogram-transformer +# title: Audio Spectrogram Transformer +# - local: model_doc/bark +# title: Bark +# - local: model_doc/clap +# title: CLAP +# - local: model_doc/encodec +# title: EnCodec +# - local: model_doc/hubert +# title: Hubert +# - local: model_doc/mctct +# title: MCTCT +# - local: model_doc/mms +# title: MMS +# - local: model_doc/musicgen +# title: MusicGen +# - local: model_doc/musicgen_melody +# title: MusicGen Melody +# - local: model_doc/pop2piano +# title: Pop2Piano +# - local: model_doc/seamless_m4t +# title: Seamless-M4T +# - local: model_doc/seamless_m4t_v2 +# title: SeamlessM4T-v2 +# - local: model_doc/sew +# title: SEW +# - local: model_doc/sew-d +# title: SEW-D +# - local: model_doc/speech_to_text +# title: Speech2Text +# - local: model_doc/speech_to_text_2 +# title: Speech2Text2 +# - local: model_doc/speecht5 +# title: SpeechT5 +# - local: model_doc/unispeech +# title: UniSpeech +# - local: model_doc/unispeech-sat +# title: UniSpeech-SAT +# - local: model_doc/univnet +# title: UnivNet +# - local: model_doc/vits +# title: VITS +# - local: model_doc/wav2vec2 +# title: Wav2Vec2 +# - local: model_doc/wav2vec2-bert +# title: Wav2Vec2-BERT +# - local: model_doc/wav2vec2-conformer +# title: Wav2Vec2-Conformer +# - local: model_doc/wav2vec2_phoneme +# title: Wav2Vec2Phoneme +# - local: model_doc/wavlm +# title: WavLM +# - local: model_doc/whisper +# title: Whisper +# - local: model_doc/xls_r +# title: XLS-R +# - local: model_doc/xlsr_wav2vec2 +# title: XLSR-Wav2Vec2 +# title: Audio models +# - isExpanded: false +# sections: +# - local: model_doc/timesformer 
+# title: TimeSformer +# - local: model_doc/videomae +# title: VideoMAE +# - local: model_doc/vivit +# title: ViViT +# title: Video models +# - isExpanded: false +# sections: +# - local: model_doc/align +# title: ALIGN +# - local: model_doc/altclip +# title: AltCLIP +# - local: model_doc/blip +# title: BLIP +# - local: model_doc/blip-2 +# title: BLIP-2 +# - local: model_doc/bridgetower +# title: BridgeTower +# - local: model_doc/bros +# title: BROS +# - local: model_doc/chinese_clip +# title: Chinese-CLIP +# - local: model_doc/clip +# title: CLIP +# - local: model_doc/clipseg +# title: CLIPSeg +# - local: model_doc/clvp +# title: CLVP +# - local: model_doc/data2vec +# title: Data2Vec +# - local: model_doc/deplot +# title: DePlot +# - local: model_doc/donut +# title: Donut +# - local: model_doc/flava +# title: FLAVA +# - local: model_doc/git +# title: GIT +# - local: model_doc/grounding-dino +# title: Grounding DINO +# - local: model_doc/groupvit +# title: GroupViT +# - local: model_doc/idefics +# title: IDEFICS +# - local: model_doc/idefics2 +# title: Idefics2 +# - local: model_doc/instructblip +# title: InstructBLIP +# - local: model_doc/kosmos-2 +# title: KOSMOS-2 +# - local: model_doc/layoutlm +# title: LayoutLM +# - local: model_doc/layoutlmv2 +# title: LayoutLMV2 +# - local: model_doc/layoutlmv3 +# title: LayoutLMV3 +# - local: model_doc/layoutxlm +# title: LayoutXLM +# - local: model_doc/lilt +# title: LiLT +# - local: model_doc/llava +# title: Llava +# - local: model_doc/llava_next +# title: LLaVA-NeXT +# - local: model_doc/lxmert +# title: LXMERT +# - local: model_doc/matcha +# title: MatCha +# - local: model_doc/mgp-str +# title: MGP-STR +# - local: model_doc/nougat +# title: Nougat +# - local: model_doc/oneformer +# title: OneFormer +# - local: model_doc/owlvit +# title: OWL-ViT +# - local: model_doc/owlv2 +# title: OWLv2 +# - local: model_doc/paligemma +# title: PaliGemma +# - local: model_doc/perceiver +# title: Perceiver +# - local: 
model_doc/pix2struct +# title: Pix2Struct +# - local: model_doc/sam +# title: Segment Anything +# - local: model_doc/siglip +# title: SigLIP +# - local: model_doc/speech-encoder-decoder +# title: Speech Encoder Decoder Models +# - local: model_doc/tapas +# title: TAPAS +# - local: model_doc/trocr +# title: TrOCR +# - local: model_doc/tvlt +# title: TVLT +# - local: model_doc/tvp +# title: TVP +# - local: model_doc/udop +# title: UDOP +# - local: model_doc/video_llava +# title: VideoLlava +# - local: model_doc/vilt +# title: ViLT +# - local: model_doc/vipllava +# title: VipLlava +# - local: model_doc/vision-encoder-decoder +# title: Vision Encoder Decoder Models +# - local: model_doc/vision-text-dual-encoder +# title: Vision Text Dual Encoder +# - local: model_doc/visual_bert +# title: VisualBERT +# - local: model_doc/xclip +# title: X-CLIP +# title: Multimodal models +# - isExpanded: false +# sections: +# - local: model_doc/decision_transformer +# title: محول القرار +# - local: model_doc/trajectory_transformer +# title: محول المسار +# title: نماذج التعلم التعزيزية +# - isExpanded: false +# sections: +# - local: model_doc/autoformer +# title: Autoformer +# - local: model_doc/informer +# title: Informer +# - local: model_doc/patchtsmixer +# title: PatchTSMixer +# - local: model_doc/patchtst +# title: PatchTST +# - local: model_doc/time_series_transformer +# title: محول السلاسل الزمنية +# title: نماذج السلاسل الزمنية +# - isExpanded: false +# sections: +# - local: model_doc/graphormer +# title: Graphormer +# title: نماذج الرسم البياني +# title: النماذج +# - sections: +# - local: internal/modeling_utils +# title: الطبقات المخصصة والمرافق +# - local: internal/pipelines_utils +# title: مرافق خطوط الأنابيب +# - local: internal/tokenization_utils +# title: مرافق مقسم النصوص +# - local: internal/trainer_utils +# title: مرافق المدرب +# - local: internal/generation_utils +# title: مرافق التوليد +# - local: internal/image_processing_utils +# title: مرافق معالجة الصور +# - 
local: internal/audio_utils +# title: مرافق معالجة الصوت +# - local: internal/file_utils +# title: مرافق عامة +# - local: internal/time_series_utils +# title: مرافق السلاسل الزمنية +# title: مساعدون داخليون +# title: API diff --git a/docs/source/ar/accelerate.md b/docs/source/ar/accelerate.md new file mode 100644 index 00000000000000..486c1efe59af60 --- /dev/null +++ b/docs/source/ar/accelerate.md @@ -0,0 +1,120 @@ +# التدريب الموزع باستخدام 🤗 Accelerate + + +مع تزايد حجم النماذج اللغوية، برز التوازي كأحد الاستراتيجيات لتدريب نماذج أكبر على أجهزة محدودة وتسريع عملية التدريب بمقدار كبير. في Hugging Face، أنشأنا مكتبة [🤗 Accelerate](https://huggingface.co/docs/accelerate) لمساعدة المستخدمين على تدريب أي نموذج من Transformers بسهولة على أي نوع من الإعدادات الموزعة، سواء كان ذلك على عدة وحدات معالجة رسومات (GPUs) على جهاز واحد أو على عدة وحدات معالجة رسومات موزعة على عدة أجهزة. في هذا الدليل، ستتعلم كيفية تخصيص حلقة تدريب PyTorch الأصلية لتمكين التدريب في بيئة موزعة. + +## الإعداد + +ابدأ بتثبيت 🤗 Accelerate: + +```bash +pip install accelerate +``` + +ثم قم باستيراد وإنشاء كائن [`~accelerate.Accelerator`]. سيقوم [`~accelerate.Accelerator`] تلقائيًا باكتشاف نوع الإعداد الموزع الخاص بك وتهيئة جميع المكونات اللازمة للتدريب. لن تحتاج إلى وضع نموذجك على جهاز بشكل صريح. + +```py +>>> from accelerate import Accelerator + +>>> accelerator = Accelerator() +``` + +## الاستعداد للتسريع + +الخطوة التالية هي تمرير جميع كائنات التدريب ذات الصلة إلى دالة الإعداد [`~accelerate.Accelerator.prepare`]. ويشمل ذلك DataLoaders للتدريب والتقييم، والنموذج، ومُحسِّن المعاملات (optimizer): + +```py +>>> train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( +... train_dataloader, eval_dataloader, model, optimizer +... ) +``` + +## التمرير الخلفي (Backward) + +الإضافة الأخيرة هي استبدال الدالة المعتادة `loss.backward()` في حلقة التدريب الخاصة بك بدالة [`~accelerate.Accelerator.backward`] في 🤗 Accelerate: + +```py +>>> for epoch in range(num_epochs): +... 
for batch in train_dataloader: +... outputs = model(**batch) +... loss = outputs.loss +... accelerator.backward(loss) + +... optimizer.step() +... lr_scheduler.step() +... optimizer.zero_grad() +... progress_bar.update(1) +``` + +كما يمكنك أن ترى في الكود التالي، فأنت بحاجة فقط إلى إضافة أربعة أسطر من الكود إلى حلقة التدريب الخاصة بك لتمكين التدريب الموزع! + +```diff ++ from accelerate import Accelerator + from transformers import AdamW, AutoModelForSequenceClassification, get_scheduler + ++ accelerator = Accelerator() + + model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2) + optimizer = AdamW(model.parameters(), lr=3e-5) + +- device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") +- model.to(device) + ++ train_dataloader, eval_dataloader, model, optimizer = accelerator.prepare( ++ train_dataloader, eval_dataloader, model, optimizer ++ ) + + num_epochs = 3 + num_training_steps = num_epochs * len(train_dataloader) + lr_scheduler = get_scheduler( + "linear", + optimizer=optimizer, + num_warmup_steps=0, + num_training_steps=num_training_steps + ) + + progress_bar = tqdm(range(num_training_steps)) + + model.train() + for epoch in range(num_epochs): + for batch in train_dataloader: +- batch = {k: v.to(device) for k, v in batch.items()} + outputs = model(**batch) + loss = outputs.loss +- loss.backward() ++ accelerator.backward(loss) +optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + progress_bar.update(1) +``` + +## تدريب + +بمجرد إضافة أسطر الكود ذات الصلة، قم بتشغيل التدريب الخاص بك في أحد النصوص أو الدفاتر مثل Colaboratory. 
+ +### التدريب باستخدام نص برمجي + +إذا كنت تشغل التدريب الخاص بك من نص برمجي، فقم بتشغيل الأمر التالي لإنشاء وحفظ ملف تكوين: + +```bash +accelerate config +``` + +ثم قم بتشغيل التدريب الخاص بك باستخدام: + +```bash +accelerate launch train.py +``` + +### التدريب باستخدام دفتر ملاحظات + +يمكن أيضًا تشغيل 🤗 Accelerate في دفاتر الملاحظات إذا كنت تخطط لاستخدام وحدات معالجة الموترات (TPUs) في Colaboratory. قم بتغليف كل الكود المسؤول عن التدريب في دالة، ومررها إلى [`~accelerate.notebook_launcher`]: + +```py +>>> from accelerate import notebook_launcher + +>>> notebook_launcher(training_function) +``` + +للحصول على مزيد من المعلومات حول 🤗 Accelerate وميزاته الغنية، يرجى الرجوع إلى [الوثائق](https://huggingface.co/docs/accelerate). \ No newline at end of file diff --git a/docs/source/ar/agents.md b/docs/source/ar/agents.md new file mode 100644 index 00000000000000..92b2a4715f6f07 --- /dev/null +++ b/docs/source/ar/agents.md @@ -0,0 +1,539 @@ +# الوكلاء والأدوات + +[[open-in-colab]] + +### ما هو الوكيل؟ + +يمكن للنماذج اللغوية الكبيرة (LLMs) التي تم تدريبها على أداء [نمذجة اللغة السببية](./tasks/language_modeling) التعامل مع مجموعة واسعة من المهام، ولكنها غالبًا ما تواجه صعوبات في المهام الأساسية مثل المنطق والحساب والبحث. وعندما يتم استدعاؤها في مجالات لا تؤدي فيها أداءً جيدًا، فإنها غالبًا ما تفشل في توليد الإجابة التي نتوقعها منها. + +يتمثل أحد النهج للتغلب على هذا القصور في إنشاء "وكيل". + +الوكيل هو نظام يستخدم LLM كمحرك له، ولديه حق الوصول إلى وظائف تسمى "أدوات". + +هذه "الأدوات" هي وظائف لأداء مهمة، وتحتوي على جميع الأوصاف اللازمة للوكيل لاستخدامها بشكل صحيح. 
+ +يمكن برمجة الوكيل للقيام بما يلي: +- وضع سلسلة من الإجراءات/الأدوات وتشغيلها جميعًا في نفس الوقت مثل [`CodeAgent`] على سبيل المثال +- التخطيط للإجراءات/الأدوات وتنفيذها واحدة تلو الأخرى والانتظار حتى انتهاء كل إجراء قبل إطلاق التالي مثل [`ReactJsonAgent`] على سبيل المثال + +### أنواع الوكلاء + +#### الوكيل البرمجي (Code agent) + +يتبع هذا الوكيل خطوات محددة: أولًا، يخطط لسلسلة من الإجراءات التي يريد تنفيذها، ثم يولّد شفرة Python لتنفيذ جميع الإجراءات في نفس الوقت. وهو يتعامل بشكل أصلي مع أنواع مختلفة من المدخلات والمخرجات للأدوات التي يستخدمها، وبالتالي فهو الخيار الموصى به للمهام متعددة الوسائط. + +#### وكلاء التفاعل + +هذا هو الوكيل الذي يتم اللجوء إليه لحل مهام الاستدلال، حيث يجعل إطار ReAct ([Yao et al.، 2022](https://huggingface.co/papers/2210.03629)) التفكير استنادًا إلى الملاحظات السابقة فعّالًا للغاية. + +نقوم بتنفيذ إصدارين من ReactJsonAgent: +- [`ReactJsonAgent`] يقوم بتوليد استدعاءات الأدوات كـ JSON في مخرجاته. +- [`ReactCodeAgent`] هو نوع جديد من ReactJsonAgent يقوم بتوليد استدعاءات أدواته كمقاطع من التعليمات البرمجية، والتي تعمل بشكل جيد حقًا مع LLMs التي تتمتع بأداء قوي في البرمجة. + +> [!TIP] +> اقرأ منشور المدونة [Open-source LLMs as LangChain Agents](https://huggingface.co/blog/open-source-llms-as-agents) لمعرفة المزيد عن وكيل ReAct. + +![إطار عمل وكيل ReAct](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/open-source-llms-as-agents/ReAct.png) + +على سبيل المثال، إليك كيف يشق وكيل ReAct Code طريقه عبر السؤال التالي. + +```py3 +>>> agent.run( +... "How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need?", +... ) +=====New task===== +How many more blocks (also denoted as layers) in BERT base encoder than the encoder from the architecture proposed in Attention is All You Need? 
+====Agent is executing the code below: +bert_blocks = search(query="number of blocks in BERT base encoder") +print("BERT blocks:", bert_blocks) +==== +Print outputs: +BERT blocks: twelve encoder blocks + +====Agent is executing the code below: +attention_layer = search(query="number of layers in Attention is All You Need") +print("Attention layers:", attention_layer) +==== +Print outputs: +Attention layers: Encoder: The encoder is composed of a stack of N = 6 identical layers. Each layer has two sub-layers. The first is a multi-head self-attention mechanism, and the second is a simple, position- 2 Page 3 Figure 1: The Transformer - model architecture. + +====Agent is executing the code below: +bert_blocks = 12 +attention_layers = 6 +diff = bert_blocks - attention_layers +print("Difference in blocks:", diff) +final_answer(diff) +==== + +Print outputs: +Difference in blocks: 6 + +Final answer: 6 +``` + +### كيف يمكنني بناء وكيل؟ + +لتهيئة وكيل، تحتاج إلى هذه الوسائط: + +- نموذج لغوي كبير (LLM) يشكل المحرك الأساسي للوكيل. الوكيل نفسه ليس النموذج اللغوي، بل هو برنامج يستخدم النموذج اللغوي كمحرك له. +- موجه النظام (system prompt): هذه هي التعليمات التي يتم إعطاؤها للنموذج اللغوي لإنشاء مخرجاته. +- صندوق أدوات (toolbox) يختار الوكيل منه الأدوات لتنفيذها +- محلل (parser) لاستخراج الأدوات التي يجب استدعاؤها من مخرجات النموذج اللغوي LLM والأدوات التي يجب استخدامها + +عند تهيئة نظام الوكيل، يتم استخدام سمات الأداة لإنشاء وصف للأداة، ثم يتم دمجها في موجه النظام الخاص `system_prompt` للوكيل لإعلامه بالأدوات التي يمكنه استخدامها ولماذا. + +للبدء، يرجى تثبيت `agents` الإضافية لتثبيت جميع التبعيات الافتراضية. + +```bash +pip install transformers[agents] +``` + +قم ببناء محرك LLM الخاص بك من خلال تعريف طريقة `llm_engine` التي تقبل قائمة من [الرسائل](./chat_templating.) وتعيد النص. يجب أن تقبل هذه الدالة القابلة للاستدعاء أيضًا معامل `stop` يشير إلى متى يجب التوقف عن التوليد. 
+ +```python +from huggingface_hub import login, InferenceClient + +login("<YOUR_HUGGINGFACEHUB_API_TOKEN>") + +client = InferenceClient(model="meta-llama/Meta-Llama-3-70B-Instruct") + +def llm_engine(messages, stop_sequences=["Task"]) -> str: + response = client.chat_completion(messages, stop=stop_sequences, max_tokens=1000) + answer = response.choices[0].message.content + return answer +``` + +يمكنك استخدام أي طريقة `llm_engine` طالما أنها: +1. تتبع تنسيق [الرسائل](./chat_templating.md) في مدخلاتها (`List[Dict[str, str]]`) وتعيد `str` +2. تتوقف عن توليد المخرجات عند التسلسلات التي تم تمريرها في معامل `stop` + +أنت بحاجة أيضًا إلى معامل `tools` الذي يقبل قائمة من الأدوات (`Tools`). يمكنك توفير قائمة فارغة لـ `tools`، مع استخدام صندوق الأدوات الافتراضي عبر المعامل الاختياري `add_base_tools=True`. + +الآن يمكنك إنشاء وكيل، مثل [`CodeAgent`]، وتشغيله. ولتسهيل الأمر، نقدم أيضًا فئة [`HfEngine`] التي تستخدم `huggingface_hub.InferenceClient` بشكل ضمني. + +```python +from transformers import CodeAgent, HfEngine + +llm_engine = HfEngine(model="meta-llama/Meta-Llama-3-70B-Instruct") +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) + +agent.run( + "Could you translate this sentence from French, say it out loud and return the audio.", + sentence="Où est la boulangerie la plus proche?", +) +``` + +هذه الميزة ستكون مفيدة في حالة الحاجة الملحة! يمكنك حتى ترك معامل `llm_engine` غير محدد، وسيتم إنشاء [`HfEngine`] بشكل تلقائي. + +```python +from transformers import CodeAgent + +agent = CodeAgent(tools=[], add_base_tools=True) + +agent.run( + "Could you translate this sentence from French, say it out loud and give me the audio.", + sentence="Où est la boulangerie la plus proche?", +) +``` + +لاحظ أننا استخدمنا معامل "sentence" إضافي: يمكنك تمرير النص كمعامل إضافي إلى النموذج. 
+ +يمكنك أيضًا استخدام هذا للإشارة إلى مسار الملفات المحلية أو البعيدة للنموذج لاستخدامها: + +```py +from transformers import ReactCodeAgent + +agent = ReactCodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) + +agent.run("Why does Mike not know many people in New York?", audio="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/recording.mp3") +``` + + +تم تحديد موجه النظام ومحلل المخرجات تلقائيًا، ولكن يمكنك فحصهما بسهولة عن طريق استدعاء `system_prompt_template` على وكيلك. + +```python +print(agent.system_prompt_template) +``` + +من المهم أن تشرح بأكبر قدر ممكن من الوضوح المهمة التي تريد تنفيذها. +كل عملية [`~Agent.run`] مستقلة، وبما أن الوكيل مدعوم من LLM، فقد تؤدي الاختلافات الطفيفة في موجهك إلى نتائج مختلفة تمامًا. +يمكنك أيضًا تشغيل وكيل بشكل متتالي لمهام مختلفة: في كل مرة يتم فيها إعادة تهيئة سمتي `agent.task` و`agent.logs`. + + +#### تنفيذ التعليمات البرمجية + +يقوم مفسر Python بتنفيذ التعليمات البرمجية على مجموعة من المدخلات التي يتم تمريرها جنبًا إلى جنب مع أدواتك. +يجب أن يكون هذا الأمر آمنًا لأن الوظائف الوحيدة التي يمكن استدعاؤها هي الأدوات التي قدمتها (خاصة إذا كانت أدوات من Hugging Face فقط) ووظيفة الطباعة، لذا فأنت مقيد بالفعل بما يمكن تنفيذه. + +مفسر Python لا يسمح أيضًا باستدعاء دوال بشكل افتراضي خارج قائمة آمنة، لذا فإن جميع الهجمات الأكثر وضوحًا لا ينبغي أن تكون مشكلة. +يمكنك أيضًا الإذن باستيرادات إضافية عن طريق تمرير الوحدات النمطية المصرح بها كقائمة من السلاسل في معامل `additional_authorized_imports` عند تهيئة [`ReactCodeAgent`] أو [`CodeAgent`]: + +```py +>>> from transformers import ReactCodeAgent + +>>> agent = ReactCodeAgent(tools=[], additional_authorized_imports=['requests', 'bs4']) +>>> agent.run("Could you get me the title of the page at url 'https://huggingface.co/blog'?") + +(...) +'Hugging Face – Blog' +``` + +سيتم إيقاف التنفيذ عند أي رمز يحاول تنفيذ عملية غير قانونية أو إذا كان هناك خطأ Python عادي في التعليمات البرمجية التي تم إنشاؤها بواسطة الوكيل. 
+ +> [!WARNING] +> يمكن لـ LLM توليد أي شفرة برمجية سيتم تنفيذها بعد ذلك: لا تقم بإضافة أي استيرادات غير آمنة! + +### موجه النظام + +يولّد الوكيل، أو بالأحرى LLM الذي يقود الوكيل، مخرجاته بناءً على موجه النظام. يمكن تخصيص موجه النظام وتصميمه للمهام المقصودة. على سبيل المثال، تحقق من موجه النظام لـ [`ReactCodeAgent`] (الإصدار أدناه مبسط قليلاً). + +```text +You will be given a task to solve as best you can. +You have access to the following tools: +<<tool_descriptions>> + +To solve the task, you must plan forward to proceed in a series of steps, in a cycle of 'Thought:', 'Code:', and 'Observation:' sequences. + +At each step, in the 'Thought:' sequence, you should first explain your reasoning towards solving the task, then the tools that you want to use. +Then in the 'Code:' sequence, you shold write the code in simple Python. The code sequence must end with '/End code' sequence. +During each intermediate step, you can use 'print()' to save whatever important information you will then need. +These print outputs will then be available in the 'Observation:' field, for using this information as input for the next step. + +In the end you have to return a final answer using the `final_answer` tool. + +Here are a few examples using notional tools: +--- +{examples} + +Above example were using notional tools that might not exist for you. You only have acces to those tools: +<<tool_names>> +You also can perform computations in the python code you generate. + +Always provide a 'Thought:' and a 'Code:\n```py' sequence ending with '```' sequence. You MUST provide at least the 'Code:' sequence to move forward. + +Remember to not perform too many operations in a single code block! You should split the task into intermediate code blocks. +Print results at the end of each step to save the intermediate results. Then use final_answer() to return the final result. + +Remember to make sure that variables you use are all defined. + +Now Begin! 
+``` + +يتضمن موجه النظام: +- *مقدمة* تشرح كيف يجب أن يتصرف الوكيل والأدوات التي يجب عليه استخدامها. +- وصف لجميع الأدوات التي يتم تحديدها بواسطة رمز `<<tool_descriptions>>` الذي يتم استبداله ديناميكيًا في وقت التشغيل بالأدوات التي يحددها المستخدم أو يختارها. + - يأتي وصف الأداة من سمات الأداة، `name`، و`description`، و`inputs` و`output_type`، وقالب `jinja2` بسيط يمكنك تحسينه. +- شكل المخرج المتوقع. + +يمكنك تحسين موجه النظام، على سبيل المثال، عن طريق إضافة شرح لتنسيق المخرجات. + +للحصول على أقصى قدر من المرونة، يمكنك الكتابة فوق قالب موجه النظام بالكامل عن طريق تمرير موجه مخصص عبر المعامل `system_prompt`. + +```python +from transformers import ReactJsonAgent +from transformers.agents import PythonInterpreterTool + +agent = ReactJsonAgent(tools=[PythonInterpreterTool()], system_prompt="{your_custom_prompt}") +``` + +> [!WARNING] +> يرجى التأكد من تحديد سلسلة `<<tool_descriptions>>` في مكان ما في `template` حتى يكون الوكيل على علم +بالأدوات المتاحة. + + +### فحص تشغيل الوكيل + +فيما يلي بعض السمات المفيدة لفحص ما حدث بعد التشغيل: +- تخزن `agent.logs` سجلات مفصلة للوكيل. في كل خطوة من تشغيل الوكيل، يتم تخزين كل شيء في قاموس ثم يتم إلحاقه بـ `agent.logs`. +- تشغيل `agent.write_inner_memory_from_logs()` ينشئ ذاكرة داخلية من سجلات الوكيل ليطّلع عليها LLM، كقائمة من رسائل الدردشة. تنتقل هذه الطريقة عبر كل خطوة من سجل الوكيل ولا تخزن سوى ما يهمها كرسالة: على سبيل المثال، ستحفظ موجه النظام والمهمة في رسائل منفصلة، ثم لكل خطوة ستخزن مخرج LLM كرسالة، ومخرج استدعاء الأداة كرسالة أخرى. استخدم هذا إذا كنت تريد عرضًا عامًا لما حدث - ولكن لن يتم نسخ كل سجل بواسطة هذه الطريقة. + +## الأدوات + +الأداة هي عبارة عن وظيفة أساسية يستخدمها الوكيل لتنفيذ مهمة محددة. + +يمكنك على سبيل المثال التحقق من [`PythonInterpreterTool`]: لديه اسم ووصف ووصف للمدخلات ونوع للمخرج، وطريقة `__call__` التي تقوم بتنفيذ المهمة المطلوبة. + +عند تهيئة الوكيل، يتم استخدام سمات الأداة لتوليد وصف للأداة يتم تضمينه في موجه النظام الخاص بالوكيل. يتيح هذا للوكيل معرفة الأدوات التي يمكنه استخدامها ولماذا. 
+ +### صندوق الأدوات الافتراضي + +يأتي Transformers مع صندوق أدوات افتراضي لتمكين الوكلاء، والذي يمكنك إضافته إلى وكيلك عند التهيئة باستخدام معامل `add_base_tools = True`: + +- **الإجابة على أسئلة المستند**: الإجابة على سؤال حول المستند (مثل ملف PDF) بتنسيق صورة ([Donut](./model_doc/donut)) +- **الإجابة على أسئلة الصور**: الإجابة على سؤال حول صورة ([VILT](./model_doc/vilt)) +- **الكلام إلى نص**: تفريغ الكلام المنطوق إلى نص ([Whisper](./model_doc/whisper)) +- **النص إلى كلام**: تحويل النص إلى كلام ([SpeechT5](./model_doc/speecht5)) +- **الترجمة**: ترجمة جملة معينة من لغة المصدر إلى لغة الهدف. +- **مفسر كود Python**: تشغيل كود Python الذي تم إنشاؤه بواسطة LLM في بيئة آمنة. لن يتم إضافة هذه الأداة إلى [`ReactJsonAgent`] إلا إذا استخدمت `add_base_tools=True`، نظرًا لأن الوكلاء المستندين إلى التعليمات البرمجية يمكنهم بالفعل تنفيذ كود Python. + + +يمكنك استخدام أداة يدويًا عن طريق استدعاء دالة [`load_tool`] وتحديد مهمة لتنفيذها. + +```python +from transformers import load_tool + +tool = load_tool("text-to-speech") +audio = tool("This is a text to speech tool") +``` + +### إنشاء أداة جديدة + +يمكنك إنشاء أداتك الخاصة لتغطية حالات الاستخدام التي لا تغطيها الأدوات الافتراضية من Hugging Face. +على سبيل المثال، دعنا نقوم بإنشاء أداة تعرض النموذج الأكثر تنزيلًا لمهمة معينة من Hub. + +سوف نبدأ بالكود التالي. + +```python +from huggingface_hub import list_models + +task = "text-classification" + +model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) +print(model.id) +``` + +يمكن تحويل هذه الشيفرة إلى فئة ترث من الفئة العليا [`Tool`]. + +تحتاج الأداة المخصصة إلى: + +- اسم `name`، والتي تمثل اسم الأداة نفسها. عادةً ما يصف الاسم وظيفتها. بما أن الكود يعيد النموذج الأكثر تنزيلًا لمهمة ما، فلنسمها `model_download_counter`. +- تستخدم خاصية `description` لملء موجه نظام الوكيل. +- خاصية `inputs`، والتي هي عبارة عن قاموس بمفاتيح "type" و"description". 
يحتوي على معلومات تساعد مفسر Python على اتخاذ خيارات مستنيرة بشأن المدخلات. +- خاصية `output_type`، والتي تحدد نوع المخرج. +- طريقة `forward` والتي تحتوي على الكود الذي سيتم تنفيذه للحصول على النتيجة النهائية. + +```python +from transformers import Tool +from huggingface_hub import list_models + +class HFModelDownloadsTool(Tool): + name = "model_download_counter" + description = ( + "This is a tool that returns the most downloaded model of a given task on the Hugging Face Hub. " + "It returns the name of the checkpoint." + ) + + inputs = { + "task": { + "type": "text", + "description": "the task category (such as text-classification, depth-estimation, etc)", + } + } + output_type = "text" + + def forward(self, task: str): + model = next(iter(list_models(filter=task, sort="downloads", direction=-1))) + return model.id +``` + +الآن بعد أن أصبحت فئة `HFModelDownloadsTool` المخصصة جاهزة، يمكنك حفظها في ملف باسم `model_downloads.py` واستيرادها للاستخدام. + +```python +from model_downloads import HFModelDownloadsTool + +tool = HFModelDownloadsTool() +``` + +يمكنك أيضًا مشاركة أداتك المخصصة في Hub عن طريق استدعاء [`~Tool.push_to_hub`] على الأداة. تأكد من أنك قمت بإنشاء مستودع لها على Hub وأنك تستخدم رمز وصول للقراءة. + +```python +tool.push_to_hub("{your_username}/hf-model-downloads") +``` + +قم بتحميل الأداة باستخدام دالة [`~Tool.load_tool`] ومررها إلى معلمة `tools` في الوكيل الخاص بك. + +```python +from transformers import load_tool, CodeAgent + +model_download_tool = load_tool("m-ric/hf-model-downloads") +agent = CodeAgent(tools=[model_download_tool], llm_engine=llm_engine) +agent.run( + "Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub?" +) +``` + +ستحصل على ما يلي: + +```text +======== New task ======== +Can you give me the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub? 
+==== Agent is executing the code below: +most_downloaded_model = model_download_counter(task="text-to-video") +print(f"The most downloaded model for the 'text-to-video' task is {most_downloaded_model}.") +==== +``` + +والناتج: + +`"النموذج الأكثر تنزيلًا لمهمة `text-to-video` هو ByteDance/AnimateDiff-Lightning."` + +### إدارة صندوق أدوات الوكيل الخاص بك + +إذا كنت قد قمت بتهيئة وكيل، فمن غير الملائم إعادة تهيئته من البداية لإضافة أداة جديدة ترغب في استخدامها. باستخدام مكتبة Transformers، يمكنك إدارة صندوق أدوات الوكيل بإضافة أو استبدال أداة موجودة. + +دعنا نضيف الأداة `model_download_tool` إلى وكيل تم تهيئته مسبقًا باستخدام صندوق الأدوات الافتراضي. + +```python +from transformers import CodeAgent + +agent = CodeAgent(tools=[], llm_engine=llm_engine, add_base_tools=True) +agent.toolbox.add_tool(model_download_tool) +``` + +الآن يمكننا الاستفادة من الأداة الجديدة وأداة تحويل النص إلى كلام السابقة: + +```python + agent.run( + "Can you read out loud the name of the model that has the most downloads in the 'text-to-video' task on the Hugging Face Hub and return the audio?" + ) +``` + +| **Audio** | +|------------------------------------------------------------------------------------------------------------------------------------------------------| +|