Skip to content

Commit

Permalink
docs: update documentation for huggingface inference endpoints. (#539)
Browse files Browse the repository at this point in the history
* docs: update documentation for huggingface inference endpoints.

* fix: fix fences add mock model_id for trying
  • Loading branch information
burtenshaw committed Apr 16, 2024
1 parent 4ef7290 commit d5d7d3e
Showing 1 changed file with 30 additions and 4 deletions.
34 changes: 30 additions & 4 deletions src/distilabel/llms/huggingface/inference_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,36 @@ class InferenceEndpointsLLM(AsyncLLM):
use_openai_client: whether to use the OpenAI client instead of the Hugging Face client.
Examples:
>>> from distilabel.llms.huggingface import AsyncInferenceEndpointsLLM
>>> llm = AsyncInferenceEndpointsLLM(model_id="model-id")
>>> llm.load()
>>> output = await llm.agenerate([{"role": "user", "content": "Hello world!"}])
```python
from distilabel.llms.huggingface import InferenceEndpointsLLM
# Free serverless Inference API
llm = InferenceEndpointsLLM(
model_id="mistralai/Mistral-7B-Instruct-v0.2",
)
# Dedicated Inference Endpoints
llm = InferenceEndpointsLLM(
endpoint_name="<ENDPOINT_NAME>",
api_key="<HF_API_KEY>",
endpoint_namespace="<USER|ORG>",
)
# Dedicated Inference Endpoints or TGI
llm = InferenceEndpointsLLM(
api_key="<HF_API_KEY>",
base_url="<BASE_URL>",
)
llm.load()
# Synchronous request
output = llm.generate(inputs=[[{"role": "user", "content": "Hello world!"}]])
# Asynchronous request
output = await llm.agenerate(input=[{"role": "user", "content": "Hello world!"}])
```
"""

model_id: Optional[str] = None
Expand Down

0 comments on commit d5d7d3e

Please sign in to comment.