FEAT: Code completions #1476

Status: Open. Wants to merge 63 commits into base branch main.

Commits:
4dd5159
add the types for code completions
mikeshi80 May 8, 2024
2d893b3
add prompt style definition for code completions.
mikeshi80 May 8, 2024
81a77cb
add code completion mixin.
mikeshi80 May 8, 2024
b3c0e3a
refactor the code prompt style and add the unit test for code prompt …
mikeshi80 May 9, 2024
30676f0
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 10, 2024
ec8c815
correct path to name function to work on windows file system. Add som…
mikeshi80 May 10, 2024
985a4d7
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 10, 2024
fa81cf8
Merge branch 'code_completions' of github.com:mikeshi80/xinference in…
mikeshi80 May 10, 2024
38200a3
make the code completion process works
mikeshi80 May 10, 2024
3f1a160
Merge branch 'refs/heads/main' into code_completions
mikeshi80 May 10, 2024
e352804
add vllm inference engine support for `deepseek-coder-base` code model.
mikeshi80 May 10, 2024
2093b65
Show code icon for code models in model cards.
mikeshi80 May 10, 2024
f353d35
add client test for code completions.
mikeshi80 May 10, 2024
0e449e0
check whether the code_prompt_style is None.
mikeshi80 May 10, 2024
4bbbb15
format the code by prettier.
mikeshi80 May 10, 2024
62c8dd9
add the function to generate prompt for code completion.
mikeshi80 May 11, 2024
d388303
fix the bug that cannot get generated prompt correctly.
mikeshi80 May 11, 2024
51390bd
fix the bug that cannot get generated prompt correctly.
mikeshi80 May 11, 2024
ec28dec
adjust the test result to make unit test pass.
mikeshi80 May 13, 2024
16db4a7
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 13, 2024
3a0840c
Merge remote-tracking branch 'origin/code_completions' into code_comp…
mikeshi80 May 13, 2024
10d90b0
basically finished code generating web client.
mikeshi80 May 14, 2024
6215454
added the repo_name and file_path for prompt file support, and add th…
mikeshi80 May 14, 2024
5819674
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 14, 2024
514e862
ignore the codespell since there are a lot of file extensions that ar…
mikeshi80 May 14, 2024
2a091aa
format ui code by prettier.
mikeshi80 May 14, 2024
c0925e5
fix the get_code_prompt missing parameter.
mikeshi80 May 14, 2024
e518e74
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 16, 2024
be8a862
Merge branch 'refs/heads/main' into code_completions
mikeshi80 May 20, 2024
f7d02fb
adapt to langchain 0.2.x, which has breaking changes
mikeshi80 May 20, 2024
85f52d7
Merge branch 'refs/heads/adapt-to-langchain-0.2.x' into code_completions
mikeshi80 May 20, 2024
6c42c71
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 23, 2024
12dc09a
add base suffix for codeqwen1.5 to diff with the official generate model
mikeshi80 May 24, 2024
4dbd3bb
add vllm support for codeqwen1.5-base
mikeshi80 May 24, 2024
02f7536
merge llm_family definition.
mikeshi80 May 28, 2024
5e3e749
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 May 28, 2024
ecbf893
modified the model names to use latest model name in definition.
mikeshi80 May 28, 2024
71843c1
modified the model names to use latest model name in definition.
mikeshi80 May 28, 2024
aa9d4bd
merge with main
mikeshi80 Jun 9, 2024
72fd615
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 Jun 13, 2024
d8e0259
add model_hub for model definition in llm_family_modelscope.json.
mikeshi80 Jun 13, 2024
e731632
merge with main
mikeshi80 Jun 17, 2024
8b4abab
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 Jun 18, 2024
b931d9d
Merge branch 'refs/heads/main' into code_completions
mikeshi80 Jun 25, 2024
4f2f8c0
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 Jun 27, 2024
df53445
Merge branch 'refs/heads/main' into code_completions
mikeshi80 Jul 1, 2024
bff135b
Merge branch 'refs/heads/main' into code_completions
mikeshi80 Jul 10, 2024
06ead5b
add the missing import module
mikeshi80 Jul 10, 2024
1901603
format the frontend code.
mikeshi80 Jul 10, 2024
b71e3b6
fix the wrong usage of fetchWrapper
mikeshi80 Jul 10, 2024
3869b30
fix wrong code_prompts get logic
mikeshi80 Jul 10, 2024
810eff3
Merge branch 'refs/heads/main' into code_completions
mikeshi80 Jul 18, 2024
efa6c47
format the frontend code
mikeshi80 Jul 18, 2024
7ae7c77
to use the right call wrapper method.
mikeshi80 Jul 18, 2024
533fcfc
to use the right call wrapper method, again
mikeshi80 Jul 18, 2024
aa724ac
reversed code to use parse_obj
mikeshi80 Jul 18, 2024
569aa23
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 Jul 31, 2024
5a31312
remove vllm disable setting check.
mikeshi80 Jul 31, 2024
28e7e3f
Merge branch 'xorbitsai:main' into code_completions
mikeshi80 Aug 5, 2024
9d5930a
merge with main branch
mikeshi80 Aug 21, 2024
0ad126a
Merge remote-tracking branch 'origin/code_completions' into code_comp…
mikeshi80 Aug 21, 2024
c6948ca
change the format of starcoder from gglmv3 to ggufv2
mikeshi80 Aug 24, 2024
80a9d99
Merge branch 'main' into code_completions
mikeshi80 Aug 24, 2024
2 changes: 1 addition & 1 deletion setup.cfg
@@ -292,7 +292,7 @@ exclude =

[codespell]
ignore-words-list = hist,rcall,fpr,ser,nd,inout,ot,Ba,ba,asend,hart,coo,splitted,datas,fro
-skip = .idea,.git,./build,./docs/build,node_modules,static,generated,*.po,*.ts,*.json,*.c,*.cpp,*.cfg,thirdparty
+skip = .idea,.git,./build,./docs/build,node_modules,static,generated,*.po,*.ts,*.json,*.c,*.cpp,*.cfg,thirdparty,xinference/model/llm/lang_utils.py

[isort]
profile = black
1 change: 1 addition & 0 deletions setup.py
@@ -73,6 +73,7 @@ class CustomDevelop(ExtraCommandMixin, develop):
class CustomSDist(ExtraCommandMixin, sdist):
    pass


class BuildWeb(Command):
"""build_web command"""

152 changes: 152 additions & 0 deletions xinference/api/restful_api.py
@@ -62,6 +62,7 @@
    ChatCompletionMessage,
    Completion,
    CreateChatCompletion,
    CreateCodeCompletion,
    CreateCompletion,
    ImageList,
    PeftModelConfig,
@@ -158,6 +159,8 @@ class BuildGradioInterfaceRequest(BaseModel):
    model_ability: List[str]
    model_description: str
    model_lang: List[str]
    infill_supported: Optional[bool]
    repo_level_supported: Optional[bool]


class BuildGradioImageInterfaceRequest(BaseModel):
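The two new request fields are plain optional booleans, so existing callers that omit them keep working. A minimal sketch of that behavior, using a hypothetical cut-down version of the model above (explicit `= None` defaults are added here so the sketch behaves the same on pydantic v1 and v2):

```python
from typing import List, Optional

from pydantic import BaseModel


# Hypothetical trimmed-down stand-in for BuildGradioInterfaceRequest.
class GradioInterfaceRequestSketch(BaseModel):
    model_lang: List[str]
    infill_supported: Optional[bool] = None
    repo_level_supported: Optional[bool] = None


# Clients that predate the code-completion feature simply omit the new fields;
# both then come through as None and the interface builder can treat the
# model as not supporting infill or repo-level completion.
req = GradioInterfaceRequestSketch(model_lang=["en"])
print(req.infill_supported)  # None
```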
@@ -258,6 +261,9 @@ async def internal_exception_handler(request: Request, exc: Exception):
        self._router.add_api_route(
            "/v1/models/prompts", self._get_builtin_prompts, methods=["GET"]
        )
        self._router.add_api_route(
            "/v1/models/code_prompts", self._get_builtin_code_prompts, methods=["GET"]
        )
        self._router.add_api_route(
            "/v1/models/families", self._get_builtin_families, methods=["GET"]
        )
@@ -554,6 +560,29 @@ async def internal_exception_handler(request: Request, exc: Exception):
            ),
        )

        self._router.add_api_route(
            "/v1/code/completions",
            self.create_code_completion,
            methods=["POST"],
            response_model=Completion,
            dependencies=(
                [Security(self._auth_service, scopes=["models:read"])]
                if self.is_authenticated()
                else None
            ),
        )

        self._router.add_api_route(
            "/v1/code/prompt",
            self.get_code_prompt,
            methods=["POST"],
            dependencies=(
                [Security(self._auth_service, scopes=["models:read"])]
                if self.is_authenticated()
                else None
            ),
        )

        # for custom models
        self._router.add_api_route(
            "/v1/model_registrations/{model_type}",
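Both new routes accept POSTed JSON matching CreateCodeCompletion. A hedged client-side sketch of assembling a body for /v1/code/completions (the field names mirror the handler further down in this diff; the helper itself is hypothetical and not part of this PR):

```python
import json


def build_code_completion_payload(model, prompt, mode="completion", suffix=None):
    """Hypothetical helper: assemble a JSON body for POST /v1/code/completions."""
    # Mirror the server-side validation in create_code_completion.
    if mode not in ("completion", "infill"):
        raise ValueError("mode must be one of 'completion' or 'infill'")
    # The handler forces stream=False, so set it explicitly on the client too.
    payload = {"model": model, "prompt": prompt, "mode": mode, "stream": False}
    if suffix is not None:
        # In infill mode the model fills the gap between prompt and suffix.
        payload["suffix"] = suffix
    return payload


body = json.dumps(
    build_code_completion_payload(
        "deepseek-coder-base", "def fib(n):\n    ", mode="infill", suffix="    return a"
    )
)
```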
@@ -743,6 +772,18 @@ async def _get_builtin_prompts(self) -> JSONResponse:
            logger.error(e, exc_info=True)
            raise HTTPException(status_code=500, detail=str(e))

    async def _get_builtin_code_prompts(self) -> JSONResponse:
        """
        For internal usage
        :return:
        """
        try:
            data = await (await self._get_supervisor_ref()).get_builtin_code_prompts()
            return JSONResponse(content=data)
        except Exception as e:
            logger.error(e, exc_info=True)
            raise HTTPException(status_code=500, detail=str(e))

    async def _get_builtin_families(self) -> JSONResponse:
        """
        For internal usage
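The handlers in this file all follow the same shape: call through the supervisor ref, and map failures onto HTTP status codes (400 for bad input such as an unknown model_uid, 500 for everything else). A framework-free sketch of just that mapping (the helper is hypothetical; the status codes come from the except clauses in the handlers):

```python
def map_exception_to_status(exc: Exception) -> int:
    """Hypothetical helper mirroring the except clauses in this diff:
    ValueError (e.g. unknown model_uid) -> 400, anything else -> 500."""
    return 400 if isinstance(exc, ValueError) else 500


print(map_exception_to_status(ValueError("no such model")))  # 400
print(map_exception_to_status(RuntimeError("worker died")))  # 500
```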
@@ -1003,6 +1044,8 @@ async def build_gradio_interface(
                model_description=body.model_description,
                model_lang=body.model_lang,
                access_token=access_token,
                infill_supported=body.infill_supported,
                repo_level_supported=body.repo_level_supported,
            ).build()
            gr.mount_gradio_app(self._app, interface, f"/{model_uid}")
        except ValueError as ve:
@@ -1763,6 +1806,115 @@ async def stream_results():
                self.handle_request_limit_error(e)
                raise HTTPException(status_code=500, detail=str(e))

    async def create_code_completion(self, request: Request) -> Response:
        json_data = await request.json()

        if "mode" in json_data and json_data["mode"] not in ("completion", "infill"):
            raise HTTPException(
                status_code=400,
                detail="mode must be one of 'completion' or 'infill'",
            )

        if json_data.get("stream", False):
            json_data["stream"] = False

        body = CreateCodeCompletion.parse_obj(json_data)
        exclude = {
            "mode",
            "prompt",
            "file_path",
            "suffix",
            "repo_name",
            "files",
            "model",
            "n",
            "messages",
            "logit_bias",
            "logit_bias_type",
            "user",
        }

        kwargs = body.dict(exclude_unset=True, exclude=exclude)

        # TODO: Decide if this default value override is necessary #1061
        if body.max_tokens is None:
            kwargs["max_tokens"] = max_tokens_field.default

        if body.logit_bias is not None:
            raise HTTPException(status_code=501, detail="Not implemented")

        model_uid = body.model

        try:
            model = await (await self._get_supervisor_ref()).get_model(model_uid)
        except ValueError as ve:
            logger.error(str(ve), exc_info=True)
            await self._report_error_event(model_uid, str(ve))
            raise HTTPException(status_code=400, detail=str(ve))
        except Exception as e:
            logger.error(e, exc_info=True)
            await self._report_error_event(model_uid, str(e))
            raise HTTPException(status_code=500, detail=str(e))

        assert not body.stream

        try:
            data = await model.code_generate(
                body.mode,
                body.prompt,
                body.file_path,
                body.suffix,
                body.repo_name,
                body.files,
                kwargs,
            )
            return Response(content=data, media_type="application/json")
        except Exception as e:
            logger.error(e, exc_info=True)
            await self._report_error_event(model_uid, str(e))
            self.handle_request_limit_error(e)
            raise HTTPException(status_code=500, detail=str(e))

    async def get_code_prompt(self, request: Request) -> Response:
        json_data = await request.json()

        if "mode" in json_data and json_data["mode"] not in ("completion", "infill"):
            raise HTTPException(
                status_code=400,
                detail="mode must be one of 'completion' or 'infill'",
            )

        body = CreateCodeCompletion.parse_obj(json_data)

        model_uid = body.model

        try:
            model = await (await self._get_supervisor_ref()).get_model(model_uid)
        except ValueError as ve:
            logger.error(str(ve), exc_info=True)
            await self._report_error_event(model_uid, str(ve))
            raise HTTPException(status_code=400, detail=str(ve))
        except Exception as e:
            logger.error(e, exc_info=True)
            await self._report_error_event(model_uid, str(e))
            raise HTTPException(status_code=500, detail=str(e))

        try:
            code_prompt = await model.get_code_prompt(
                body.mode,
                body.prompt,
                body.file_path,
                body.suffix,
                body.repo_name,
                body.files,
            )
            return Response(content=code_prompt, media_type="application/json")
        except Exception as e:
            logger.error(e, exc_info=True)
            await self._report_error_event(model_uid, str(e))
            self.handle_request_limit_error(e)
            raise HTTPException(status_code=500, detail=str(e))

    async def query_engines_by_model_name(self, model_name: str) -> JSONResponse:
        try:
            content = await (
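create_code_completion forwards only the generation options the caller actually set: body.dict(exclude_unset=True, exclude=exclude) drops both the routing fields and anything left at its default. A small sketch of that filtering (the model class here is a hypothetical cut-down stand-in for CreateCodeCompletion, using the pydantic v1 .dict() API as the handler does):

```python
from typing import Optional

from pydantic import BaseModel


# Hypothetical cut-down stand-in for CreateCodeCompletion.
class CodeCompletionSketch(BaseModel):
    model: str
    prompt: str
    mode: str = "completion"
    temperature: Optional[float] = None
    max_tokens: Optional[int] = None


body = CodeCompletionSketch(model="m", prompt="def f():", temperature=0.2)

# exclude_unset drops fields the caller never sent (mode, max_tokens);
# exclude drops fields that are routed separately instead of passed as kwargs.
kwargs = body.dict(exclude_unset=True, exclude={"model", "prompt", "mode"})
print(kwargs)  # {'temperature': 0.2}
```

Only temperature survives, which is why the handler must re-add max_tokens afterwards when the caller left it unset.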