-
Notifications
You must be signed in to change notification settings - Fork 94
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #122 from lean-dojo/external
Support external models
- Loading branch information
Showing
10 changed files
with
522 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,29 @@ | ||
Python Server for External Models | ||
================================= | ||
|
||
This folder contains code that enables running some of the leading general-purpose or math-specific models. It is also fairly easy to adapt the existing code and run other external models you would like to bring. | ||
|
||
## Requirements | ||
|
||
The setup steps are pretty simple. The script below is sufficient to run all external models already supported in this folder. If you only want to run a subset of them, you may not need all packages in the last step of pip installation. | ||
|
||
```bash | ||
conda create --name lean-copilot python=3.10 python numpy | ||
conda activate lean-copilot | ||
pip install torch --index-url https://download.pytorch.org/whl/cu121 # Depending on whether you have CUDA and the CUDA version; see https://pytorch.org/. | ||
pip install fastapi uvicorn loguru transformers openai | ||
pip install torch --index-url https://download.pytorch.org/whl/cu121 # Depending on whether you have CUDA and, if so, your CUDA version; see https://pytorch.org/. | ||
pip install fastapi uvicorn loguru transformers openai anthropic google.generativeai vllm | ||
``` | ||
|
||
|
||
## Running the Server | ||
|
||
```bash | ||
uvicorn server:app --port 23337 | ||
``` | ||
|
||
After the server is up running, you can go to `LeanCopilotTests/ModelAPIs.lean` to try your external models out! | ||
|
||
## Contributions | ||
|
||
We welcome contributions. If you think it would beneficial to add some other external models, or if you would like to make other contributions regarding the external model support in Lean Copilot, please feel free to open a PR. The main entry point is this `python` folder as well as the `ModelAPIs.lean` file under `LeanCopilotTests`. | ||
|
||
We use [`black`](https://pypi.org/project/black/) to format code in this folder. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from .oai_runner import OpenAIRunner | ||
from .hf_runner import HFTacticGenerator | ||
from .vllm_runner import VLLMTacticGenerator | ||
from .claude_runner import ClaudeRunner | ||
from .gemini_runner import GeminiRunner |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from typing import List, Tuple | ||
import os | ||
|
||
try: | ||
from anthropic import Anthropic | ||
except ImportError as e: | ||
pass | ||
from .external_parser import * | ||
|
||
|
||
class ClaudeRunner(Generator, Transformer): | ||
client = Anthropic(api_key=os.getenv("ANTHROPIC_KEY")) | ||
|
||
def __init__(self, **args): | ||
self.client_kwargs: dict[str | str] = { | ||
"model": args["model"], | ||
"temperature": args["temperature"], | ||
"max_tokens": args["max_tokens"], | ||
"top_p": args["top_p"], | ||
} | ||
self.name = self.client_kwargs["model"] | ||
|
||
def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]: | ||
prompt = pre_process_input(self.name, input + target_prefix) | ||
|
||
response = self.client.completions.create( | ||
prompt=prompt, | ||
**self.client_kwargs, | ||
) | ||
content = response.completion | ||
|
||
results = [ | ||
(post_process_output(self.name, content), 1.0) | ||
] # Currently Claude only supports one output. | ||
return choices_dedup(results) | ||
|
||
|
||
if __name__ == "__main__": | ||
generation_kwargs = { | ||
"model": "claude-3-opus", | ||
"temperature": 0.9, | ||
"max_tokens": 1024, | ||
"top_p": 0.9, | ||
} | ||
|
||
model = ClaudeRunner(**generation_kwargs) | ||
print(model.generate("n : ℕ\n⊢ gcd n n = n")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import torch | ||
import numpy as np | ||
from typing import List, Tuple | ||
from abc import ABC, abstractmethod | ||
|
||
|
||
def get_cuda_if_available(): | ||
return torch.device("cuda" if torch.cuda.is_available() else "cpu") | ||
|
||
|
||
def pre_process_input(model_name, input): | ||
if model_name == "internlm/internlm2-math-plus-1_8b": | ||
prompt = ( | ||
"My LEAN 4 state is:\n```lean\n" | ||
+ input | ||
+ "```\nPlease predict a possible tactic to help me prove the theorem." | ||
) | ||
prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n""" | ||
elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview": | ||
prompt = ( | ||
"Here is a theorom you need to prove in Lean:\n" | ||
+ input | ||
+ "\nNow you should suggest one line tactic in lean code:" | ||
) | ||
elif "gemini" in model_name or "claude" in model_name: | ||
prompt = ( | ||
"Here is a theorom you need to prove in Lean:\n" | ||
+ input | ||
+ "\nNow you should suggest one line tactic in lean code:" | ||
) | ||
else: | ||
raise NotImplementedError(f"External model '{model_name}' not supported") | ||
return prompt | ||
|
||
|
||
def post_process_output(model_name, output): | ||
if model_name == "internlm/internlm2-math-plus-1_8b": | ||
result = ( | ||
output.split("assistant")[-1] | ||
.split("lean")[-1] | ||
.split("```")[0] | ||
.split("\n")[1] | ||
) | ||
elif model_name == "gpt-3.5-turbo" or model_name == "gpt-4-turbo-preview": | ||
result = output.split("lean")[-1].split("```")[0].split("\n")[1] | ||
elif "gemini" in model_name or "claude" in model_name: | ||
result = output.split("lean")[-1].split("```")[0].split("\n")[1] | ||
else: | ||
raise NotImplementedError(f"External model '{model_name}' not supported") | ||
return result | ||
|
||
|
||
def choices_dedup(output_list: List[tuple[str, float]]) -> List[tuple[str, float]]: | ||
unique_data = {} | ||
for item in output_list: | ||
if item[0] not in unique_data or item[1] > unique_data[item[0]]: | ||
unique_data[item[0]] = item[1] | ||
sorted_data = sorted(unique_data.items(), key=lambda x: x[1], reverse=True) | ||
return sorted_data | ||
|
||
|
||
class Generator(ABC): | ||
@abstractmethod | ||
def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]: | ||
pass | ||
|
||
|
||
class Encoder(ABC): | ||
@abstractmethod | ||
def encode(self, input: str) -> np.ndarray: | ||
pass | ||
|
||
|
||
class Transformer: | ||
def cuda(self) -> None: | ||
self.model.cuda() | ||
|
||
def cpu(self) -> None: | ||
self.model.cpu() | ||
|
||
@property | ||
def device(self) -> torch.device: | ||
return self.model.device |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from typing import List, Tuple | ||
import os | ||
|
||
try: | ||
import google.generativeai as genai | ||
from google.generativeai import GenerationConfig | ||
except ImportError as e: | ||
pass | ||
from .external_parser import * | ||
|
||
|
||
class GeminiRunner(Generator, Transformer): | ||
client = genai.configure(api_key=os.getenv("GOOGLE_API_KEY")) | ||
safety_settings = [ | ||
{ | ||
"category": "HARM_CATEGORY_HARASSMENT", | ||
"threshold": "BLOCK_NONE", | ||
}, | ||
{ | ||
"category": "HARM_CATEGORY_HATE_SPEECH", | ||
"threshold": "BLOCK_NONE", | ||
}, | ||
{ | ||
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", | ||
"threshold": "BLOCK_NONE", | ||
}, | ||
{ | ||
"category": "HARM_CATEGORY_DANGEROUS_CONTENT", | ||
"threshold": "BLOCK_NONE", | ||
}, | ||
] | ||
|
||
def __init__(self, **args): | ||
self.client_kwargs: dict[str | str] = { | ||
"model": args["model"], | ||
"temperature": args["temperature"], | ||
"max_tokens": args["max_tokens"], | ||
"top_p": args["top_p"], | ||
} | ||
self.name = self.client_kwargs["model"] | ||
self.client = genai.GenerativeModel(args["model"]) | ||
self.generation_config = GenerationConfig( | ||
candidate_count=1, | ||
max_output_tokens=args["max_tokens"], | ||
temperature=args["temperature"], | ||
top_p=args["top_p"], | ||
) | ||
|
||
def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]: | ||
prompt = pre_process_input(self.name, input + target_prefix) | ||
|
||
response = self.client.generate_content( | ||
prompt, | ||
generation_config=self.generation_config, | ||
safety_settings=GeminiRunner.safety_settings, | ||
) | ||
|
||
results = [ | ||
(post_process_output(self.name, response.text), 1.0) | ||
] # Currently Gemini only supports one output. | ||
return choices_dedup(results) | ||
|
||
|
||
if __name__ == "__main__": | ||
generation_kwargs = { | ||
"model": "gemini-1.0-pro", | ||
"temperature": 0.9, | ||
"max_tokens": 1024, | ||
"top_p": 0.9, | ||
} | ||
|
||
model = GeminiRunner(**generation_kwargs) | ||
print(model.generate("n : ℕ\n⊢ gcd n n = n")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
import torch | ||
from loguru import logger | ||
from typing import List, Tuple | ||
from transformers import ( | ||
AutoModelForCausalLM, | ||
AutoTokenizer, | ||
) | ||
from .external_parser import * | ||
|
||
|
||
class HFTacticGenerator(Generator, Transformer): | ||
def __init__(self, **args) -> None: | ||
self.name = args["model"] | ||
self.tokenizer = AutoTokenizer.from_pretrained( | ||
self.name, trust_remote_code=True | ||
) | ||
device = args["device"] | ||
if device == "auto": | ||
device = get_cuda_if_available() | ||
else: | ||
device = torch.device(device) | ||
logger.info(f"Loading {self.name} on {device}") | ||
self.model = AutoModelForCausalLM.from_pretrained( | ||
self.name, trust_remote_code=True | ||
).to(device) | ||
|
||
self.generation_args: dict[str | str] = { | ||
"do_sample": args["do_sample"], | ||
"temperature": args["temperature"], # chat default is 0.8. | ||
"max_new_tokens": args["max_new_tokens"], | ||
"top_p": args["top_p"], # chat default is 0.8. | ||
"num_return_sequences": args["num_return_sequences"], | ||
# "num_beams": self.num_return_sequences, # Here if we use beam search for llms the output are not diverse (few tactics are provided). | ||
"output_scores": args["output_scores"], | ||
"output_logits": args["output_logits"], | ||
"return_dict_in_generate": args["return_dict_in_generate"], | ||
} | ||
|
||
def generate(self, input: str, target_prefix: str = "") -> List[Tuple[str, float]]: | ||
prompt = input + target_prefix | ||
'''prompt= 'Here is a theorom you need to prove in Lean:\n'+prompt+'\nNow you should suggest one line tactic in lean code:' | ||
prompt = f"""<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n""" | ||
''' | ||
prompt = pre_process_input(self.name, prompt) | ||
|
||
self.model = self.model.eval() | ||
|
||
tokenized_input = self.tokenizer(prompt, return_tensors="pt") | ||
eos_token_id = [ | ||
self.tokenizer.eos_token_id, | ||
self.tokenizer.convert_tokens_to_ids(["<|im_end|>"])[0], | ||
] | ||
outputs = self.model.generate( | ||
tokenized_input.input_ids.to(self.device), | ||
eos_token_id=eos_token_id, | ||
**self.generation_args, | ||
) | ||
response = self.tokenizer.batch_decode( | ||
outputs["sequences"], skip_special_tokens=True | ||
) | ||
|
||
result = [] | ||
index = 0 | ||
for out, score in zip(response, outputs.scores): | ||
out = post_process_output(self.name, out) | ||
result.append((out, score[index].exp().sum().log().cpu().item())) | ||
index += 1 | ||
result = choices_dedup(result) | ||
return result | ||
|
||
|
||
if __name__ == "__main__": | ||
generation_kwargs = { | ||
"model": "internlm/internlm2-math-plus-1_8b", | ||
"temperature": 0.6, | ||
"max_new_tokens": 1024, | ||
"top_p": 0.9, | ||
"length_penalty": 0, | ||
"num_return_sequences": 64, | ||
"do_sample": True, | ||
"output_scores": True, | ||
"output_logits": False, | ||
"return_dict_in_generate": True, | ||
"device": "auto", | ||
} | ||
model = HFTacticGenerator(**generation_kwargs) | ||
model.cuda() | ||
print(model.generate("n : ℕ\n⊢ gcd n n = n")) |
Oops, something went wrong.