Skip to content

Commit 84394fa

Browse files
committed
Fetching problems in batches using new leetcode api
1 parent 0332d55 commit 84394fa

File tree

3 files changed

+184
-53
lines changed

3 files changed

+184
-53
lines changed

generate.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,6 @@ async def generate_anki_note(
5757
leetcode_data: leetcode_anki.helpers.leetcode.LeetcodeData,
5858
leetcode_model: genanki.Model,
5959
leetcode_task_handle: str,
60-
leetcode_task_title: str,
61-
topic: str,
6260
) -> LeetcodeNote:
6361
"""
6462
Generate a single Anki flashcard
@@ -68,8 +66,8 @@ async def generate_anki_note(
6866
fields=[
6967
leetcode_task_handle,
7068
str(await leetcode_data.problem_id(leetcode_task_handle)),
71-
leetcode_task_title,
72-
topic,
69+
str(await leetcode_data.title(leetcode_task_handle)),
70+
str(await leetcode_data.category(leetcode_task_handle)),
7371
await leetcode_data.description(leetcode_task_handle),
7472
await leetcode_data.difficulty(leetcode_task_handle),
7573
"yes" if await leetcode_data.paid(leetcode_task_handle) else "no",
@@ -158,24 +156,24 @@ async def generate(start: int, stop: int) -> None:
158156
],
159157
)
160158
leetcode_deck = genanki.Deck(LEETCODE_ANKI_DECK_ID, "leetcode")
161-
leetcode_data = leetcode_anki.helpers.leetcode.LeetcodeData()
159+
160+
leetcode_data = leetcode_anki.helpers.leetcode.LeetcodeData(start, stop)
162161

163162
note_generators: List[Coroutine[Any, Any, LeetcodeNote]] = []
164163

165-
for topic, leetcode_task_title, leetcode_task_handle in list(
166-
leetcode_anki.helpers.leetcode.get_leetcode_task_handles()
167-
)[start:stop]:
164+
task_handles = await leetcode_data.all_problems_handles()
165+
166+
logging.info("Generating flashcards")
167+
for leetcode_task_handle in task_handles:
168168
note_generators.append(
169169
generate_anki_note(
170170
leetcode_data,
171171
leetcode_model,
172172
leetcode_task_handle,
173-
leetcode_task_title,
174-
topic,
175173
)
176174
)
177175

178-
for leetcode_note in tqdm(note_generators):
176+
for leetcode_note in tqdm(note_generators, unit="flashcard"):
179177
leetcode_deck.add_note(await leetcode_note)
180178

181179
genanki.Package(leetcode_deck).write_to_file(OUTPUT_FILE)

leetcode_anki/helpers/leetcode.py

Lines changed: 174 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
import functools
33
import json
44
import logging
5+
import math
56
import os
67
import time
78
from functools import lru_cache
8-
from typing import Callable, Dict, Iterator, List, Tuple, Type
9-
10-
import diskcache # type: ignore
9+
from typing import Callable, Dict, List, Tuple, Type, Union
1110

1211
# https://github.com/prius/python-leetcode
1312
import leetcode.api.default_api # type: ignore
@@ -16,7 +15,11 @@
1615
import leetcode.configuration # type: ignore
1716
import leetcode.models.graphql_query # type: ignore
1817
import leetcode.models.graphql_query_get_question_detail_variables # type: ignore
18+
import leetcode.models.graphql_query_problemset_question_list_variables # type: ignore
19+
import leetcode.models.graphql_query_problemset_question_list_variables_filter_input # type: ignore
20+
import leetcode.models.graphql_question_detail # type: ignore
1921
import urllib3 # type: ignore
22+
from tqdm import tqdm
2023

2124
CACHE_DIR = "cache"
2225

@@ -49,20 +52,6 @@ def _get_leetcode_api_client() -> leetcode.api.default_api.DefaultApi:
4952
return api_instance
5053

5154

52-
def get_leetcode_task_handles() -> Iterator[Tuple[str, str, str]]:
53-
"""
54-
Get task handles for all the leetcode problems.
55-
"""
56-
api_instance = _get_leetcode_api_client()
57-
58-
for topic in ["algorithms", "database", "shell", "concurrency"]:
59-
api_response = api_instance.api_problems_topic_get(topic=topic)
60-
for stat_status_pair in api_response.stat_status_pairs:
61-
stat = stat_status_pair.stat
62-
63-
yield (topic, stat.question__title, stat.question__title_slug)
64-
65-
6655
def retry(times: int, exceptions: Tuple[Type[Exception]], delay: float) -> Callable:
6756
"""
6857
Retry Decorator
@@ -98,44 +87,117 @@ class LeetcodeData:
9887
names.
9988
"""
10089

101-
def __init__(self) -> None:
90+
def __init__(self, start: int, stop: int) -> None:
    """
    Initialize the leetcode API client and the in-memory problem cache.

    :param start: offset of the first problem to fetch; must be >= 0
    :param stop: offset of the last problem to fetch; must be >= start
    :raises ValueError: if a bound is negative or start is greater than stop
    """
    if start < 0:
        raise ValueError(f"Start must be non-negative: {start}")

    if stop < 0:
        # Report the offending value (the message used to print `start`)
        raise ValueError(f"Stop must be non-negative: {stop}")

    if start > stop:
        raise ValueError(
            f"Start ({start}) must be not greater than stop ({stop})"
        )

    self._start = start
    self._stop = stop

    self._api_instance = _get_leetcode_api_client()

    # problem slug -> question details; populated by the `_cache` property
    self._cache_container: Dict[
        str, leetcode.models.graphql_question_detail.GraphqlQuestionDetail
    ] = {}
110111

111-
@retry(times=3, exceptions=(urllib3.exceptions.ProtocolError,), delay=5)
112-
async def _get_problem_data(self, problem_slug: str) -> Dict[str, str]:
112+
@property
async def _cache(
    self,
) -> Dict[str, leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
    """
    Awaitable property returning the mapping problem_slug -> question
    details.

    While the stored mapping is empty, the problems are fetched via
    `_get_problems_data` and indexed by their `title_slug`.
    """
    if not self._cache_container:
        fetched = await self._get_problems_data()
        self._cache_container = {item.title_slug: item for item in fetched}

    return self._cache_container
119128

129+
@retry(times=3, exceptions=(urllib3.exceptions.ProtocolError,), delay=5)
130+
async def _get_problems_count(self) -> int:
120131
api_instance = self._api_instance
121132

122133
graphql_request = leetcode.models.graphql_query.GraphqlQuery(
123134
query="""
124-
query getQuestionDetail($titleSlug: String!) {
125-
question(titleSlug: $titleSlug) {
126-
freqBar
135+
query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
136+
problemsetQuestionList: questionList(
137+
categorySlug: $categorySlug
138+
limit: $limit
139+
skip: $skip
140+
filters: $filters
141+
) {
142+
totalNum
143+
}
144+
}
145+
""",
146+
variables=leetcode.models.graphql_query_problemset_question_list_variables.GraphqlQueryProblemsetQuestionListVariables(
147+
category_slug="",
148+
limit=1,
149+
skip=0,
150+
filters=leetcode.models.graphql_query_problemset_question_list_variables_filter_input.GraphqlQueryProblemsetQuestionListVariablesFilterInput(
151+
tags=[],
152+
# difficulty="MEDIUM",
153+
# status="NOT_STARTED",
154+
# list_id="7p5x763", # Top Amazon Questions
155+
# premium_only=False,
156+
),
157+
),
158+
operation_name="problemsetQuestionList",
159+
)
160+
161+
# Critical section. Don't allow more than one parallel request to
162+
# the Leetcode API
163+
async with leetcode_api_access_lock:
164+
time.sleep(2) # Leetcode has a rate limiter
165+
data = api_instance.graphql_post(body=graphql_request).data
166+
167+
return data.problemset_question_list.total_num or 0
168+
169+
@retry(times=3, exceptions=(urllib3.exceptions.ProtocolError,), delay=5)
170+
async def _get_problems_data_page(
171+
self, offset: int, page_size: int, page: int
172+
) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
173+
api_instance = self._api_instance
174+
graphql_request = leetcode.models.graphql_query.GraphqlQuery(
175+
query="""
176+
query problemsetQuestionList($categorySlug: String, $limit: Int, $skip: Int, $filters: QuestionListFilterInput) {
177+
problemsetQuestionList: questionList(
178+
categorySlug: $categorySlug
179+
limit: $limit
180+
skip: $skip
181+
filters: $filters
182+
) {
183+
total: totalNum
184+
questions: data {
127185
questionId
128186
questionFrontendId
129187
boundTopicId
130188
title
189+
titleSlug
190+
categoryTitle
191+
frequency
192+
freqBar
131193
content
132194
translatedTitle
133-
translatedContent
134195
isPaidOnly
135196
difficulty
136197
likes
137198
dislikes
138199
isLiked
200+
isFavor
139201
similarQuestions
140202
contributors {
141203
username
@@ -158,42 +220,100 @@ async def _get_problem_data(self, problem_slug: str) -> Dict[str, str]:
158220
__typename
159221
}
160222
stats
223+
acRate
224+
codeDefinition
161225
hints
162226
solution {
163227
id
164228
canSeeDetail
165229
__typename
166230
}
231+
hasSolution
232+
hasVideoSolution
167233
status
168234
sampleTestCase
235+
enableRunCode
169236
metaData
237+
translatedContent
170238
judgerAvailable
171239
judgeType
172240
mysqlSchemas
173-
enableRunCode
174241
enableTestMode
175242
envInfo
176243
__typename
177-
}
178244
}
245+
}
246+
}
179247
""",
180-
variables=leetcode.models.graphql_query_get_question_detail_variables.GraphqlQueryGetQuestionDetailVariables( # noqa: E501
181-
title_slug=problem_slug
248+
variables=leetcode.models.graphql_query_problemset_question_list_variables.GraphqlQueryProblemsetQuestionListVariables(
249+
category_slug="",
250+
limit=page_size,
251+
skip=offset + page * page_size,
252+
filters=leetcode.models.graphql_query_problemset_question_list_variables_filter_input.GraphqlQueryProblemsetQuestionListVariablesFilterInput(),
182253
),
183-
operation_name="getQuestionDetail",
254+
operation_name="problemsetQuestionList",
184255
)
185256

186257
# Critical section. Don't allow more than one parallel request to
187258
# the Leetcode API
188259
async with leetcode_api_access_lock:
189260
time.sleep(2) # Leetcode has a rate limiter
190-
data = api_instance.graphql_post(body=graphql_request).data.question
191-
192-
# Save data in the cache
193-
self._cache[problem_slug] = data
261+
data = api_instance.graphql_post(
262+
body=graphql_request
263+
).data.problemset_question_list.questions
194264

195265
return data
196266

267+
async def _get_problems_data(
    self,
) -> List[leetcode.models.graphql_question_detail.GraphqlQuestionDetail]:
    """
    Fetch question details for the configured start/stop range, page by
    page (at most 50 problems per request).

    :return: the fetched problems, at most `stop - start + 1` of them
    :raises ValueError: if start is greater than the total problems count
    """
    problem_count = await self._get_problems_count()

    if self._start > problem_count:
        # The f-prefix was missing, so the placeholders were printed verbatim
        raise ValueError(
            f"Start ({self._start}) is greater than problems count ({problem_count})"
        )

    start = self._start
    stop = min(self._stop, problem_count)

    total = stop - start + 1
    page_size = min(50, total)

    problems: List[
        leetcode.models.graphql_question_detail.GraphqlQuestionDetail
    ] = []

    logging.info(f"Fetching {total} problems {page_size} per page")

    for page in tqdm(
        range(math.ceil(total / page_size)),
        unit="problem",
        unit_scale=page_size,
    ):
        data = await self._get_problems_data_page(start, page_size, page)
        problems.extend(data)

    # Every page is requested at full size, so the last one may run past
    # the requested range; keep only the number of problems announced above.
    return problems[:total]
297+
298+
async def all_problems_handles(self) -> List[str]:
    """
    Return the handles (slugs) of every problem held in the cache.

    Example: ["two-sum", "three-sum"]
    """
    problems_by_slug = await self._cache
    # Iterating a dict yields its keys
    return list(problems_by_slug)
305+
306+
async def _get_problem_data(
    self, problem_slug: str
) -> leetcode.models.graphql_question_detail.GraphqlQuestionDetail:
    """
    Return the cached question details for the given problem slug.

    TODO: Legacy method. Needed in the old architecture. Can be replaced
    with direct cache calls later.

    :raises KeyError: if the slug is not among the fetched problems
        (previously the method silently returned None in that case)
    """
    cache = await self._cache
    return cache[problem_slug]
316+
197317
async def _get_description(self, problem_slug: str) -> str:
198318
"""
199319
Problem description
@@ -296,3 +416,17 @@ async def freq_bar(self, problem_slug: str) -> float:
296416
"""
297417
data = await self._get_problem_data(problem_slug)
298418
return data.freq_bar or 0
419+
420+
async def title(self, problem_slug: str) -> str:
    """
    Returns problem title

    :param problem_slug: handle of the problem, e.g. "two-sum"
    """
    data = await self._get_problem_data(problem_slug)
    return data.title
426+
427+
async def category(self, problem_slug: str) -> str:
    """
    Returns problem category title

    :param problem_slug: handle of the problem, e.g. "two-sum"
    """
    data = await self._get_problem_data(problem_slug)
    return data.category_title

requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
python-leetcode==1.1.0
1+
python-leetcode==1.2.1
22
setuptools==57.5.0
3-
diskcache
43
genanki
54
tqdm

0 commit comments

Comments
 (0)