From 029b1861e9fdec54250972732c4f3a829fc60067 Mon Sep 17 00:00:00 2001 From: eduardocerqueira Date: Mon, 2 Sep 2024 17:11:30 +0000 Subject: [PATCH] 2024-09-02 17:11:30.186703 new snippets --- seeker/report.txt | 48 +++ seeker/snippet/UnmodifiableList.java | 19 ++ seeker/snippet/WikiLocation.java | 88 ----- seeker/snippet/WikiParser.java | 117 ------- .../snippet/adb-shell-as-root-dump-vendor.sh | 5 - seeker/snippet/bench.py | 139 -------- seeker/snippet/chat.py | 94 ------ .../collect_data_openai_sample_imports.py | 23 -- seeker/snippet/csv2star.py | 55 --- seeker/snippet/deepseek_manifold.py | 100 ++++++ seeker/snippet/disable.bash | 13 - seeker/snippet/docker_iptables | 136 -------- seeker/snippet/env.go | 56 +++ seeker/snippet/error-handling-flask.py | 15 - seeker/snippet/extract.go | 131 +++++++ seeker/snippet/flask.py | 12 - seeker/snippet/forty.java | 31 -- seeker/snippet/forty_three.java | 25 ++ seeker/snippet/hurl.py | 32 ++ seeker/snippet/latest_tag_of.sh | 14 + seeker/snippet/mat_demo_v1.py | 34 -- seeker/snippet/pipes_1.py | 50 --- seeker/snippet/radar_chart_example.py | 120 ------- seeker/snippet/repack.sh | 56 --- seeker/snippet/repackepub.sh | 5 - seeker/snippet/run.sh | 24 ++ seeker/snippet/runner.go | 112 ------ .../spbpu_schedule_to_google_calendar.py | 262 -------------- seeker/snippet/swap.sh | 5 + seeker/snippet/switch-desktop-env.sh | 5 + seeker/snippet/test.py | 129 ------- seeker/snippet/tts_backup.py | 182 ---------- seeker/snippet/vendor_sleuth.sh | 31 -- seeker/snippet/vlm_rag.py | 319 ------------------ 34 files changed, 459 insertions(+), 2028 deletions(-) create mode 100644 seeker/snippet/UnmodifiableList.java delete mode 100644 seeker/snippet/WikiLocation.java delete mode 100644 seeker/snippet/WikiParser.java delete mode 100644 seeker/snippet/adb-shell-as-root-dump-vendor.sh delete mode 100644 seeker/snippet/bench.py delete mode 100644 seeker/snippet/chat.py delete mode 100644 seeker/snippet/collect_data_openai_sample_imports.py delete mode 100644 seeker/snippet/csv2star.py create mode 100644 seeker/snippet/deepseek_manifold.py delete mode 100644 seeker/snippet/disable.bash delete mode 100644 seeker/snippet/docker_iptables create mode 100644 seeker/snippet/env.go delete mode 100644 seeker/snippet/error-handling-flask.py create mode 100644 seeker/snippet/extract.go delete mode 100644 seeker/snippet/flask.py delete mode 100644 seeker/snippet/forty.java create mode 100644 seeker/snippet/forty_three.java create mode 100644 seeker/snippet/hurl.py create mode 100644 seeker/snippet/latest_tag_of.sh delete mode 100644 seeker/snippet/mat_demo_v1.py delete mode 100644 seeker/snippet/pipes_1.py delete mode 100644 seeker/snippet/radar_chart_example.py delete mode 100644 seeker/snippet/repack.sh delete mode 100644 seeker/snippet/repackepub.sh create mode 100644 seeker/snippet/run.sh delete mode 100644 seeker/snippet/runner.go delete mode 100644 seeker/snippet/spbpu_schedule_to_google_calendar.py create mode 100644 seeker/snippet/swap.sh create mode 100644 seeker/snippet/switch-desktop-env.sh delete mode 100644 seeker/snippet/test.py delete mode 100644 seeker/snippet/tts_backup.py delete mode 100644 seeker/snippet/vendor_sleuth.sh delete mode 100644 seeker/snippet/vlm_rag.py diff --git a/seeker/report.txt b/seeker/report.txt index 005514de..38008a8c 100644 --- a/seeker/report.txt +++ b/seeker/report.txt @@ -1,3 +1,51 @@ +-------------------------------------------------------------------------------- + 2024-09-02 17:11:30.186703 
+-------------------------------------------------------------------------------- + On branch main +Your branch is up to date with 'origin/main'. + +Changes not staged for commit: + (use "git add/rm ..." to update what will be committed) + (use "git restore ..." to discard changes in working directory) + deleted: snippet/WikiLocation.java + deleted: snippet/WikiParser.java + deleted: snippet/adb-shell-as-root-dump-vendor.sh + deleted: snippet/bench.py + deleted: snippet/chat.py + deleted: snippet/collect_data_openai_sample_imports.py + deleted: snippet/csv2star.py + deleted: snippet/disable.bash + deleted: snippet/docker_iptables + deleted: snippet/error-handling-flask.py + deleted: snippet/flask.py + deleted: snippet/forty.java + deleted: snippet/mat_demo_v1.py + deleted: snippet/pipes_1.py + deleted: snippet/radar_chart_example.py + deleted: snippet/repack.sh + deleted: snippet/repackepub.sh + deleted: snippet/runner.go + deleted: snippet/spbpu_schedule_to_google_calendar.py + deleted: snippet/test.py + deleted: snippet/tts_backup.py + deleted: snippet/vendor_sleuth.sh + deleted: snippet/vlm_rag.py + +Untracked files: + (use "git add ..." to include in what will be committed) + snippet/UnmodifiableList.java + snippet/deepseek_manifold.py + snippet/env.go + snippet/extract.go + snippet/forty_three.java + snippet/hurl.py + snippet/latest_tag_of.sh + snippet/run.sh + snippet/swap.sh + snippet/switch-desktop-env.sh + +no changes added to commit (use "git add" and/or "git commit -a") + -------------------------------------------------------------------------------- 2024-08-30 17:12:40.941245 -------------------------------------------------------------------------------- diff --git a/seeker/snippet/UnmodifiableList.java b/seeker/snippet/UnmodifiableList.java new file mode 100644 index 00000000..3a0abe22 --- /dev/null +++ b/seeker/snippet/UnmodifiableList.java @@ -0,0 +1,19 @@ +//date: 2024-09-02T16:44:47Z +//url: https://api.github.com/gists/80bdaec048a85c75aea97e18c2230ce9 +//owner: https://api.github.com/users/cyurtoz + +// Creating a mutable ArrayList +List originalList = new ArrayList<>(); +originalList.add(1); +originalList.add(2); + +// Creating an unmodifiableList on top of the mutable ArrayList +List unmodifiableList = Collections.unmodifiableList(originalList); +System.out.println(unmodifiableList); // prints [1, 2] + +unmodifiableList.add(3); // throws java.lang.UnsupportedOperationException + +// add 3 to underlying list +originalList.add(3); +System.out.println(unmodifiableList); // prints [1, 2, 3] +// data on unmodifiableList has changed \ No newline at end of file diff --git a/seeker/snippet/WikiLocation.java b/seeker/snippet/WikiLocation.java deleted file mode 100644 index 4bf32c46..00000000 --- a/seeker/snippet/WikiLocation.java +++ /dev/null @@ -1,88 +0,0 @@ -//date: 2024-08-29T17:03:49Z -//url: https://api.github.com/gists/a52fb5d70dcef6d431363b0f431de4e6 -//owner: https://api.github.com/users/pagetronic - -package live.page.wiki; - -import live.page.hubd.system.json.Json; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -public class WikiLocation { - public static List findLocation(Json data) { - for (String key : data.keySet()) { - if (key.matches("object location|object location dec|location|location dec|camera location|camera location dec")) { - List coordinates = convertCoordinates(data.getList(key)); - if (coordinates != null) { - return coordinates; - } - } - if (Json.class.isAssignableFrom(data.get(key).getClass())) { - List 
coordinates = findLocation(data.getJson(key)); - if (coordinates != null) { - return coordinates; - } - } - if (List.class.isAssignableFrom(data.get(key).getClass()) && - !data.getList(key).isEmpty()) { - for (Object item : data.getList(key)) { - if (item != null && Json.class.isAssignableFrom(item.getClass())) { - List coordinates = findLocation((Json) item); - if (coordinates != null) { - return coordinates; - } - } - } - } - } - return null; - } - - - private static List convertCoordinates(List coordinates) { - if (coordinates == null) { - return null; - } - coordinates = new ArrayList<>(coordinates); - for (int key : new int[]{8, 2}) { - if (coordinates.size() > key) { - for (String start : new String[]{ - "source", "alt", "type", - "heading", "region", "zoom", "scale", - "...", "sl", "dim", "view"}) { - if (coordinates.get(key).trim().toLowerCase().startsWith(start) || coordinates.get(key).trim().isEmpty()) { - coordinates.remove(key); - break; - } - } - } - } - - try { - if (coordinates.size() >= 8 && - (coordinates.get(3).equals("N") || coordinates.get(3).equals("S")) && - (coordinates.get(7).equals("E") || coordinates.get(7).equals("W")) - ) { - return Arrays.asList( - convertCoordinates(Double.parseDouble(coordinates.get(0)), Double.parseDouble(coordinates.get(1)), Double.parseDouble(coordinates.get(2)), coordinates.get(3)), - convertCoordinates(Double.parseDouble(coordinates.get(4)), Double.parseDouble(coordinates.get(5)), Double.parseDouble(coordinates.get(6)), coordinates.get(7))); - } else { - return Arrays.asList(Double.parseDouble(coordinates.get(0)), Double.parseDouble(coordinates.get(1))); - } - } catch (Exception e) { - e.printStackTrace(); - } - return null; - } - - private static double convertCoordinates(double degree, double minute, double second, String heading) { - double decimalDegrees = degree + (minute / 60.0) + (second / 3600.0); - if ("W".equals(heading) || "S".equals(heading)) { - decimalDegrees = -decimalDegrees; - } - - return decimalDegrees; - } -} diff --git a/seeker/snippet/WikiParser.java b/seeker/snippet/WikiParser.java deleted file mode 100644 index 4df00038..00000000 --- a/seeker/snippet/WikiParser.java +++ /dev/null @@ -1,117 +0,0 @@ -//date: 2024-08-29T17:03:49Z -//url: https://api.github.com/gists/a52fb5d70dcef6d431363b0f431de4e6 -//owner: https://api.github.com/users/pagetronic - -package live.page.wiki; - -import info.bliki.wiki.filter.PlainTextConverter; -import info.bliki.wiki.model.WikiModel; -import live.page.hubd.system.json.Json; -import live.page.hubd.system.utils.Fx; -import org.apache.commons.text.StringEscapeUtils; - -import java.io.IOException; -import java.net.URLEncoder; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -public class WikiParser extends WikiModel { - - final Json data = new Json(); - - public WikiParser() { - super("", ""); - } - - public static Json getInfos(String title, String text) { - WikiParser wikiModel = new WikiParser(); - try { - wikiModel.render(new PlainTextConverter(), text, new StringBuilder(), true, true); - } catch (IOException e) { - Fx.log("\n#{" + url(title) + "}"); - } - - Json data = new Json(); - - if (wikiModel.data.containsKey("information")) { - - Json information = wikiModel.data.getJson("information"); - - Json description = new Json(); - if (information.containsKey("description") && String.class.isAssignableFrom(information.get("description").getClass())) { - description.put("int", 
information.getString("description")); - } else { - description = information.getJson("description"); - if (description != null && description.containsKey("langswitch")) { - description = description.getJson("langswitch"); - } - } - data.put("description", description); - - if (description == null || description.isEmpty()) { - Fx.log("\nD{" + url(title) + "}"); - data.put("description", StringEscapeUtils.unescapeHtml4(title).split("\\.")[0]); - } - if (information.containsKey("author")) { - data.put("author", information.get("author")); - } - - } - List coordinates = WikiLocation.findLocation(wikiModel.data); - if (coordinates == null) { - Fx.log("\nL{" + url(title) + "}"); - } - data.put("coordinates", coordinates); - data.put("data", wikiModel.data); - return data; - } - - private static String url(String title) { - return " https://commons.wikimedia.org/wiki/" + URLEncoder.encode(StringEscapeUtils.unescapeHtml4(title), StandardCharsets.UTF_8).replace("+", "%20") + " "; - } - - - @Override - public void substituteTemplateCall(String templateName, Map parameterMap, Appendable writer) throws IOException { - - writer.append("@@Template@").append(templateName.toLowerCase().trim()).append("@"); - - Json params = new Json(); - - for (String key : parameterMap.keySet()) { - - WikiParser model = new WikiParser(); - StringBuilder builder = new StringBuilder(); - model.render(new PlainTextConverter(), parameterMap.get(key), builder, true, false); - String str = builder.toString().replace("[\r\n ]+", " ").replaceAll(" +", " ").trim(); - writer.append(str); - Matcher match = Pattern.compile("@@Template@([^@]+)@", Pattern.MULTILINE).matcher(builder.toString()); - Json done = new Json(); - while (match.find()) { - if (data.containsKey(match.group(1).toLowerCase().trim())) { - done.put(match.group(1).toLowerCase().trim(), data.get(match.group(1).toLowerCase().trim())); - data.remove(match.group(1).toLowerCase().trim()); - } - } - if (!key.equals("prec") && !key.equals("wikidata")) { - if (!done.isEmpty()) { - params.put(key.toLowerCase().trim(), done); - } else if (!str.isEmpty()) { - params.put(key.toLowerCase().trim(), str); - } - } - } - - if (params.size() == 1 && params.containsKey("1")) { - data.put(templateName.toLowerCase().trim(), params.get("1")); - } else if (params.keySet().stream().allMatch(name -> name.matches("[0-9]+"))) { - data.put(templateName.toLowerCase().trim(), params.values().stream().toList()); - } else { - data.put(templateName.toLowerCase().trim(), params); - } - } - -} \ No newline at end of file diff --git a/seeker/snippet/adb-shell-as-root-dump-vendor.sh b/seeker/snippet/adb-shell-as-root-dump-vendor.sh deleted file mode 100644 index 92ebd37a..00000000 --- a/seeker/snippet/adb-shell-as-root-dump-vendor.sh +++ /dev/null @@ -1,5 +0,0 @@ -#date: 2024-08-29T17:02:18Z -#url: https://api.github.com/gists/0cd89a25c730ac267559c44b5487c9ff -#owner: https://api.github.com/users/spezifisch - -tar -cvzf /sdcard/proprietary-files.tar.gz /odm/etc /product/app /product/priv-app /system/etc /system_ext/app /system_ext/bin /system_ext/etc /system_ext/framework /system_ext/lib64 /system_ext/priv-app /system/framework /vendor/app /vendor/bin /vendor/etc /vendor/firmware /vendor/gpu /vendor/lib64 /vendor/lib \ No newline at end of file diff --git a/seeker/snippet/bench.py b/seeker/snippet/bench.py deleted file mode 100644 index a827e4d0..00000000 --- a/seeker/snippet/bench.py +++ /dev/null @@ -1,139 +0,0 @@ -#date: 2024-08-30T16:36:13Z -#url: 
https://api.github.com/gists/6832dd94f8bb34a1a6a5a20de6af6132 -#owner: https://api.github.com/users/samwho - -import random -import string -import time -from typing import Tuple - -import psycopg2 -from psycopg2.extensions import cursor -from rich.console import Console -from rich.table import Table - -LARGE_STRING = "a" * 64 * 1024 - - -def random_string(length: int = 10) -> str: - return "".join(random.choices(string.ascii_lowercase, k=length)) - - -def create_tables(cur: cursor) -> None: - cur.execute("""CREATE TABLE IF NOT EXISTS int_table - (id INTEGER PRIMARY KEY, value TEXT)""") - cur.execute("""CREATE TABLE IF NOT EXISTS string_table - (id TEXT PRIMARY KEY, value TEXT)""") - - -def truncate_table(cur: cursor, table_name: str) -> None: - cur.execute(f"TRUNCATE TABLE {table_name}") - cur.connection.commit() - - -def insert_data( - cur: cursor, table_name: str, data: list[Tuple[int | str, str]] -) -> float: - total = 0 - truncate_table(cur, table_name) - for record in data: - start_time = time.perf_counter() - cur.execute(f"INSERT INTO {table_name} (id, value) VALUES (%s, %s)", record) - cur.connection.commit() - end_time = time.perf_counter() - total += end_time - start_time - return total - - -def read_data(cur: cursor, table_name: str, ids: list[int | str]) -> float: - total = 0 - for id in ids: - start_time = time.perf_counter() - cur.execute(f"SELECT * FROM {table_name} WHERE id = %s", (id,)) - cur.fetchone() - end_time = time.perf_counter() - total += end_time - start_time - return total - - -def benchmark(num_records: int = 10000, num_reads: int = 1000) -> dict[str, float]: - # PostgreSQL connection parameters - conn_params = { - "dbname": "postgres", - "user": "postgres", - "password": "**********" - "host": "localhost", - "port": "5432", - } - - conn = psycopg2.connect(**conn_params) - cur = conn.cursor() - - # Ensure tables don't exist - cur.execute("DROP TABLE IF EXISTS int_table") - cur.execute("DROP TABLE IF EXISTS string_table") - conn.commit() - - create_tables(cur) - conn.commit() - - ints = list(range(num_records)) - random_ints = ints - random.shuffle(random_ints) - - # Prepare data - int_seq_data = [(i, LARGE_STRING) for i in ints] - int_random_data = [(i, LARGE_STRING) for i in random_ints] - str_seq_data = [(f"{i:010d}", LARGE_STRING) for i in ints] - str_random_data = [(random_string(), LARGE_STRING) for i in ints] - - # Benchmark insertions - int_seq_insert = insert_data(cur, "int_table", int_seq_data) - int_random_insert = insert_data(cur, "int_table", int_random_data) - str_seq_insert = insert_data(cur, "string_table", str_seq_data) - str_random_insert = insert_data(cur, "string_table", str_random_data) - - # Prepare read data - int_seq_ids = [i for i, _ in int_seq_data[:num_reads]] - int_random_ids = [i for i, _ in int_random_data[:num_reads]] - str_seq_ids = [i for i, _ in str_seq_data[:num_reads]] - str_random_ids = [i for i, _ in str_random_data[:num_reads]] - - # Benchmark reads - int_seq_read = read_data(cur, "int_table", int_seq_ids) - int_random_read = read_data(cur, "int_table", int_random_ids) - str_seq_read = read_data(cur, "string_table", str_seq_ids) - str_random_read = read_data(cur, "string_table", str_random_ids) - - cur.close() - conn.close() - - return { - ("int", "sequential", "insert"): int_seq_insert, - ("int", "random", "insert"): int_random_insert, - ("str", "sequential", "insert"): str_seq_insert, - ("str", "random", "insert"): str_random_insert, - ("int", "sequential", "read"): int_seq_read, - ("int", "random", "read"): 
int_random_read,
-        ("str", "sequential", "read"): str_seq_read,
-        ("str", "random", "read"): str_random_read,
-    }
-
-
-if __name__ == "__main__":
-    n = 10000
-    results = benchmark(num_records=n, num_reads=n)
-
-    table = Table()
-
-    table.add_column("Type", style="cyan", no_wrap=True)
-    table.add_column("Mode", style="cyan", no_wrap=True)
-    table.add_column("Operation", style="cyan", no_wrap=True)
-    table.add_column("Time (seconds)", style="magenta")
-
-    for (type, mode, op), time in results.items():
-        table.add_row(type, mode, op, f"{time:.3f}")
-
-    console = Console()
-    console.print(table)
diff --git a/seeker/snippet/chat.py b/seeker/snippet/chat.py
deleted file mode 100644
index c1d5f4b4..00000000
--- a/seeker/snippet/chat.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#date: 2024-08-29T16:50:18Z
-#url: https://api.github.com/gists/e1cc87e813835f151f8a342a16764b25
-#owner: https://api.github.com/users/aphexlog
-
-import json
-import boto3
-import logging
-import pyaudio
-from botocore.exceptions import ClientError
-from mypy_boto3_bedrock_runtime.client import BedrockRuntimeClient as BedrockClient
-from typing import cast
-from pydub import AudioSegment
-from io import BytesIO
-
-# Setup logging
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO)
-
-class DragonChatHandler:
-    def __init__(self, region: str):
-        self.client = cast(BedrockClient, boto3.client("bedrock-runtime", region_name=region))
-        self.polly_client = boto3.client('polly', region_name=region)
-
-    def generate_message(self, model_id, system_prompt, messages, max_tokens):
-        body = json.dumps({
-            "anthropic_version": "bedrock-2023-05-31",
-            "max_tokens": "**********"
-            "system": system_prompt,
-            "messages": messages
-        })
-
-        try:
-            response = self.client.invoke_model(body=body, modelId=model_id)
-            response_body = json.loads(response.get('body').read())
-            return response_body
-        except ClientError as err:
-            message = err.response["Error"]["Message"]
-            logger.error("A client error occurred: %s", message)
-            raise
-
-    def send_message(self, message, model="anthropic.claude-3-haiku-20240307-v1:0"):
-        user_message = {"role": "user", "content": message}
-        messages = [user_message]
-        system_prompt = "Please respond to the user's message."
- max_tokens = "**********" - - return self.generate_message(model, system_prompt, messages, max_tokens) - - def start_conversation(self, initial_message): - return self.send_message(initial_message) - - def continue_conversation(self, message): - return self.send_message(message) - - def speak_response(self, response_string): - response = self.polly_client.synthesize_speech( - Text=response_string, - OutputFormat='mp3', - VoiceId='Joanna' - ) - - if "AudioStream" in response: - # Convert MP3 to PCM using pydub - audio_stream = response['AudioStream'].read() - sound = AudioSegment.from_mp3(BytesIO(audio_stream)) - raw_data = sound.raw_data - sample_width = sound.sample_width - channels = sound.channels - frame_rate = sound.frame_rate - - # Play the audio - p = pyaudio.PyAudio() - stream = p.open(format=p.get_format_from_width(sample_width), - channels=channels, - rate=frame_rate, - output=True) - - stream.write(raw_data) - - stream.stop_stream() - stream.close() - p.terminate() - -if __name__ == "__main__": - chat_handler = DragonChatHandler("us-east-1") - - # Continue the conversation in a loop - while True: - user_input = input("Input message: ") - response = chat_handler.continue_conversation(user_input) - response_string = response["content"][0]["text"] - - print(f"{response_string}\n") - chat_handler.speak_response(response_string) diff --git a/seeker/snippet/collect_data_openai_sample_imports.py b/seeker/snippet/collect_data_openai_sample_imports.py deleted file mode 100644 index 9be3ee95..00000000 --- a/seeker/snippet/collect_data_openai_sample_imports.py +++ /dev/null @@ -1,23 +0,0 @@ -#date: 2024-08-30T17:12:04Z -#url: https://api.github.com/gists/22d5da6fb48207927a0dc4b8ec62df0d -#owner: https://api.github.com/users/zsasko - -import json -from typing import AsyncGenerator, NoReturn - -import uvicorn -from dotenv import load_dotenv -from fastapi import FastAPI, WebSocket -from fastapi.responses import HTMLResponse -from openai import AsyncOpenAI - -load_dotenv() - -model = "gpt-3.5-turbo" -conversation_history = [] - -app = FastAPI() -client = AsyncOpenAI() - -with open("index.html") as f: - html = f.read() diff --git a/seeker/snippet/csv2star.py b/seeker/snippet/csv2star.py deleted file mode 100644 index f58fec11..00000000 --- a/seeker/snippet/csv2star.py +++ /dev/null @@ -1,55 +0,0 @@ -#date: 2024-08-30T16:56:05Z -#url: https://api.github.com/gists/7781ef8063e0102d7121d799efcb700f -#owner: https://api.github.com/users/shahpnmlab - -import os -import glob -import pandas as pd -import starfile - -# Function to read CSV files -def read_csv_files(directory): - all_data = [] - for filename in glob.glob(os.path.join(directory, '*.csv')): - df = pd.read_csv(filename, header=None, names=['X', 'Y', 'Z']) - tomogram_name = os.path.basename(filename).split('_')[1] - df['MicrographName'] = f'TS_{tomogram_name}' - all_data.append(df) - return pd.concat(all_data, ignore_index=True) - -# Read all CSV files -data = read_csv_files('particle_lists') - -# Create the particles data block -particles_data = pd.DataFrame({ - 'rlnMicrographName': data['MicrographName'], - 'rlnCoordinateX': data['X'], - 'rlnCoordinateY': data['Y'], - 'rlnCoordinateZ': data['Z'], - 'rlnOriginXAngst': [0] * len(data), - 'rlnOriginYAngst': [0] * len(data), - 'rlnOriginZAngst': [0] * len(data) -}) - -# Create the optics data block -optics_data = pd.DataFrame({ - 'rlnOpticsGroup': [1,""], - 'rlnOpticsGroupName': ['opticsGroup1',""], - 'rlnSphericalAberration': [2.700000,""], - 'rlnVoltage': [300.000000,""], - 
'rlnImagePixelSize': [13.48,""], - 'rlnImageSize': [64,""], - 'rlnImageDimensionality': [3,""], - 'rlnPickingImagePixelSize': [13.48,""] -}) - -# Create the STAR file structure -star_data = { - 'optics': optics_data, - 'particles': particles_data -} - -# Write the STAR file -starfile.write(star_data, 'particles.star', overwrite=True) - -print("particles.star file has been created successfully.") \ No newline at end of file diff --git a/seeker/snippet/deepseek_manifold.py b/seeker/snippet/deepseek_manifold.py new file mode 100644 index 00000000..a72477f6 --- /dev/null +++ b/seeker/snippet/deepseek_manifold.py @@ -0,0 +1,100 @@ +#date: 2024-09-02T16:57:49Z +#url: https://api.github.com/gists/d81365117c7f311be8a585a663c11a10 +#owner: https://api.github.com/users/shock + +from typing import List, Union, Generator, Iterator +from pydantic import BaseModel + +import os +import requests + + +class Pipeline: + class Valves(BaseModel): + DEEPSEEK_API_BASE_URL: str = "https://api.deepseek.com" + DEEPSEEK_API_KEY: str = "" + pass + + def __init__(self): + self.type = "manifold" + # Optionally, you can set the id and name of the pipeline. + # Best practice is to not specify the id so that it can be automatically inferred from the filename, so that users can install multiple versions of the same pipeline. + # The identifier must be unique across all pipelines. + # The identifier must be an alphanumeric string that can include underscores or hyphens. It cannot contain spaces, special characters, slashes, or backslashes. + # self.id = "deepseek_pipeline" + self.name = "deepseek/" + + self.valves = self.Valves( + **{ + "DEEPSEEK_API_KEY": os.getenv( + "DEEPSEEK_API_KEY", "your-openai-api-key-here" + ) + } + ) + + self.pipelines = self.get_deepseek_models() + pass + + async def on_startup(self): + # This function is called when the server is started. + print(f"on_startup:{__name__}") + pass + + async def on_shutdown(self): + # This function is called when the server is stopped. + print(f"on_shutdown:{__name__}") + pass + + async def on_valves_updated(self): + # This function is called when the valves are updated. + print(f"on_valves_updated:{__name__}") + self.pipelines = self.get_deepseek_models() + pass + + def get_deepseek_models(self): + return [ + {"id": "deepseek-coder", "name": "deepseek-coder"}, + {"id": "deepseek-chat", "name": "deepseek-chat"}, + ] + + + def pipe( + self, user_message: str, model_id: str, messages: List[dict], body: dict + ) -> Union[str, Generator, Iterator]: + # This is where you can add your custom pipelines like RAG. 
+ print(f"pipe:{__name__}") + + print(messages) + print(user_message) + + headers = {} + headers["Authorization"] = f"Bearer {self.valves.DEEPSEEK_API_KEY}" + headers["Content-Type"] = "application/json" + + payload = {**body, "model": model_id} + + if "user" in payload: + del payload["user"] + if "chat_id" in payload: + del payload["chat_id"] + if "title" in payload: + del payload["title"] + + print(payload) + + try: + r = requests.post( + url=f"{self.valves.DEEPSEEK_API_BASE_URL}/chat/completions", + json=payload, + headers=headers, + stream=True, + ) + + r.raise_for_status() + + if body["stream"]: + return r.iter_lines() + else: + return r.json() + except Exception as e: + return f"Error: {e}" diff --git a/seeker/snippet/disable.bash b/seeker/snippet/disable.bash deleted file mode 100644 index 3e9b2ad0..00000000 --- a/seeker/snippet/disable.bash +++ /dev/null @@ -1,13 +0,0 @@ -#date: 2024-08-30T17:10:15Z -#url: https://api.github.com/gists/f1825a7e2c54c2c6e46095f95c697285 -#owner: https://api.github.com/users/genzj - -#!/bin/bash - -# ref: -# https://askubuntu.com/a/1167767 -# https://manpages.ubuntu.com/manpages/bionic/man5/NetworkManager.conf.5.html#connectivity%20section - -sudo cp --backup=t /etc/NetworkManager/NetworkManager.conf /etc/NetworkManager/NetworkManager.conf.backup -echo -e "\n[connectivity]\nuri=\n" | sudo tee -a /etc/NetworkManager/NetworkManager.conf -sudo systemctl restart NetworkManager.service \ No newline at end of file diff --git a/seeker/snippet/docker_iptables b/seeker/snippet/docker_iptables deleted file mode 100644 index 2990985d..00000000 --- a/seeker/snippet/docker_iptables +++ /dev/null @@ -1,136 +0,0 @@ -#date: 2024-08-30T16:41:10Z -#url: https://api.github.com/gists/9bb557948c27cf1d486d74d711f28638 -#owner: https://api.github.com/users/Sartan4455 - -#!/bin/bash -# Copyright 2020-2022 Tomas Barton -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -set -o nounset -o pipefail -# -function -h { - cat <&1 ;} -function out { printf '%s\n' "$*" ;} - -function iptables_apply { - local binary="$1" - local table="$2" - local action="$3" - local rule="$4" - local noop=$5 - local verbose=$6 - - # check if the rule is already defined - eval "${binary} -t ${table} --check ${rule} 2>/dev/null" - if [[ $? -ne 0 ]]; then - if [[ $noop == true ]]; then - msg $rule; - else - if [[ $verbose == true ]]; then - msg "${rule}" - fi - eval "${binary} -t ${table} ${action} ${rule}"; - fi - fi -} - -function main { - local verbose=false - local debug=false - local noop=false - local interface="docker0" - local binary="iptables" - - while [[ $# -gt 0 ]] - do - case "$1" in # Munging globals, beware - -i|--interface) interface="$2"; shift 2 ;; - -b|--binary) binary="$2"; shift 2 ;; - -n|--noop) noop=true; shift 1 ;; - -v|--verbose) verbose=true; shift 1 ;; - -d|--debug) debug=true; shift 1 ;; - *) err 'Argument error. 
Please see help: -h' ;; - esac - done - - if [[ $debug == true ]]; then - set -x - fi - - if [[ $noop == true ]]; then - msg "NOOP: Only printing iptables rules to be eventually applied" - fi - - # list currently running container IDs - local containers=$(docker ps --format '{{.ID}}') - if [[ ! -z "$containers" ]]; then - while read -r cont; do - # old docker API response - local ip=$(docker inspect -f '{{.NetworkSettings.IPAddress}}' ${cont}) - if [[ -z "${ip}" ]]; then - # newer docker API, probably > 23.01 - ip=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${cont}) - fi - if [[ $verbose == true ]]; then - msg "Container ${cont}" - fi - # extract port forwarding - local ports=$(docker inspect -f '{{json .NetworkSettings.Ports}}' ${cont}) - if [[ "${ports}" != "{}" ]]; then - local fwd=$(echo "${ports}" | jq -r '. as $a| keys[] | select($a[.]!=null) as $f | "\($f)/\($a[$f][].HostPort)"') - if [[ ! -z "$fwd" ]]; then - # pass tripples likes `3000/tcp/29956` - while read -r pfwd; do - local dport protocol hport - local IFS="/" - read dport protocol hport <<< "${pfwd}" - if [[ -z "${ip}" ]]; then - err "ERROR: Empty IP for container: ${cont}" - fi - local rule="DOCKER -d ${ip}\/32 ! -i ${interface} -o ${interface} -p ${protocol} -m ${protocol} --dport ${dport} -j ACCEPT" - iptables_apply "${binary}" "filter" "-A" "${rule}" ${noop} ${verbose} - rule="POSTROUTING -s ${ip}\/32 -d ${ip}\/32 -p ${protocol} -m ${protocol} --dport ${dport} -j MASQUERADE" - iptables_apply "${binary}" "nat" "-A" "${rule}" ${noop} ${verbose} - rule="DOCKER ! -i ${interface} -p ${protocol} -m ${protocol} --dport ${hport} -j DNAT --to-destination ${ip}:${dport}" - iptables_apply "${binary}" "nat" "-A" "${rule}" ${noop} ${verbose} - done <<< "$fwd" - fi - fi - done <<< "$containers" - fi -} - -if [[ ${1:-} ]] && declare -F | cut -d' ' -f3 | fgrep -qx -- "${1:-}" -then - case "$1" in - -h|--help) : ;; - *) ;; - esac - "$@" -else - main "$@" -fi diff --git a/seeker/snippet/env.go b/seeker/snippet/env.go new file mode 100644 index 00000000..234c1036 --- /dev/null +++ b/seeker/snippet/env.go @@ -0,0 +1,56 @@ +//date: 2024-09-02T16:44:34Z +//url: https://api.github.com/gists/17864e071fd0fbd17b33e511687112d1 +//owner: https://api.github.com/users/yszkst + +package util + +import "os" + +// example +// +// es := EnvSetter() +// defer es.Recover() +// es.Setenv("key1", "v1") +// es.Setenv("key2", "v2") +// es.Unsetenv("key3") +// +// ... 
+type EnvSetter struct {
+    // environment variable key/value pairs
+    // a nil value means Recover will unset the key
+    envvars map[string]*string
+}
+
+// save the value as it was before overwriting
+// if the same key was already recorded, leave it as-is
+func (es *EnvSetter) persistCurrent(key string) {
+    _, alreadyHas := es.envvars[key]
+    if !alreadyHas {
+        cur, has := os.LookupEnv(key)
+        if has {
+            es.envvars[key] = &cur
+        } else {
+            es.envvars[key] = nil
+        }
+    }
+}
+
+func (es *EnvSetter) Setenv(key string, value string) error {
+    es.persistCurrent(key)
+    return os.Setenv(key, value)
+}
+
+func (es *EnvSetter) Unsetenv(key string) error {
+    es.persistCurrent(key)
+    return os.Unsetenv(key)
+}
+
+func (es *EnvSetter) Recover() {
+    for key, value := range es.envvars {
+        if value == nil {
+            os.Unsetenv(key)
+        } else {
+            os.Setenv(key, *value)
+        }
+    }
+}
diff --git a/seeker/snippet/error-handling-flask.py b/seeker/snippet/error-handling-flask.py
deleted file mode 100644
index de88dde6..00000000
--- a/seeker/snippet/error-handling-flask.py
+++ /dev/null
@@ -1,15 +0,0 @@
-#date: 2024-08-29T16:56:54Z
-#url: https://api.github.com/gists/f31e7d620f9525d0e6e7dd81e3d47be9
-#owner: https://api.github.com/users/docsallover
-
-from flask import Flask, render_template
-
-app = Flask(__name__)
-
-@app.errorhandler(404)
-def page_not_found(error):
-    return render_template('404.html'), 404
-
-@app.errorhandler(500)
-def internal_server_error(error):
-    return render_template('500.html'), 500
\ No newline at end of file
diff --git a/seeker/snippet/extract.go b/seeker/snippet/extract.go
new file mode 100644
index 00000000..1071af96
--- /dev/null
+++ b/seeker/snippet/extract.go
@@ -0,0 +1,131 @@
+//date: 2024-09-02T17:05:23Z
+//url: https://api.github.com/gists/8eca49b36dab6eda8163569ff0a1ab66
+//owner: https://api.github.com/users/pjmagee
+
+package main
+
+import (
+    "context"
+    "dagger/dsg-icis-openapi-kiota/internal/dagger"
+    "fmt"
+    "slices"
+)
+
+type Language string
+
+const (
+    CSharp string = "CSharp"
+    Go     string = "Go"
+    Java   string = "Java"
+    Python string = "Python"
+)
+
+var Specs = []Spec{
+    NewSpec("https://developer.icis.com/portals/api/sites/icis-live-portal/liveportal/apis/energyapi/download_spec", []Settings{
+        NewSettings(CSharp, "Icis.Api.Energy", "Energy"),
+        NewSettings(Go, "icis/api/energy", "energy"),
+        NewSettings(Java, "com.icis.api.energy", "src/main/java/com/icis/api/energy"),
+        NewSettings(Python, "icis_api_energy", "energy"),
+    }),
+    NewSpec("https://developer.icis.com/portals/api/sites/icis-live-portal/liveportal/apis/energyforesightapi/download_spec", []Settings{
+        NewSettings(CSharp, "Icis.Api.EnergyForesight", "EnergyForesight"),
+        NewSettings(Go, "icis/api/energyforesight", "energyforesight"),
+        NewSettings(Java, "com.icis.api.energyforesight", "src/main/java/com/icis/api/energyforesight"),
+        NewSettings(Python, "icis_api_energyforesight", "energyforesight"),
+    }),
+    NewSpec("https://developer.icis.com/portals/api/sites/icis-live-portal/liveportal/apis/lnganalyticsapi/download_spec", []Settings{
+        NewSettings(CSharp, "Icis.Api.LngAnalytics", "LngAnalytics"),
+        NewSettings(Go, "icis/api/lnganalytics", "lnganalytics"),
+        NewSettings(Java, "com.icis.api.lnganalytics", "src/main/java/com/icis/api/lnganalytics"),
+        NewSettings(Python, "icis_api_lnganalytics", "lnganalytics"),
+    }),
+}
+
+type Settings struct {
+    Language  string
+    Namespace string
+    Path      string
+}
+
+func NewSettings(language string, namespace string, path string) Settings {
+    return Settings{
+        Language:  language,
+        Namespace: namespace,
+        Path:      path,
+    }
+}
+
+type Spec struct {
+    URL      string
+    Settings []Settings
+} + +func NewSpec(url string, settings []Settings) Spec { + return Spec{ + URL: url, + Settings: settings, + } +} + +// Generates Kiota clients for the ICIS OpenAPI specs +func (m *DsgIcisOpenapiKiota) GenerateKiotaClients( + ctx context.Context, + languages []string, + // +optional + // +default="1.15.0" + // The default version of the Kiota tool to use + version string) *dagger.Container { + + return dag. + Container(). + From("mcr.microsoft.com/dotnet/sdk:8.0"). + WithoutUser(). + WithExec([]string{"dotnet", "tool", "install", "Microsoft.OpenApi.Kiota", "--tool-path", "/app"}). + WithWorkdir("/app"). + WithoutEntrypoint(). + With(func(r *dagger.Container) *dagger.Container { + return kiotaCommands(r, languages) + }). + With(func(r *dagger.Container) *dagger.Container { + entries, _ := r.Directory("/output").Entries(ctx) + if len(entries) > 0 { + return r.WithExec([]string{"tar", "-czvf", "/output.tar.gz", "-C", "/", "output"}) + } + return r + }) +} + +// func (m *DsgIcisOpenapiKiota) Kiota(version string) *dagger.Container { +// return dag. +// Container(). +// WithoutUser(). +// From("mcr.microsoft.com/dotnet/sdk:8.0"). +// WithExec([]string{"mkdir", "/app"}). +// WithExec([]string{"mkdir", "/output"}). +// WithExec([]string{"dotnet", "tool", "install", "Microsoft.OpenApi.Kiota", "--tool-path", "/app", "--version", version}) +// } + +func kiotaCommands(container *dagger.Container, languages []string) *dagger.Container { + + for _, spec := range Specs { + for _, settings := range spec.Settings { + if slices.Contains(languages, settings.Language) { + container = container. + WithExec([]string{ + "./kiota", + "generate", + "--output", fmt.Sprintf("/output/%s/%s", settings.Language, settings.Path), + "--language", string(settings.Language), + "--namespace-name", settings.Namespace, + "--openapi", spec.URL, + "--exclude-backward-compatible", "true", + "--log-level", "Debug", + "--additional-data", "true", + "--class-name", "ApiClient", + }) + } + } + } + + return container +} \ No newline at end of file diff --git a/seeker/snippet/flask.py b/seeker/snippet/flask.py deleted file mode 100644 index 7c292ac7..00000000 --- a/seeker/snippet/flask.py +++ /dev/null @@ -1,12 +0,0 @@ -#date: 2024-08-29T16:52:07Z -#url: https://api.github.com/gists/5290fa55e63c46023f99260af082a329 -#owner: https://api.github.com/users/docsallover - -from flask import render_template -from flask_sqlalchemy import Pagination - -@app.route('/users') -def users(): - page = request.args.get('page', 1, type=int) - pagination = User.query.paginate(page, per_page=10) - return render_template('users.html', pagination=pagination) \ No newline at end of file diff --git a/seeker/snippet/forty.java b/seeker/snippet/forty.java deleted file mode 100644 index cc3e1e88..00000000 --- a/seeker/snippet/forty.java +++ /dev/null @@ -1,31 +0,0 @@ -//date: 2024-08-30T16:55:29Z -//url: https://api.github.com/gists/6efae7ebf845724b89a20348dc9521fc -//owner: https://api.github.com/users/sasub-mlp - -import java.util.Scanner; - -public class forty { - static int arrsearch(int[] arr, int x) { - for (int i=0;i 1: - path = path.interpolated(num_vars) - return Path(self.transform(path.vertices), path.codes) - - class RadarAxes(PolarAxes): - - name = "radar" - PolarTransform = RadarTransform - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # rotate plot such that the first axis is at the top - self.set_theta_zero_location("N") - - def fill(self, *args, closed=True, **kwargs): - """Override fill so that line is closed 
by default""" - return super().fill(closed=closed, *args, **kwargs) - - def plot(self, *args, **kwargs): - """Override plot so that line is closed by default""" - lines = super().plot(*args, **kwargs) - for line in lines: - self._close_line(line) - - def _close_line(self, line): - x, y = line.get_data() - # FIXME: markers at x[0], y[0] get doubled-up - if x[0] != x[-1]: - x = np.append(x, x[0]) - y = np.append(y, y[0]) - line.set_data(x, y) - - def set_varlabels(self, labels): - self.set_thetagrids(np.degrees(theta), labels) - - def _gen_axes_patch(self): - # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5 - # in axes coordinates. - if frame == "circle": - return Circle((0.5, 0.5), 0.5) - elif frame == "polygon": - return RegularPolygon((0.5, 0.5), num_vars, radius=0.5, edgecolor="k") - else: - raise ValueError("Unknown value for 'frame': %s" % frame) - - def _gen_axes_spines(self): - if frame == "circle": - return super()._gen_axes_spines() - elif frame == "polygon": - # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'. - spine = Spine(axes=self, spine_type="circle", path=Path.unit_regular_polygon(num_vars)) - # unit_regular_polygon gives a polygon of radius 1 centered at - # (0, 0) but we want a polygon of radius 0.5 centered at (0.5, - # 0.5) in axes coordinates. - spine.set_transform(Affine2D().scale(0.5).translate(0.5, 0.5) + self.transAxes) - return {"polar": spine} - else: - raise ValueError("Unknown value for 'frame': %s" % frame) - - register_projection(RadarAxes) - return theta - - -theta = radar_Randomy(4, frame="polygon") -data = np.array( - [ - [0.55192003, 0.94219184, 0.49221465, 0.23299307], - [0.86465455, 0.92235546, 0.07574812, 0.44711059], - [0.41477989, 0.2781122, 0.61265786, 0.6239259], - ] -) - -fig = plt.figure(figsize=(4, 4)) -ax = plt.subplot(projection="radar") - -ax.set_rgrids([0.2, 0.4, 0.6, 0.8]) -ax.set_varlabels(["Axis 1", "Axis 2 ", "Axis 3", " Axis 4"]) -for idx in range(len(data)): - ax.plot(theta, data[idx]) - ax.fill(theta, data[idx], alpha=0.25, label="_nolegend_") - -labels = ("Random 1", "Random 2", "Random 3") -legend = ax.legend(labels, loc=(0.9, 0.95), fontsize="small") \ No newline at end of file diff --git a/seeker/snippet/repack.sh b/seeker/snippet/repack.sh deleted file mode 100644 index 058a12da..00000000 --- a/seeker/snippet/repack.sh +++ /dev/null @@ -1,56 +0,0 @@ -#date: 2024-08-29T17:01:50Z -#url: https://api.github.com/gists/a0b52ac833a4692bb0e9ab214f09afbe -#owner: https://api.github.com/users/kenvandine - -#!/bin/bash - -dir=$(dirname $(realpath $0)) -in=$1 - -if [ $UID != 0 ]; -then - echo "Must be run with root privileges, for example with sudo" - exit -fi - -if [ $# -lt 1 ]; -then - echo "USAGE: sudo $0 SOURCE_ISO" - exit -fi - -if [ -d $dir/out ]; -then - rm $dir/out/* 2>/dev/null -else - mkdir $dir/out -fi - -if [ ! 
-d $dir/debs ]; -then - mkdir $dir/debs -fi - -date=$(date "+%Y%m%d-%H%M") - -# Output file should be NAME-UBUNTUVERSION-DATE-HOUR:MINUTE-ARCH.iso -out=$(echo "${in//ubuntu/NAME}") -out=$(echo "${out//base/$date}") - -echo "Fetching local debian packages" -wget -O $dir/debs/google-chrome-stable_current_amd64.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb - -cd $dir -echo $out > iso-version - -echo "Creating $out" -echo "Adding local debs to pool" -livefs-editor $in out/repack.iso --add-debs-to-pool debs/*.deb -echo "Copying in autoinstall.yaml" -livefs-editor out/repack.iso out/repack2.iso --cp $PWD/autoinstall.yaml new/iso/autoinstall.yaml -rm -f out/repack.iso -livefs-editor out/repack2.iso out/repack3.iso --cp $PWD/iso-version new/iso/iso-version -rm -f out/repack2.iso -mv out/repack3.iso $out - -echo "$out created" diff --git a/seeker/snippet/repackepub.sh b/seeker/snippet/repackepub.sh deleted file mode 100644 index 241c1a26..00000000 --- a/seeker/snippet/repackepub.sh +++ /dev/null @@ -1,5 +0,0 @@ -#date: 2024-08-29T16:55:08Z -#url: https://api.github.com/gists/11e26db32d5c7a870476954a1cd40666 -#owner: https://api.github.com/users/pa-0 - -zip -rX "../$(basename "$(realpath .)").epub" mimetype $(ls|xargs echo|sed 's/mimetype//g') \ No newline at end of file diff --git a/seeker/snippet/run.sh b/seeker/snippet/run.sh new file mode 100644 index 00000000..d23cff5c --- /dev/null +++ b/seeker/snippet/run.sh @@ -0,0 +1,24 @@ +#date: 2024-09-02T16:54:10Z +#url: https://api.github.com/gists/a9dc47e7924feb6dc3ad32ac13751ffb +#owner: https://api.github.com/users/hrivera-ntap + +#!/usr/bin/env sh + +set -x + +kubectl get pods -A --selector app=tekton-pipelines-controller,pipeline.tekton.dev/release --show-labels +echo "" +sleep 5 + +kubectl delete -f tekton-pipeline.yaml || true +sleep 10 +kubectl apply -f tekton-pipeline.yaml +sleep 30 + +# debug +kubectl describe pipelineruns -n tekton-timeout-bug +echo "" +kubectl get taskruns -n tekton-timeout-bug +echo "" +kubectl get pipelineruns -n tekton-timeout-bug +echo "" \ No newline at end of file diff --git a/seeker/snippet/runner.go b/seeker/snippet/runner.go deleted file mode 100644 index 8551000b..00000000 --- a/seeker/snippet/runner.go +++ /dev/null @@ -1,112 +0,0 @@ -//date: 2024-08-29T16:54:35Z -//url: https://api.github.com/gists/c69cdc91a4dbf731bee5aba1696e00da -//owner: https://api.github.com/users/semenovdev - -package main - -import ( - "context" - "fmt" - "io" - "log" - "log/slog" - "time" - - "awesomeProject/runner" -) - -type Service1 struct { - io.Closer -} - -func (s *Service1) Run(ctx context.Context) error { - fmt.Println("start executing of Service1") - time.Sleep(time.Minute * 10) - fmt.Printf("Service1 is running (%s)\n", ctx.Value("Привет")) - return nil -} - -func (s *Service1) Close() error { - fmt.Println("close service1") - return nil -} - -type Service2 struct { - io.Closer - - Cancel func() -} - -func (s *Service2) Run(ctx context.Context) error { - time.Sleep(time.Second * 2) - fmt.Printf("Service2 is running (%s)\n", ctx.Value("Привет")) - time.Sleep(time.Minute) - s.Cancel() - return nil -} - -func (s *Service2) Close() error { - fmt.Println("close service2") - return nil -} - -type Service3 struct { - io.Closer -} - -func (s *Service3) Run(ctx context.Context) error { - time.Sleep(time.Second * 3) - fmt.Printf("Service3 is running (%s)\n", ctx.Value("Привет")) - select {} - return nil -} - -func (s *Service3) Close() error { - fmt.Println("close service3") - return nil -} - -type 
Service4 struct {
-    io.Closer
-}
-
-func (s *Service4) Run(ctx context.Context) error {
-    fmt.Printf("Service4 with error (%s)\n", ctx.Value("Привет"))
-    return fmt.Errorf("error in service 4")
-}
-
-func (s *Service4) Close() error {
-    fmt.Println("close service4")
-    return nil
-}
-
-func main() {
-    ctx := context.Background()
-    ctx = context.WithValue(ctx, "Привет", "Мир")
-    ctx, cancel := context.WithCancel(ctx)
-
-    service1 := &Service1{} // exits on timeout and does not affect the system
-    service2 := &Service2{ // crashes a minute after start and takes everything down with it
-        Cancel: cancel,
-    }
-    service3 := &Service3{} // would run forever, were it not for Service2
-    service4 := &Service4{} // finishes with an error in the cron job and writes it to the log
-
-    app := runner.New(
-        runner.WithContext(ctx),
-        runner.WithCronJobTimeout(time.Second),
-        runner.WithErrorLogger(slog.Default()),
-    )
-    err := app.AddCronJob("* * * * *", service1)
-    if err != nil {
-        log.Fatal(err)
-    }
-    err = app.AddCronJob("* * * * *", service4)
-    if err != nil {
-        log.Fatal(err)
-    }
-
-    app.RegisterService(service2)
-    app.RegisterService(service3)
-    app.Run()
-}
diff --git a/seeker/snippet/spbpu_schedule_to_google_calendar.py b/seeker/snippet/spbpu_schedule_to_google_calendar.py
deleted file mode 100644
index b5013d3a..00000000
--- a/seeker/snippet/spbpu_schedule_to_google_calendar.py
+++ /dev/null
@@ -1,262 +0,0 @@
-#date: 2024-08-29T17:04:19Z
-#url: https://api.github.com/gists/335367f343d28ca3f6612f965b33bbd3
-#owner: https://api.github.com/users/iwsylit
-
-"""
-Simple script for copying SPbPU lessons to Google Calendar.
-
-Before usage:
-- find the id of your group
-- get Google Calendar API credentials
-- fill env variables (GROUP_ID, GOOGLE_CALENDAR_ID, GOOGLE_SECRETS_FILE, GOOGLE_CREDENTIALS_FILE)
-- pip install requests==2.32.3 google-api-python-client==2.142.0 google-auth-oauthlib==2.0.0
-"""
-
-import logging
-import os
-import pickle
-from abc import ABC, abstractmethod
-from datetime import datetime, timedelta
-from operator import itemgetter
-from typing import Any, Self
-from zoneinfo import ZoneInfo
-
-import requests
-from google_auth_oauthlib.flow import InstalledAppFlow
-from googleapiclient.discovery import build
-
-logging.basicConfig(format="%(asctime)s %(levelname)-8s %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")
-
-
-def getenv(key: str) -> str:
-    var = os.getenv(key)
-
-    if var is None:
-        raise ValueError(f"There is no env variable {key}")
-
-    return var
-
-
-class DateTime(datetime):
-    def __new__(cls, *args: Any, timezone: ZoneInfo = ZoneInfo("Etc/GMT-3"), **kwargs: Any) -> Self:
-        instance = super().__new__(cls, *args, **kwargs)
-
-        return instance.replace(tzinfo=timezone)
-
-    @classmethod
-    def from_datetime(cls, datetime: datetime) -> Self:
-        return cls(
-            datetime.year,
-            datetime.month,
-            datetime.day,
-            datetime.hour,
-            datetime.minute,
-            datetime.second,
-            datetime.microsecond,
-        )
-
-    @classmethod
-    def from_iso(cls, date: str) -> Self:
-        return cls.from_datetime(datetime.fromisoformat(date))
-
-    @classmethod
-    def from_date_time(cls, date: str, time: str) -> Self:
-        return cls.from_datetime(datetime.strptime(f"{date} {time}", "%Y-%m-%d %H:%M"))
-
-    @classmethod
-    def today(cls) -> Self:
-        return cls.now().replace(hour=0, minute=0, second=0, microsecond=0)
-
-    def monday(self) -> Self:
-        days_to_monday = timedelta(days=self.weekday())
-        monday = self - days_to_monday
-
-        return monday.replace(hour=0, minute=0, second=0, microsecond=0)
-
-    def isodateformat(self) -> str:
-        return
self.strftime("%Y-%m-%d") - - -class Event(ABC): - def __init__(self, id: str, name: str, location: str, description: str, start: DateTime, end: DateTime) -> None: - self.id = id - self.name = name - self.location = location - self.description = description - self.start = start - self.end = end - - @abstractmethod - def construct(cls, event: dict) -> Self: - pass - - def googleformat(self) -> dict: - return { - "summary": self.name, - "location": self.location, - "description": self.description, - "start": { - "dateTime": self.start.isoformat(), - }, - "end": { - "dateTime": self.end.isoformat(), - }, - } - - def __eq__(self, value: object) -> bool: - return self.__hash__() == value.__hash__() - - def __hash__(self) -> int: - return hash((self.name, self.location, self.description, self.start, self.end)) - - def __repr__(self) -> str: - return f"{self.name}; {self.start.time()}-{self.end.time()}; {self.description}" - - -class GoogleEvent(Event): - @classmethod - def construct(cls, event: dict) -> Self: - return cls( - id=event["id"], - name=event["summary"], - location=event["location"], - description=event["description"], - start=DateTime.from_iso(event["start"]["dateTime"]), - end=DateTime.from_iso(event["end"]["dateTime"]), - ) - - -class PolyEvent(Event): - @classmethod - def construct(cls, event: dict) -> Self: - auditory = event["auditories"][0] - - teacher = ", ".join(map(itemgetter("full_name"), event["teachers"])) - lms = "LMS: " + event["lms_url"] if event["lms_url"] else "" - webinar = "Webinar: " + event["webinar_url"] if event["webinar_url"] else "" - - return cls( - id="", - name=event["subject"], - location=f"{auditory['building']['name']}, ауд. {auditory['name']}", - description="\n".join([teacher, lms, webinar]).strip(), - start=DateTime.from_date_time(event["date"], event["time_start"]), - end=DateTime.from_date_time(event["date"], event["time_end"]), - ) - - -class Calendar(ABC): - def __init__(self) -> None: - super().__init__() - logging.info(f"Connecting to {self.__class__.__name__}") - - @abstractmethod - def list_week_events(self, start: DateTime) -> set[Event]: - pass - - -class GoogleCalendar(Calendar): - _scopes = ["https://www.googleapis.com/auth/calendar"] - _secrets_file = "**********" - _credentials_file = getenv("GOOGLE_CREDENTIALS_FILE") - _calendar_id = getenv("GOOGLE_CALENDAR_ID") - - def __init__(self) -> None: - super().__init__() - - if not os.path.exists(self._credentials_file): - flow = "**********" - creds = flow.run_local_server(port=0) - - with open(self._credentials_file, "wb") as f: - pickle.dump(creds, f) - else: - with open(self._credentials_file, "rb") as f: - creds = pickle.load(f) - - self.api = build("calendar", "v3", credentials=creds) - - def list_week_events(self, start: DateTime) -> set[Event]: - end = start + timedelta(days=6) - - events = ( - self.api.events() - .list( - calendarId=self._calendar_id, - timeMin=start.isoformat(), - timeMax=end.isoformat(), - ) - .execute() - )["items"] - - return set(map(GoogleEvent.construct, events)) - - def create(self, event: Event) -> None: - logging.info(f"Create event {event}") - self.api.events().insert(calendarId=self._calendar_id, body=event.googleformat()).execute() - - def remove(self, event: Event) -> None: - logging.info(f"Remove event {event}") - self.api.events().delete(calendarId=self._calendar_id, eventId=event.id).execute() - - -class PolyCalendar(Calendar): - _group_id = getenv("GROUP_ID") - - def list_week_events(self, start: DateTime) -> set[Event]: - response = 
requests.get(self._url(start))
-        response.raise_for_status()
-        schedule = response.json()
-
-        events = []
-
-        for day in schedule["days"]:
-            for event in day["lessons"]:
-                event["date"] = day["date"]
-
-                events.append(PolyEvent.construct(event))
-
-        return set(events)
-
-    def _url(self, start: DateTime) -> str:
-        return f"https://ruz.spbstu.ru/api/v1/ruz/scheduler/{self._group_id}?date={start.isodateformat()}"
-
-
-if __name__ == "__main__":
-    logging.info("Begin working")
-
-    poly_calendar = PolyCalendar()
-    google_calendar = GoogleCalendar()
-
-    for week in range(4):
-        start = DateTime.today().monday() + timedelta(days=7 * week)
-
-        logging.info(f"Parse {start.isodateformat()} week")
-
-        poly_events = poly_calendar.list_week_events(start)
-        google_events = google_calendar.list_week_events(start)
-
-        new_events = poly_events.difference(google_events)
-        expired_events = google_events.difference(poly_events)
-
-        logging.debug(f"Poly events: {list(poly_events)}")
-        logging.debug(f"Google events: {list(google_events)}")
-        logging.debug(f"New events: {list(new_events)}")
-        logging.debug(f"Expired events: {list(expired_events)}")
-
-        if not new_events and not expired_events:
-            logging.info("There is no updates")
-        elif not new_events:
-            logging.info("There is no new events")
-        elif not expired_events:
-            logging.info("There is no expired events")
-
-        for event in expired_events:
-            google_calendar.remove(event)
-
-        for event in new_events:
-            google_calendar.create(event)
diff --git a/seeker/snippet/swap.sh b/seeker/snippet/swap.sh
new file mode 100644
index 00000000..765b7fa6
--- /dev/null
+++ b/seeker/snippet/swap.sh
@@ -0,0 +1,5 @@
+#date: 2024-09-02T16:43:30Z
+#url: https://api.github.com/gists/c82291ccf90bf8f83109320c5912cee4
+#owner: https://api.github.com/users/docsallover
+
+sudo fallocate -l 2G /swapfile
\ No newline at end of file
diff --git a/seeker/snippet/switch-desktop-env.sh b/seeker/snippet/switch-desktop-env.sh
new file mode 100644
index 00000000..08347b0a
--- /dev/null
+++ b/seeker/snippet/switch-desktop-env.sh
@@ -0,0 +1,5 @@
+#date: 2024-09-02T16:59:12Z
+#url: https://api.github.com/gists/3dd38630a3dede4c151e3354e6f3393d
+#owner: https://api.github.com/users/docsallover
+
+sudo update-alternatives --config x-session-manager
\ No newline at end of file
diff --git a/seeker/snippet/test.py b/seeker/snippet/test.py
deleted file mode 100644
index 11b9793c..00000000
--- a/seeker/snippet/test.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#date: 2024-08-30T17:10:20Z
-#url: https://api.github.com/gists/215f0c315c532c90b8e7d1310596834a
-#owner: https://api.github.com/users/youkaichao
-
-import torch
-from typing import Optional
-
-from torch._dynamo.backends.common import aot_autograd
-
-@torch.library.custom_op("custom::paged_attention", mutates_args=[])
-def paged_attention(x: "**********": torch.Tensor, cache: torch.Tensor) -> torch.Tensor:
-    output = torch.empty_like(x)
-    num_prefill_tokens = "**********"
-    bs = x.size(0)
-    if num_prefill_tokens == 0:
-        ... # call decode attention
-    else:
-        ...
-
-def attention(x: torch.Tensor, num_prefill_tokens: torch.Tensor, cache: Optional[torch.Tensor] = None):
-    if cache is not None:
-        return torch.ops.custom.paged_attention(x, num_prefill_tokens, cache)
-    return x * 2
-
-eager_model = True
-
-def custom_compiler(gm, inputs):
-
-    # compilation options
-    # option 1: pass the full graph to inductor
-    # option 2: run the model in eager mode
-    # option 3: find subgraph and replace with kernels inside vLLM
-
-    print(gm._graph.python_code(root_module="self", verbose=True).src)
-
-    # selection logic
-    static_shape_graphs = dict()
-    dynamic_shape_graph = None
-    def forward(*args, **kwargs):
-        nonlocal static_shape_graphs, dynamic_shape_graph
-        batchsize = ...  # Question: how to get batchsize from args?
-        if dynamic_shape_graph is None:
-            # if the input is symbolic shape, compile with dynamic shape support
-            dynamic_shape_graph = gm.forward
-
-        if eager_model:
-            return dynamic_shape_graph(*args, **kwargs)
-
-        if batchsize not in static_shape_graphs:
-            # if the input is static shape, compile with static shape support
-            static_shape_graphs[batchsize] = gm.forward
-        return static_shape_graphs[batchsize](*args, **kwargs)
-
-    return forward
-
-def target_fn(x, num_prefill_tokens: torch.Tensor, cache: Optional[torch.Tensor]):
-    x = (x + 1) * 5
-    if cache is not None:
-        x = attention(x, num_prefill_tokens, cache)
-    else:
-        x = x * 2
-    x = x.sin()
-    x = x.cos()
-    return x
-
-compiled_target_fn = torch.compile(backend=aot_autograd(fw_compiler=custom_compiler))(target_fn)
-
-compiled_codes = []
-
-def hook(old_code, new_code):
-    if old_code is target_fn.__code__:
-        compiled_codes.append(new_code)
-
-torch._dynamo.convert_frame.register_bytecode_hook(hook)
-
-def dispatcher(x, num_prefill_tokens: torch.Tensor, cache: Optional[torch.Tensor]):
-    if len(compiled_codes) < 2:
-        return compiled_target_fn(x, num_prefill_tokens, cache)
-    else:
-        target_fn.__code__ = compiled_codes[1]
-        return target_fn(x, num_prefill_tokens, cache)
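-
-# Illustrative note (not in the original gist): the first calls go through
-# compiled_target_fn, and the bytecode hook records every code object Dynamo
-# produces for target_fn. Once two variants exist (the static profile-run
-# shape, then the dynamic shape), dispatcher patches target_fn.__code__ with
-# the second, dynamic-shape variant, so later calls bypass Dynamo's guard
-# machinery entirely. Assuming compiled_codes[1] is the dynamic-shape build,
-# a call like:
-#
-#     dispatcher(torch.randn(7, 10), torch.tensor(0, dtype=torch.int32), cache)
-#
-# would then execute the compiled code directly, with no Dynamo re-entry.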
-
-def test():
-    global eager_model  # without this, `eager_model = False` below would only create a local
-
-    # profile run, without kv cache, fully static shape, max size
-    num_prefill_tokens = torch.tensor("**********", dtype=torch.int32)
-    dispatcher(torch.randn(20, 10), num_prefill_tokens, None)
-
-    # create cache
-    cache = torch.randn(1, 10)
-
-    # warmup run, mark the input tensor as dynamic
-    x = torch.randn(10, 10)
-    torch._dynamo.mark_dynamic(x, 0)
-    num_prefill_tokens = torch.tensor("**********", dtype=torch.int32)
-    out = dispatcher(x, num_prefill_tokens, cache)
-    print(out)
-
-    # the following runs will not trigger Dynamo/AOT Autograd
-
-    # if we are using `--enforce-eager`, we want this to directly run
-    # with the compiled kernel that can handle dynamic shape
-    y = torch.randn(5, 10)
-    num_prefill_tokens = torch.tensor("**********", dtype=torch.int32)
-    out = dispatcher(y, num_prefill_tokens, cache)
-    print(out)
-
-    eager_model = False
-
-    # if we are using cudagraph, this is an additional warmup to capture the cuda graph
-    for i in [1, 2, 4, 8, 16]:
-        y = torch.randn(i, 10)
-        num_prefill_tokens = torch.tensor("**********", dtype=torch.int32)
-        out = dispatcher(y, num_prefill_tokens, cache)
-    # and then, for later runs, we can directly run with the compiled kernel if the shape
-    # matches the recorded shape; if not, run with dynamic shape
-    y = torch.randn(4, 10)
-    num_prefill_tokens = torch.tensor("**********", dtype=torch.int32)
-    out = dispatcher(y, num_prefill_tokens, cache)
-    print(out)
-
-if __name__ == "__main__":
-    test()
diff --git a/seeker/snippet/tts_backup.py b/seeker/snippet/tts_backup.py
deleted file mode 100644
index ddca3774..00000000
--- a/seeker/snippet/tts_backup.py
+++ /dev/null
@@ -1,182 +0,0 @@
-#date: 2024-08-30T16:44:48Z
-#url: https://api.github.com/gists/fabf3408c00b3d37464248378f5decbf
-#owner: https://api.github.com/users/DerKleineLi
-
-import concurrent.futures
-import json
-import mimetypes
-import re
-import shutil
-import sys
-from pathlib import Path
-
-import magic
-import requests
-
-URL_FOLDER = {
-    "ColliderURL": ["Models", "Models Raw"],
-    "DiffuseURL": ["Images", "Images Raw"],
-    "AssetbundleURL": ["Assetbundles"],
-    "Nickname": [],
-    "AssetbundleSecondaryURL": ["Assetbundles"],
-    "ImageURL": ["Images", "Images Raw"],
-    "MeshURL": ["Models", "Models Raw"],
-    "SkyURL": ["Images", "Images Raw"],
-    "BackURL": ["Images", "Images Raw"],
-    "URL": ["Images", "Images Raw"],
-    "FaceURL": ["Images", "Images Raw"],
-    "ImageSecondaryURL": ["Images", "Images Raw"],
-    "Item1": ["Audio"],
-    "NormalURL": ["Images", "Images Raw"],
-    "PDFUrl": ["PDF"],
-}
-FOLDER_EXT = {
-    "Models": ".obj",
-    "Assetbundles": ".unity3d",
-    "PDF": ".pdf",
-}
-
-
-def get_mod_dir(json_file):
-    while json_file.name != "Mods":
-        json_file = json_file.parent
-    return json_file
-
-
-def copy_file(file, mod_dir, target_dir, target_name=None):
-    target_file = target_dir / file.relative_to(mod_dir)
-    if target_name is not None:
-        target_file = target_file.with_stem(target_name)
-    target_file.parent.mkdir(parents=True, exist_ok=True)
-    shutil.copy(file, target_file)
-    print(f"Copied {target_file.relative_to(target_dir)}")
-
-
-def get_all_urls(data):
-    urls = {}
-    if not isinstance(data, dict):
-        return urls
-    for key, value in data.items():
-        if isinstance(value, dict):
-            urls.update(get_all_urls(value))
-        elif isinstance(value, list):
-            for item in value:
-                urls.update(get_all_urls(item))
-        elif isinstance(value, str) and value.startswith("http"):
-            urls[value] = key
-    return urls
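-
-# Illustrative example (values made up, not from the original file): for a
-# Tabletop Simulator save such as
-#
-#     get_all_urls({"ObjectStates": [{"FaceURL": "http://x/a", "Nickname": "card"}]})
-#
-# the function returns {"http://x/a": "FaceURL"}, i.e. every http(s) string in
-# the nested JSON mapped to the key it sits under; process_url later uses that
-# key to pick the cache folder via URL_FOLDER.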
url.startswith("http://cloud-3.steamusercontent.com/"): - file_stem_old = re.sub(r"[^a-zA-Z0-9]", "", url) - url = url.replace( - "http://cloud-3.steamusercontent.com/", - "https://steamusercontent-a.akamaihd.net/", - ) - if url.startswith("https://cloud-3.steamusercontent.com/"): - file_stem_old = re.sub(r"[^a-zA-Z0-9]", "", url) - url = url.replace( - "https://cloud-3.steamusercontent.com/", - "https://steamusercontent-a.akamaihd.net/", - ) - file_stem = re.sub(r"[^a-zA-Z0-9]", "", url) - - files = (mod_dir / folder).glob(f"{file_stem}.*") - files = list(files) - if file_stem_old is not None: - files_old = (mod_dir / folder).glob(f"{file_stem_old}.*") - files_old = list(files_old) - files += files_old - - if len(files) == 0: # file not found - # download the file - download_file(url, target_dir / folder, file_stem) - else: - if len(files) > 1: - print(f"\033[94mMultiple files found for {url}\033[0m") - print(f"\033[94mFiles: {files}\033[0m") - file_path = list(files)[0] - copy_file(file_path, mod_dir, target_dir, file_stem) - - -def sanitize_folder_name(folder_name): - # 移除不允许的字符(假设不允许的字符为:<>:"/\|?*) - sanitized_name = re.sub(r'[<>:"/\\|?*]', "", folder_name) - # 将连续的空格替换为单个空格 - sanitized_name = re.sub(r"\s+", " ", sanitized_name) - return sanitized_name - - -def main(): - json_file = Path(sys.argv[1]) - mod_dir = get_mod_dir(json_file) - - with open(json_file, "r", encoding="utf-8") as f: - data = json.load(f) - - target_dir = Path(__file__).parent / sanitize_folder_name(data["SaveName"]) - thumbnail = json_file.parent / (json_file.stem + ".png") - copy_file(thumbnail, mod_dir, target_dir) - copy_file(json_file, mod_dir, target_dir) - urls = get_all_urls(data) - with concurrent.futures.ThreadPoolExecutor() as executor: - futures = [ - executor.submit(process_url, url, key, mod_dir, target_dir) - for url, key in urls.items() - ] - for future in concurrent.futures.as_completed(futures): - future.result() - - -if __name__ == "__main__": - main() diff --git a/seeker/snippet/vendor_sleuth.sh b/seeker/snippet/vendor_sleuth.sh deleted file mode 100644 index 0e4b8bc5..00000000 --- a/seeker/snippet/vendor_sleuth.sh +++ /dev/null @@ -1,31 +0,0 @@ -#date: 2024-08-29T17:02:18Z -#url: https://api.github.com/gists/0cd89a25c730ac267559c44b5487c9ff -#owner: https://api.github.com/users/spezifisch - -export DEVICE_BASE=$HOME/android/lineage/device/xiaomi/veux -export VENDOR_BASE=$HOME/android/lineage/vendor/xiaomi/veux/proprietary -export STOCK_BASE=$HOME/Dumps/veux-stock-vendor-20240829-1 - -for x in $(cat "$DEVICE_BASE/proprietary-files.txt" | cut -d'|' -f1 | grep -Ev '(^#|^$)'); do - F="$VENDOR_BASE/$x" - - if [ -e "$F" ]; then - #echo "found $x in vendor" - - G="$STOCK_BASE/$x" - if [ -e "$G" ]; then - #echo "found $x in stock" - - if diff -q "$F" "$G" > /dev/null; then - # same files - echo "match-vendor/stock $x" - else - echo "mismatch-vendor/stock $x" - fi - else - echo "missing-compare $x" - fi - else - echo "extraneous $x" - fi -done \ No newline at end of file diff --git a/seeker/snippet/vlm_rag.py b/seeker/snippet/vlm_rag.py deleted file mode 100644 index 6596252c..00000000 --- a/seeker/snippet/vlm_rag.py +++ /dev/null @@ -1,319 +0,0 @@ -#date: 2024-08-29T16:53:11Z -#url: https://api.github.com/gists/f4006d00cc1fcfa237d7f191c940011d -#owner: https://api.github.com/users/sovrasov - -import time -import torch -from torch.utils.data import DataLoader -from tqdm import tqdm -from transformers import AutoProcessor -from PIL import Image -from io import BytesIO - - -if torch.cuda.is_available(): - 
-    device = torch.device("cuda")
-    dtype = torch.bfloat16
-else:
-    device = torch.device("cpu")
-    dtype = torch.float32
-
-
-from torch import nn
-from transformers.models.paligemma.modeling_paligemma import PaliGemmaForConditionalGeneration, PaliGemmaPreTrainedModel
-
-class ColPali(PaliGemmaPreTrainedModel):
-    def __init__(self, config):
-        super(ColPali, self).__init__(config=config)
-        self.model: PaliGemmaForConditionalGeneration = PaliGemmaForConditionalGeneration(config)
-        self.dim = 128
-        self.custom_text_proj = nn.Linear(self.model.config.text_config.hidden_size, self.dim)
-        self.main_input_name = "doc_input_ids"
-
-    def forward(self, *args, **kwargs):
-        """
-        Forward pass through PaliGemma and the linear layer for dimensionality reduction
-
-        Args:
-        - input_ids (torch.LongTensor): "**********"
-        - attention_mask (torch.LongTensor): The attention mask tensor.
-
-        Returns:
-        - torch.Tensor: "**********"
-        """
-        outputs = self.model(*args, output_hidden_states=True, **kwargs)
-        last_hidden_states = outputs.hidden_states[-1]
-        proj = self.custom_text_proj(last_hidden_states)
-        # normalize with the L2 norm
-        proj = proj / proj.norm(dim=-1, keepdim=True)
-        proj = proj * kwargs["attention_mask"].unsqueeze(-1)
-        return proj
-
-
-model_name = "vidore/colpali"
-model = ColPali.from_pretrained("google/paligemma-3b-mix-448", torch_dtype=dtype, device_map=device).eval()
-model.load_adapter(model_name)
-model.to(device)
-processor = AutoProcessor.from_pretrained(model_name)
-
-# BERT_Article.pdf: https://arxiv.org/pdf/1810.04805
-# Transformers_Article.pdf: https://arxiv.org/pdf/1706.03762
-
-pdfs = [{"file_name": "data/BERT_Article.pdf"}, {"file_name": "data/Transformers_Article.pdf"}]
-
-
-import requests
-from pdf2image import convert_from_path
-from pypdf import PdfReader
-
-def preprocessing(pdfs):
-    documents = []
-    images = []
-    metadata = []
-    for pdf in pdfs:
-        file_name = pdf["file_name"]
-        reader = PdfReader(file_name)
-        for page_number in range(len(reader.pages)):
-            page = reader.pages[page_number]
-            text = page.extract_text()
-            documents.append(text)
-            metadata.append({"page": page_number, "file_path": file_name})
-        images_for_file = convert_from_path(file_name)
-        images += images_for_file
-    assert len(images) == len(documents)
-    assert len(metadata) == len(documents)
-    return documents, images, metadata
-
-documents, images, metadata = preprocessing(pdfs)
-
-from pdf2image import convert_from_path
-from PIL import Image
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-from transformers import AutoProcessor
-
-def indexing(images):
-    ds = []
-    dataloader = DataLoader(
-        images,
-        batch_size=1,
-        shuffle=False,
-        collate_fn=lambda x: process_images(processor, x),
-    )
-    for batch_doc in tqdm(dataloader):
-        with torch.no_grad():
-            batch_doc = {k: v.to(device) for k, v in batch_doc.items()}
-            embeddings_doc = model(**batch_doc)
-        ds.extend(list(torch.unbind(embeddings_doc.to("cpu"))))
-    return ds
-
-# Helper function to process the images into the right (data) format
-def process_images(processor, images, max_length: int = 50):
-    texts_doc = ["Describe the image."] * len(images)
-    images = [image.convert("RGB") for image in images]
-
-    batch_doc = processor(
-        text=texts_doc,
-        images=images,
-        return_tensors="pt",
-        padding="longest",
-        max_length=max_length + processor.image_seq_length,
-    )
-    return batch_doc
-
-index = indexing(images)
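-
-# Illustrative note (not in the original gist): `index` holds one tensor per
-# PDF page with shape (sequence_length, 128) - a multi-vector, ColBERT-style
-# page embedding rather than a single pooled vector. A quick sanity check:
-#
-#     assert len(index) == len(images)
-#     assert index[0].shape[-1] == 128  # ColPali's projection dim (self.dim)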
-
-
-# The model requires a mock image to be added to the query.
-mock_image = Image.new("RGB", (448, 448), (255, 255, 255))
-
-def search(query: str, index, documents, images, metadata, k=5):
-    # text, images, and metadata are just passed through without processing
-    qs = []
-    with torch.no_grad():
-        batch_query = process_queries(processor, [query], mock_image)
-        batch_query = {k: v.to(device) for k, v in batch_query.items()}
-        embeddings_query = model(**batch_query)
-        qs.extend(list(torch.unbind(embeddings_query.to("cpu"))))
-    # run evaluation
-    scores = evaluate_colbert(qs, index)
-    relevant_pages = torch.topk(scores, k, dim=1, largest=True).indices
-    relevant_pages = relevant_pages.squeeze()
-    result = []
-    for p in relevant_pages:
-        result.append({"document": documents[p], "image": images[p], "score": scores[:, p].item(), "metadata": metadata[p]})
-    return result
-
-# Helper function to process the queries into the right (data) format
-def process_queries(processor, queries, mock_image, max_length: int = 50):
-    texts_query = []
-    for query in queries:
-        query = f"Question: {query}"
-        texts_query.append(query)
-
-    batch_query = processor(
-        images=[mock_image.convert("RGB")] * len(texts_query),
-        # NOTE: the image is not used in batch_query but it is required for calling the processor
-        text=texts_query,
-        return_tensors="pt",
-        padding="longest",
-        max_length=max_length + processor.image_seq_length,
-    )
-    del batch_query["pixel_values"]
-
-    batch_query["input_ids"] = batch_query["input_ids"][..., processor.image_seq_length :]
-    batch_query["attention_mask"] = batch_query["attention_mask"][..., processor.image_seq_length :]
-    return batch_query
-
-# Helper function to calculate the scores between queries and documents
-def evaluate_colbert(qs, ps, batch_size=128) -> torch.Tensor:
-    scores = []
-    for i in range(0, len(qs), batch_size):
-        scores_batch = []
-        qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i : i + batch_size], batch_first=True, padding_value=0).to(device)
-        for j in range(0, len(ps), batch_size):
-            ps_batch = torch.nn.utils.rnn.pad_sequence(
-                ps[j : j + batch_size], batch_first=True, padding_value=0
-            ).to(device)
-            scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2))
-        scores_batch = torch.cat(scores_batch, dim=1).cpu()
-        scores.append(scores_batch)
-    scores = torch.cat(scores, dim=0)
-    return scores
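-
-# Illustrative note (not in the original gist): the einsum "bnd,csd->bcns"
-# computes every dot product between query tokens (n) and page tokens (s);
-# .max(dim=3) keeps, for each query token, its best-matching page token, and
-# .sum(dim=2) adds those maxima - this is ColBERT-style late interaction
-# (MaxSim). A toy call with one 2-token query and one 3-token page:
-#
-#     q = [torch.randn(2, 128)]
-#     p = [torch.randn(3, 128)]
-#     evaluate_colbert(q, p).shape  # torch.Size([1, 1])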
-
-# Function for image processing
-def scale_image(image: Image.Image, new_height: int = 1024) -> Image.Image:
-    """
-    Scale an image to a new height while maintaining the aspect ratio.
-    """
-    # Calculate the scaling factor
-    width, height = image.size
-    aspect_ratio = width / height
-    new_width = int(new_height * aspect_ratio)
-
-    # Resize the image
-    scaled_image = image.resize((new_width, new_height))
-
-    return scaled_image
-
-query = "How many transformer blocks are in BERT Base? Justify your answer."
-retrieved_documents = search(query=query, index=index, documents=documents, images=images, metadata=metadata, k=3)
-
-
-from IPython.display import display, HTML
-import io
-import base64
-
-def display_images(retrieved_documents):
-    html = ""
-
-    for r in retrieved_documents:
-        img = r["image"]  # Assuming this is a PIL Image object
-        title1 = f"File: {r['metadata']['file_path']}"  # Extracting the titles from metadata
-        title2 = f"Page: {r['metadata']['page']}"
-        title3 = f"Score: {r['score']}"
-
-        # Save the image to a BytesIO object
-        img_byte_arr = io.BytesIO()
-        img.save(img_byte_arr, format='PNG')  # Save as PNG or any other format
-        img_byte_arr.seek(0)  # Move to the beginning of the BytesIO object
-        img_data = img_byte_arr.getvalue()
-        img_base64 = base64.b64encode(img_data).decode('utf-8')  # Encode to base64
-
-        # Create HTML for the image with the titles above it
-        html += f"""
-        <div style="display: inline-block; margin: 10px; text-align: center;">
-            <div>{title1}<br>{title2}<br>{title3}</div>
-            <img src="data:image/png;base64,{img_base64}" style="max-width: 300px;"/>
-        </div>
-        """
-
-    display(HTML(html))
" - display(HTML(html)) - -# Example usage -#display_images(retrieved_documents) - -import base64 -import io - -# Function to process images -def get_base64_image(img: str | Image.Image, add_url_prefix: bool = True) -> str: - """ - Convert an image (from a filepath or a PIL.Image object) to a JPEG-base64 string. - """ - if isinstance(img, str): - img = Image.open(img) - elif isinstance(img, Image.Image): - pass - else: - raise ValueError("`img` must be a path to an image or a PIL Image object.") - - buffered = io.BytesIO() - img.save(buffered, format="jpeg") - b64_data = base64.b64encode(buffered.getvalue()).decode("utf-8") - - return f"data:image/jpeg;base64,{b64_data}" if add_url_prefix else b64_data - - -# Format the images in the right format for the prompt -def convert_documents_to_prompt(retrieved_documents): - images_for_vlm = [] - for r in retrieved_documents: - images_for_vlm.append( - { - "type": "image_url", - "image_url": {"url": get_base64_image(r["image"])} - }) - return images_for_vlm - -images_for_vlm = convert_documents_to_prompt(retrieved_documents) - -images_raw = [r["image"] for r in retrieved_documents] - - -from openai import OpenAI - -# Visual Language Model -def vlm(prompt, retrieved_documents): - - images_for_vlm = convert_documents_to_prompt(retrieved_documents) - print(images_for_vlm) - print(prompt) - content = [{"type": "text", "text": prompt}] + images_for_vlm - - client = OpenAI() - response = client.chat.completions.create( - model="gpt-4o-mini", - messages=[ - { - "role": "user", - "content": content - } - ], - max_tokens= "**********" - ) - return response.choices[0].message.content - -from transformers import AutoProcessor, LlavaForConditionalGeneration - -start = time.time() - -model = LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf") -processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf") -prompt = f"USER: {''*len(images_raw)}\n{query} ASSISTANT:" -inputs = processor(text=prompt, images=images_raw, return_tensors="pt") -generate_ids = "**********"=100) -print(processor.batch_decode(generate_ids, skip_special_tokens= "**********"=False)[0]) -print(f"Elapsed {time.time() - start}") - - -#from IPython.display import display, Markdown -#result = vlm(prompt=query, retrieved_documents=retrieved_documents) -#print(result) -#display(Markdown(result))uery, retrieved_documents=retrieved_documents) -#print(result) -#display(Markdown(result)) \ No newline at end of file