diff --git a/seeker/report.txt b/seeker/report.txt index afd23a9a..005514de 100644 --- a/seeker/report.txt +++ b/seeker/report.txt @@ -1,3 +1,29 @@ +-------------------------------------------------------------------------------- + 2024-08-30 17:12:40.941245 +-------------------------------------------------------------------------------- + On branch main +Your branch is up to date with 'origin/main'. + +Changes not staged for commit: + (use "git add/rm ..." to update what will be committed) + (use "git restore ..." to discard changes in working directory) + deleted: snippet/recept_analysis.py + +Untracked files: + (use "git add ..." to include in what will be committed) + snippet/bench.py + snippet/collect_data_openai_sample_imports.py + snippet/csv2star.py + snippet/disable.bash + snippet/docker_iptables + snippet/forty.java + snippet/mat_demo_v1.py + snippet/radar_chart_example.py + snippet/test.py + snippet/tts_backup.py + +no changes added to commit (use "git add" and/or "git commit -a") + -------------------------------------------------------------------------------- 2024-08-29 17:11:54.148933 -------------------------------------------------------------------------------- diff --git a/seeker/snippet/bench.py b/seeker/snippet/bench.py new file mode 100644 index 00000000..a827e4d0 --- /dev/null +++ b/seeker/snippet/bench.py @@ -0,0 +1,139 @@ +#date: 2024-08-30T16:36:13Z +#url: https://api.github.com/gists/6832dd94f8bb34a1a6a5a20de6af6132 +#owner: https://api.github.com/users/samwho + +import random +import string +import time +from typing import Tuple + +import psycopg2 +from psycopg2.extensions import cursor +from rich.console import Console +from rich.table import Table + +LARGE_STRING = "a" * 64 * 1024 + + +def random_string(length: int = 10) -> str: + return "".join(random.choices(string.ascii_lowercase, k=length)) + + +def create_tables(cur: cursor) -> None: + cur.execute("""CREATE TABLE IF NOT EXISTS int_table + (id INTEGER PRIMARY KEY, value TEXT)""") + cur.execute("""CREATE TABLE IF NOT EXISTS string_table + (id TEXT PRIMARY KEY, value TEXT)""") + + +def truncate_table(cur: cursor, table_name: str) -> None: + cur.execute(f"TRUNCATE TABLE {table_name}") + cur.connection.commit() + + +def insert_data( + cur: cursor, table_name: str, data: list[Tuple[int | str, str]] +) -> float: + total = 0 + truncate_table(cur, table_name) + for record in data: + start_time = time.perf_counter() + cur.execute(f"INSERT INTO {table_name} (id, value) VALUES (%s, %s)", record) + cur.connection.commit() + end_time = time.perf_counter() + total += end_time - start_time + return total + + +def read_data(cur: cursor, table_name: str, ids: list[int | str]) -> float: + total = 0 + for id in ids: + start_time = time.perf_counter() + cur.execute(f"SELECT * FROM {table_name} WHERE id = %s", (id,)) + cur.fetchone() + end_time = time.perf_counter() + total += end_time - start_time + return total + + +def benchmark(num_records: int = 10000, num_reads: int = 1000) -> dict[str, float]: + # PostgreSQL connection parameters + conn_params = { + "dbname": "postgres", + "user": "postgres", + "password": "**********" + "host": "localhost", + "port": "5432", + } + + conn = psycopg2.connect(**conn_params) + cur = conn.cursor() + + # Ensure tables don't exist + cur.execute("DROP TABLE IF EXISTS int_table") + cur.execute("DROP TABLE IF EXISTS string_table") + conn.commit() + + create_tables(cur) + conn.commit() + + ints = list(range(num_records)) + random_ints = ints + random.shuffle(random_ints) + + # Prepare data + 
int_seq_data = [(i, LARGE_STRING) for i in ints] + int_random_data = [(i, LARGE_STRING) for i in random_ints] + str_seq_data = [(f"{i:010d}", LARGE_STRING) for i in ints] + str_random_data = [(random_string(), LARGE_STRING) for i in ints] + + # Benchmark insertions + int_seq_insert = insert_data(cur, "int_table", int_seq_data) + int_random_insert = insert_data(cur, "int_table", int_random_data) + str_seq_insert = insert_data(cur, "string_table", str_seq_data) + str_random_insert = insert_data(cur, "string_table", str_random_data) + + # Prepare read data + int_seq_ids = [i for i, _ in int_seq_data[:num_reads]] + int_random_ids = [i for i, _ in int_random_data[:num_reads]] + str_seq_ids = [i for i, _ in str_seq_data[:num_reads]] + str_random_ids = [i for i, _ in str_random_data[:num_reads]] + + # Benchmark reads + int_seq_read = read_data(cur, "int_table", int_seq_ids) + int_random_read = read_data(cur, "int_table", int_random_ids) + str_seq_read = read_data(cur, "string_table", str_seq_ids) + str_random_read = read_data(cur, "string_table", str_random_ids) + + cur.close() + conn.close() + + return { + ("int", "sequential", "insert"): int_seq_insert, + ("int", "random", "insert"): int_random_insert, + ("str", "sequential", "insert"): str_seq_insert, + ("str", "random", "insert"): str_random_insert, + ("int", "sequential", "read"): int_seq_read, + ("int", "random", "read"): int_random_read, + ("str", "sequential", "read"): str_seq_read, + ("str", "random", "read"): str_random_read, + } + + +if __name__ == "__main__": + n = 10000 + results = benchmark(num_records=n, num_reads=n) + + table = Table() + + table.add_column("Type", style="cyan", no_wrap=True) + table.add_column("Mode", style="cyan", no_wrap=True) + table.add_column("Operation", style="cyan", no_wrap=True) + table.add_column("Time (seconds)", style="magenta") + + for (type, mode, op), time in results.items(): + table.add_row(type, mode, op, f"{time:.3f}") + + console = Console() + console.print(table) +table) diff --git a/seeker/snippet/collect_data_openai_sample_imports.py b/seeker/snippet/collect_data_openai_sample_imports.py new file mode 100644 index 00000000..9be3ee95 --- /dev/null +++ b/seeker/snippet/collect_data_openai_sample_imports.py @@ -0,0 +1,23 @@ +#date: 2024-08-30T17:12:04Z +#url: https://api.github.com/gists/22d5da6fb48207927a0dc4b8ec62df0d +#owner: https://api.github.com/users/zsasko + +import json +from typing import AsyncGenerator, NoReturn + +import uvicorn +from dotenv import load_dotenv +from fastapi import FastAPI, WebSocket +from fastapi.responses import HTMLResponse +from openai import AsyncOpenAI + +load_dotenv() + +model = "gpt-3.5-turbo" +conversation_history = [] + +app = FastAPI() +client = AsyncOpenAI() + +with open("index.html") as f: + html = f.read() diff --git a/seeker/snippet/csv2star.py b/seeker/snippet/csv2star.py new file mode 100644 index 00000000..f58fec11 --- /dev/null +++ b/seeker/snippet/csv2star.py @@ -0,0 +1,55 @@ +#date: 2024-08-30T16:56:05Z +#url: https://api.github.com/gists/7781ef8063e0102d7121d799efcb700f +#owner: https://api.github.com/users/shahpnmlab + +import os +import glob +import pandas as pd +import starfile + +# Function to read CSV files +def read_csv_files(directory): + all_data = [] + for filename in glob.glob(os.path.join(directory, '*.csv')): + df = pd.read_csv(filename, header=None, names=['X', 'Y', 'Z']) + tomogram_name = os.path.basename(filename).split('_')[1] + df['MicrographName'] = f'TS_{tomogram_name}' + all_data.append(df) + return pd.concat(all_data, 
ignore_index=True) + +# Read all CSV files +data = read_csv_files('particle_lists') + +# Create the particles data block +particles_data = pd.DataFrame({ + 'rlnMicrographName': data['MicrographName'], + 'rlnCoordinateX': data['X'], + 'rlnCoordinateY': data['Y'], + 'rlnCoordinateZ': data['Z'], + 'rlnOriginXAngst': [0] * len(data), + 'rlnOriginYAngst': [0] * len(data), + 'rlnOriginZAngst': [0] * len(data) +}) + +# Create the optics data block +optics_data = pd.DataFrame({ + 'rlnOpticsGroup': [1,""], + 'rlnOpticsGroupName': ['opticsGroup1',""], + 'rlnSphericalAberration': [2.700000,""], + 'rlnVoltage': [300.000000,""], + 'rlnImagePixelSize': [13.48,""], + 'rlnImageSize': [64,""], + 'rlnImageDimensionality': [3,""], + 'rlnPickingImagePixelSize': [13.48,""] +}) + +# Create the STAR file structure +star_data = { + 'optics': optics_data, + 'particles': particles_data +} + +# Write the STAR file +starfile.write(star_data, 'particles.star', overwrite=True) + +print("particles.star file has been created successfully.") \ No newline at end of file diff --git a/seeker/snippet/disable.bash b/seeker/snippet/disable.bash new file mode 100644 index 00000000..3e9b2ad0 --- /dev/null +++ b/seeker/snippet/disable.bash @@ -0,0 +1,13 @@ +#date: 2024-08-30T17:10:15Z +#url: https://api.github.com/gists/f1825a7e2c54c2c6e46095f95c697285 +#owner: https://api.github.com/users/genzj + +#!/bin/bash + +# ref: +# https://askubuntu.com/a/1167767 +# https://manpages.ubuntu.com/manpages/bionic/man5/NetworkManager.conf.5.html#connectivity%20section + +sudo cp --backup=t /etc/NetworkManager/NetworkManager.conf /etc/NetworkManager/NetworkManager.conf.backup +echo -e "\n[connectivity]\nuri=\n" | sudo tee -a /etc/NetworkManager/NetworkManager.conf +sudo systemctl restart NetworkManager.service \ No newline at end of file diff --git a/seeker/snippet/docker_iptables b/seeker/snippet/docker_iptables new file mode 100644 index 00000000..2990985d --- /dev/null +++ b/seeker/snippet/docker_iptables @@ -0,0 +1,136 @@ +#date: 2024-08-30T16:41:10Z +#url: https://api.github.com/gists/9bb557948c27cf1d486d74d711f28638 +#owner: https://api.github.com/users/Sartan4455 + +#!/bin/bash +# Copyright 2020-2022 Tomas Barton +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +set -o nounset -o pipefail +# +function -h { + cat <&1 ;} +function out { printf '%s\n' "$*" ;} + +function iptables_apply { + local binary="$1" + local table="$2" + local action="$3" + local rule="$4" + local noop=$5 + local verbose=$6 + + # check if the rule is already defined + eval "${binary} -t ${table} --check ${rule} 2>/dev/null" + if [[ $? 
-ne 0 ]]; then + if [[ $noop == true ]]; then + msg $rule; + else + if [[ $verbose == true ]]; then + msg "${rule}" + fi + eval "${binary} -t ${table} ${action} ${rule}"; + fi + fi +} + +function main { + local verbose=false + local debug=false + local noop=false + local interface="docker0" + local binary="iptables" + + while [[ $# -gt 0 ]] + do + case "$1" in # Munging globals, beware + -i|--interface) interface="$2"; shift 2 ;; + -b|--binary) binary="$2"; shift 2 ;; + -n|--noop) noop=true; shift 1 ;; + -v|--verbose) verbose=true; shift 1 ;; + -d|--debug) debug=true; shift 1 ;; + *) err 'Argument error. Please see help: -h' ;; + esac + done + + if [[ $debug == true ]]; then + set -x + fi + + if [[ $noop == true ]]; then + msg "NOOP: Only printing iptables rules to be eventually applied" + fi + + # list currently running container IDs + local containers=$(docker ps --format '{{.ID}}') + if [[ ! -z "$containers" ]]; then + while read -r cont; do + # old docker API response + local ip=$(docker inspect -f '{{.NetworkSettings.IPAddress}}' ${cont}) + if [[ -z "${ip}" ]]; then + # newer docker API, probably > 23.01 + ip=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' ${cont}) + fi + if [[ $verbose == true ]]; then + msg "Container ${cont}" + fi + # extract port forwarding + local ports=$(docker inspect -f '{{json .NetworkSettings.Ports}}' ${cont}) + if [[ "${ports}" != "{}" ]]; then + local fwd=$(echo "${ports}" | jq -r '. as $a| keys[] | select($a[.]!=null) as $f | "\($f)/\($a[$f][].HostPort)"') + if [[ ! -z "$fwd" ]]; then + # pass tripples likes `3000/tcp/29956` + while read -r pfwd; do + local dport protocol hport + local IFS="/" + read dport protocol hport <<< "${pfwd}" + if [[ -z "${ip}" ]]; then + err "ERROR: Empty IP for container: ${cont}" + fi + local rule="DOCKER -d ${ip}\/32 ! -i ${interface} -o ${interface} -p ${protocol} -m ${protocol} --dport ${dport} -j ACCEPT" + iptables_apply "${binary}" "filter" "-A" "${rule}" ${noop} ${verbose} + rule="POSTROUTING -s ${ip}\/32 -d ${ip}\/32 -p ${protocol} -m ${protocol} --dport ${dport} -j MASQUERADE" + iptables_apply "${binary}" "nat" "-A" "${rule}" ${noop} ${verbose} + rule="DOCKER ! 
-i ${interface} -p ${protocol} -m ${protocol} --dport ${hport} -j DNAT --to-destination ${ip}:${dport}" + iptables_apply "${binary}" "nat" "-A" "${rule}" ${noop} ${verbose} + done <<< "$fwd" + fi + fi + done <<< "$containers" + fi +} + +if [[ ${1:-} ]] && declare -F | cut -d' ' -f3 | fgrep -qx -- "${1:-}" +then + case "$1" in + -h|--help) : ;; + *) ;; + esac + "$@" +else + main "$@" +fi diff --git a/seeker/snippet/forty.java b/seeker/snippet/forty.java new file mode 100644 index 00000000..cc3e1e88 --- /dev/null +++ b/seeker/snippet/forty.java @@ -0,0 +1,31 @@ +//date: 2024-08-30T16:55:29Z +//url: https://api.github.com/gists/6efae7ebf845724b89a20348dc9521fc +//owner: https://api.github.com/users/sasub-mlp + +import java.util.Scanner; + +public class forty { + static int arrsearch(int[] arr, int x) { + for (int i=0;i 1: + path = path.interpolated(num_vars) + return Path(self.transform(path.vertices), path.codes) + + class RadarAxes(PolarAxes): + + name = "radar" + PolarTransform = RadarTransform + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # rotate plot such that the first axis is at the top + self.set_theta_zero_location("N") + + def fill(self, *args, closed=True, **kwargs): + """Override fill so that line is closed by default""" + return super().fill(closed=closed, *args, **kwargs) + + def plot(self, *args, **kwargs): + """Override plot so that line is closed by default""" + lines = super().plot(*args, **kwargs) + for line in lines: + self._close_line(line) + + def _close_line(self, line): + x, y = line.get_data() + # FIXME: markers at x[0], y[0] get doubled-up + if x[0] != x[-1]: + x = np.append(x, x[0]) + y = np.append(y, y[0]) + line.set_data(x, y) + + def set_varlabels(self, labels): + self.set_thetagrids(np.degrees(theta), labels) + + def _gen_axes_patch(self): + # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5 + # in axes coordinates. + if frame == "circle": + return Circle((0.5, 0.5), 0.5) + elif frame == "polygon": + return RegularPolygon((0.5, 0.5), num_vars, radius=0.5, edgecolor="k") + else: + raise ValueError("Unknown value for 'frame': %s" % frame) + + def _gen_axes_spines(self): + if frame == "circle": + return super()._gen_axes_spines() + elif frame == "polygon": + # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'. + spine = Spine(axes=self, spine_type="circle", path=Path.unit_regular_polygon(num_vars)) + # unit_regular_polygon gives a polygon of radius 1 centered at + # (0, 0) but we want a polygon of radius 0.5 centered at (0.5, + # 0.5) in axes coordinates. 
+ spine.set_transform(Affine2D().scale(0.5).translate(0.5, 0.5) + self.transAxes) + return {"polar": spine} + else: + raise ValueError("Unknown value for 'frame': %s" % frame) + + register_projection(RadarAxes) + return theta + + +theta = radar_Randomy(4, frame="polygon") +data = np.array( + [ + [0.55192003, 0.94219184, 0.49221465, 0.23299307], + [0.86465455, 0.92235546, 0.07574812, 0.44711059], + [0.41477989, 0.2781122, 0.61265786, 0.6239259], + ] +) + +fig = plt.figure(figsize=(4, 4)) +ax = plt.subplot(projection="radar") + +ax.set_rgrids([0.2, 0.4, 0.6, 0.8]) +ax.set_varlabels(["Axis 1", "Axis 2 ", "Axis 3", " Axis 4"]) +for idx in range(len(data)): + ax.plot(theta, data[idx]) + ax.fill(theta, data[idx], alpha=0.25, label="_nolegend_") + +labels = ("Random 1", "Random 2", "Random 3") +legend = ax.legend(labels, loc=(0.9, 0.95), fontsize="small") \ No newline at end of file diff --git a/seeker/snippet/recept_analysis.py b/seeker/snippet/recept_analysis.py deleted file mode 100644 index 57ee9e42..00000000 --- a/seeker/snippet/recept_analysis.py +++ /dev/null @@ -1,196 +0,0 @@ -#date: 2024-08-28T16:54:46Z -#url: https://api.github.com/gists/e0e23622f18375c45a029868b40423da -#owner: https://api.github.com/users/eggaskin - -import seaborn as sns -import pandas as pd -import numpy as np -import matplotlib.pyplot as plt - - -""" -This code provides functions for analyzing textual conversation-format data that is split turn by turn. -The receptive.R script should have been run first on conversation data and saved to utterrecepts (receptiveness of different utterances). -This was first used on the utterances.csv file from the persuasion-for-good dataset. - -Interesting and helpful papers for theory of mind and question-based prompting: -https://arxiv.org/pdf/2310.01468 -https://arxiv.org/pdf/2310.03051 -https://aclanthology.org/2023.conll-1.25.pdf -https://arxiv.org/ftp/arxiv/papers/2309/2309.01660.pdf -https://arxiv.org/pdf/2302.02083 - -""" - -def getconvtext(speaker,utterances,casino=True): - # DIALOGUE ID is the conv number - utterances = utterances[utterances['speaker'] == speaker] # first or second speaker? does it matter? - # get unique dialogue id vals - uniqueconv = utterances['dialogue_id'].unique() - sents = [] - for i in uniqueconv: - convid = i - speakerconvtext = utterances.loc[(utterances['dialogue_id'] == convid) & (utterances['speaker'] == speaker)] - # join all sentences in text column - sents.append((convid,speakerconvtext['text'].tolist())) - return sents - -# other csv's that can be helpful for analysis -#charityppl = pd.read_csv('data/full_info.csv') # full info on each speaker -allfeats = pd.read_csv('testout.csv') # full output of receptiveness script if testing binary variables -qualdf = pd.read_csv('qualsort.csv') # sorted based on magnitude of some speaker quality - -def shift(one,two,avgforward=None): - # helps shifts conversation arrays to be the same length so we are comparing responses in order. 
- - if avgforward: - if len(one) > len(two): - # make sure to also shift down one - one,two= one[:len(two)],two - elif len(one) < len(two): - one,two= one,two[:len(one)] - - # get the average receptive over next avgforward utterances for each utterance - lens = len(one)-avgforward - avg1 = [] - avg2 = [] - for i in range(lens): - avg1.append(np.mean(two[i:i+avgforward])) - avg2.append(np.mean(one[i:i+avgforward])) - return avg1,avg2 - - if len(one) > len(two): - # make sure to also shift down one - return one[:len(two)],two - elif len(one) < len(two): - return one,two[:len(one)] - elif len(one) == len(two): - return one,two - -qualities = ["agreeableness_y","openness-to-experiences_x","extraversion","emotional-stability","conscientiousness"] - -def receptive_v_qual(quality): - # plots for each level of a quality (1-7), the spread of how receptive speaker's utterances are. - - quals = [qualdf.loc[qualdf['speaker_id']==sp][quality] for sp in allfeats['speaker'].tolist()] - plt.plot(allfeats['receptive'],quals,'o') - plt.xlabel('Receptiveness') - plt.ylabel(quality) - plt.show() - -# the main dataframe with the receptiveness scores. -ur = pd.read_csv('utterrecepts.csv') -# drop annotations col -ur = ur.drop(columns=['annotations', "speaker_internal_id","id","reply.to"]) -conversations = ur['conversation_id'].unique() - -def plot_1v2_recept(convs): - # plots the correlation of the 1st vs 2nd speaker. - # this mostly serves to obtain general correlation scores for general analysis. - - convdict = {} - x = np.array([]) - y = np.array([]) - - indcorrs = [] - spekrs = [] - - for c in convs: - conv = ur.loc[ur['conversation_id'] == c] - spkrs = conv['speaker'].unique() - spekrs.append(spkrs) - - recs_speaker_0 = conv.loc[conv['speaker_id'] == spkrs[0], 'receptive'][:-1] - recs_speaker_1 = conv.loc[conv['speaker_id'] == spkrs[1], 'receptive'][:-1] - - recs_speaker_0,recs_speaker_1 = shift(recs_speaker_0,recs_speaker_1,avgforward=2) - convdict[c] = (np.array(recs_speaker_0),np.array(recs_speaker_1)) - x = np.concatenate((x,recs_speaker_0)) - y = np.concatenate((y,recs_speaker_1)) - - indcorrs.append(np.corrcoef(np.array(recs_speaker_0),np.array(recs_speaker_1))[0][1]) - - if len(recs_speaker_1) != len(recs_speaker_0): - print(recs_speaker_1,recs_speaker_0) - print(len(recs_speaker_1),len(recs_speaker_0)) - break - - # Plot the difference in derivatives - plt.scatter(recs_speaker_0,recs_speaker_1,alpha=0.2) - plt.xlabel('1st speaker receptiveness') - plt.ylabel('2nd speaker receptiveness') - - plt.show() - return indcorrs - -indcorrs = plot_1v2_recept(conversations) - -# kde plot to show the distirbution of correlation in conversations. 
-# Typically a centered bi-modal distribution -sns.kdeplot(indcorrs,fill=True) -plt.title('correlation per conversation distribution') - -def receptive_ex_case(): # a case study to show how the receptive difference works - # for utterance_0 conversation_id, plot receptiveness of both speakers over conversation - conv = ur.loc[ur['conversation_id'] == 'utterance_0'] - - # Get the receptive scores for each speaker - # unique speaker vals - spkrs = conv['speaker'].unique() - recs_speaker_0 = conv.loc[conv['speaker_id'] == spkrs[0], 'receptive'][:-1] - recs_speaker_1 = conv.loc[conv['speaker_id'] == spkrs[1], 'receptive'][:-1] - - # Plot the lines - plt.plot(recs_speaker_0, label='Speaker 0') - plt.plot(recs_speaker_1, label='Speaker 1') - - # Add labels and legend - plt.xlabel('Turn') - plt.ylabel('Receptive Score') - plt.legend() - - # Show the plot - plt.show() - - difference = np.array(recs_speaker_0) - np.array(recs_speaker_1) - return difference - -difference = receptive_ex_case() # this will represent the rec.diff. in a conversation that is very correlated. - -def plot_recept_overtime(convs): # - """ - This will plot the difference in derivatives of the two speaker's receptiveness scores, - meaning the general trend of the speaker's changes in receptiveness. - Meaning, if they are changing in tandem or similarly this graph will be low in magnitude. - - For conversations whose speakers do follow each other closely in receptiveness as shown in the example above, - this will be a small value close to 0-0.2. - """ - convdict = {} - grads = [] - - for c in convs: - conv = ur.loc[ur['conversation_id'] == c] - spkrs = conv['speaker'].unique() - recs_speaker_0 = conv.loc[conv['speaker_id'] == spkrs[0], 'receptive'][:-1] - recs_speaker_1 = conv.loc[conv['speaker_id'] == spkrs[1], 'receptive'][:-1] - - convdict[c] = (np.array(recs_speaker_0),np.array(recs_speaker_1)) - - minlen = min(len(recs_speaker_0),len(recs_speaker_1)) - # chop both to that size - recs_speaker_0 = recs_speaker_0[:minlen] - recs_speaker_1 = recs_speaker_1[:minlen] - - grads.append(np.array(recs_speaker_0) - np.array(recs_speaker_1)) - - # Plot the difference in derivatives - plt.plot(abs(np.array(recs_speaker_0) - np.array(recs_speaker_1)),alpha=0.05) - plt.xlabel('Turn') - plt.ylabel('Diff of receptiveness plain') - - plt.plot(abs(difference),alpha=1,color='black') - plt.show() - -# a low score for the plot above means that the two speakers receptiveness follow each other closely -# turn by turn, so one changes and the other also changes accordingly. 
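For reference, the alignment step that the deleted recept_analysis.py relied on can be restated in a few lines. The sketch below is not the original code: it keeps the deleted script's `shift` signature and its `numpy as np` import, truncates the two per-speaker receptiveness series to the same length so turns line up, and, when a look-ahead window is given, replaces each turn's score with the mean of the next `avgforward` turns. (The deleted version appended the look-ahead averages from the opposite speaker's list, which looks unintentional; the sketch keeps each speaker's own series.)

    import numpy as np

    def shift(one, two, avgforward=None):
        """Align two per-speaker receptiveness series turn by turn."""
        n = min(len(one), len(two))  # truncate to the shorter conversation
        one, two = list(one[:n]), list(two[:n])
        if avgforward:
            # smooth each series with the mean over the next `avgforward` turns
            one = [np.mean(one[i:i + avgforward]) for i in range(n - avgforward)]
            two = [np.mean(two[i:i + avgforward]) for i in range(n - avgforward)]
        return one, two
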
diff --git a/seeker/snippet/test.py b/seeker/snippet/test.py new file mode 100644 index 00000000..11b9793c --- /dev/null +++ b/seeker/snippet/test.py @@ -0,0 +1,129 @@ +#date: 2024-08-30T17:10:20Z +#url: https://api.github.com/gists/215f0c315c532c90b8e7d1310596834a +#owner: https://api.github.com/users/youkaichao + +import torch +from typing import Optional + +from torch._dynamo.backends.common import aot_autograd + +@torch.library.custom_op("custom::paged_attention", mutates_args=[]) +def paged_attention(x: "**********": torch.Tensor, cache: torch.Tensor) -> torch.Tensor: + output = torch.empty_like(x) + num_prefill_tokens = "**********" + bs = x.size(0) + "**********" "**********" "**********" "**********" "**********"i "**********"f "**********" "**********"n "**********"u "**********"m "**********"_ "**********"p "**********"r "**********"e "**********"f "**********"i "**********"l "**********"l "**********"_ "**********"t "**********"o "**********"k "**********"e "**********"n "**********"s "**********" "**********"= "**********"= "**********" "**********"0 "**********": "**********" + ... # call decode attention + else: + ... # call prefill attention with x[: "**********" + ... # call decode attention with x[num_prefill_tokens: "**********" + return output + +@paged_attention.register_fake +def _(x: "**********": torch.Tensor, cache: torch.Tensor): + return torch.empty_like(x) + +def attention(x: "**********": torch.Tensor, cache: Optional[torch.Tensor] = None): + if cache is not None: + return torch.ops.custom.paged_attention(x, num_prefill_tokens, cache) + return x * 2 + +eager_model = True + +def custom_compiler(gm, inputs): + + # compilation options + # option 1: pass the full graph to inductor + # option 2: run the model in eager mode + # option 3: find subgraph and replace with kernels inside vLLM + + print(gm._graph.python_code(root_module="self", verbose=True).src) + + # selction logic + static_shape_graphs = dict() + dynamic_shape_graph = None + def forward(*args, **kwargs): + nonlocal static_shape_graphs, dynamic_shape_graph + batchsize = ... # Question: how to get batchsize from args? 
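+        # One possible answer to the question above (assumption, not from the
+        # original gist): scan the positional args for the first tensor and use
+        # its leading dimension, e.g.
+        #   batchsize = next(a.size(0) for a in args if isinstance(a, torch.Tensor))
+        # Which argument carries the batch/token dimension depends on how
+        # AOTAutograd flattens the inputs, so this is only a heuristic.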
+ if dynamic_shape_graph is None: + # if the input is symbolic shape, compile with dynamic shape support + dynamic_shape_graph = gm.forward + + if eager_model: + return dynamic_shape_graph(*args, **kwargs) + + if batchsize not in static_shape_graphs: + # if the input is static shape, compile with static shape support + static_shape_graphs[batchsize] = gm.forward + return static_shape_graphs[batchsize](*args, **kwargs) + + return forward + +def target_fn(x, num_prefill_tokens: "**********": + x = (x + 1) * 5 + if cache is not None: + x = "**********" + else: + x = x * 2 + x = x.sin() + x = x.cos() + return x + +compiled_target_fn = torch.compile(backend=aot_autograd(fw_compiler=custom_compiler))(target_fn) + +compiled_codes = [] + +def hook(old_colde, new_code): + if old_colde is target_fn.__code__: + compiled_codes.append(new_code) + +torch._dynamo.convert_frame.register_bytecode_hook(hook) + +def dispatcher(x, num_prefill_tokens: "**********": + if len(compiled_codes) < 2: + return compiled_target_fn(x, num_prefill_tokens, cache) + else: + target_fn.__code__ = compiled_codes[1] + return target_fn(x, num_prefill_tokens, cache) + +def test(): + + # profile run, without kv cache, fully static shape, max size + num_prefill_tokens = "**********"=torch.int32) + dispatcher(torch.randn(20, 10), num_prefill_tokens, None) + + # create cache + cache = torch.randn(1, 10) + + # warmup run, mark the input tensor as dynamic + x = torch.randn(10, 10) + torch._dynamo.mark_dynamic(x, 0) + num_prefill_tokens = "**********"=torch.int32) + out = "**********" + print(out) + + # the following run with not trigger Dynamo/Aot Autograd + + # if we are using `--enforce-eager`, we want this to directly run + # with compiled kernel that can handle dynamic shape + y = torch.randn(5, 10) + num_prefill_tokens = "**********"=torch.int32) + out = "**********" + print(out) + + eager_model = False + + # if we are using cudagraph, this is an additional warmup to capture cuda graph + for i in [1, 2, 4, 8, 16]: + y = torch.randn(i, 10) + num_prefill_tokens = "**********"=torch.int32) + out = "**********" + # and then, for later runs, we can directly run with compiled kernel if the shape + # matches the recorded shape. 
if not, run with dynamic shape + y = torch.randn(4, 10) + num_prefill_tokens = "**********"=torch.int32) + out = "**********" + print(out) + +if __name__ == "__main__": + test() diff --git a/seeker/snippet/tts_backup.py b/seeker/snippet/tts_backup.py new file mode 100644 index 00000000..ddca3774 --- /dev/null +++ b/seeker/snippet/tts_backup.py @@ -0,0 +1,182 @@ +#date: 2024-08-30T16:44:48Z +#url: https://api.github.com/gists/fabf3408c00b3d37464248378f5decbf +#owner: https://api.github.com/users/DerKleineLi + +import concurrent.futures +import json +import mimetypes +import re +import shutil +import sys +from pathlib import Path + +import magic +import requests + +URL_FOLDER = { + "ColliderURL": ["Models", "Models Raw"], + "DiffuseURL": ["Images", "Images Raw"], + "AssetbundleURL": ["Assetbundles"], + "Nickname": [], + "AssetbundleSecondaryURL": ["Assetbundles"], + "ImageURL": ["Images", "Images Raw"], + "MeshURL": ["Models", "Models Raw"], + "SkyURL": ["Images", "Images Raw"], + "BackURL": ["Images", "Images Raw"], + "URL": ["Images", "Images Raw"], + "FaceURL": ["Images", "Images Raw"], + "ImageSecondaryURL": ["Images", "Images Raw"], + "Item1": ["Audio"], + "NormalURL": ["Images", "Images Raw"], + "PDFUrl": ["PDF"], +} +FOLDER_EXT = { + "Models": ".obj", + "Assetbundles": ".unity3d", + "PDF": ".pdf", +} + + +def get_mod_dir(json_file): + while json_file.name != "Mods": + json_file = json_file.parent + return json_file + + +def copy_file(file, mod_dir, target_dir, target_name=None): + target_file = target_dir / file.relative_to(mod_dir) + if target_name is not None: + target_file = target_file.with_stem(target_name) + target_file.parent.mkdir(parents=True, exist_ok=True) + shutil.copy(file, target_file) + print(f"Copied {target_file.relative_to(target_dir)}") + + +def get_all_urls(data): + urls = {} + if not isinstance(data, dict): + return urls + for key, value in data.items(): + if isinstance(value, dict): + urls.update(get_all_urls(value)) + elif isinstance(value, list): + for item in value: + urls.update(get_all_urls(item)) + elif isinstance(value, str) and value.startswith("http"): + urls[value] = key + return urls + + +def download_file(url, target_dir, file_stem): + try: + # 发送 HTTP GET 请求下载文件 + response = requests.get(url, stream=True) + response.raise_for_status() # 检查请求是否成功 + + # # 获取内容类型并确定文件扩展名 + folder = target_dir.name + first_2048_bytes = response.raw.read(2048) + if folder in FOLDER_EXT: + ext = FOLDER_EXT[folder] + else: + # content_type = response.headers.get("content-type") + # ext = mimetypes.guess_extension(content_type) + # if ext is None: + # ext = ".bin" # 默认扩展名 + # 使用前 2048 字节来检测文件类型 + mime = magic.Magic(mime=True) + content_type = mime.from_buffer(first_2048_bytes) + ext = mimetypes.guess_extension(content_type) + if ext is None: + ext = ".bin" # 默认扩展名 + + # 确定文件名 + file_name = file_stem + ext + file_path = target_dir / file_name + target_dir.mkdir(parents=True, exist_ok=True) + + # 保存文件 + with open(file_path, "wb") as file: + file.write(first_2048_bytes) + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + + print(f"Downloaded {url}") + except Exception as e: + print(f"\033[91mFailed to download {url}\033[0m") + + +def process_url(url, key, mod_dir, target_dir): + if not key in URL_FOLDER: + print(f"Key {key} not found in URL_FOLDER") + print(f"URL: {url}") + return + + folder = URL_FOLDER[key] + if len(folder) == 0: + return + folder = folder[0] # the Raw folder is not considered + file_stem_old = None + if 
url.startswith("http://cloud-3.steamusercontent.com/"): + file_stem_old = re.sub(r"[^a-zA-Z0-9]", "", url) + url = url.replace( + "http://cloud-3.steamusercontent.com/", + "https://steamusercontent-a.akamaihd.net/", + ) + if url.startswith("https://cloud-3.steamusercontent.com/"): + file_stem_old = re.sub(r"[^a-zA-Z0-9]", "", url) + url = url.replace( + "https://cloud-3.steamusercontent.com/", + "https://steamusercontent-a.akamaihd.net/", + ) + file_stem = re.sub(r"[^a-zA-Z0-9]", "", url) + + files = (mod_dir / folder).glob(f"{file_stem}.*") + files = list(files) + if file_stem_old is not None: + files_old = (mod_dir / folder).glob(f"{file_stem_old}.*") + files_old = list(files_old) + files += files_old + + if len(files) == 0: # file not found + # download the file + download_file(url, target_dir / folder, file_stem) + else: + if len(files) > 1: + print(f"\033[94mMultiple files found for {url}\033[0m") + print(f"\033[94mFiles: {files}\033[0m") + file_path = list(files)[0] + copy_file(file_path, mod_dir, target_dir, file_stem) + + +def sanitize_folder_name(folder_name): + # 移除不允许的字符(假设不允许的字符为:<>:"/\|?*) + sanitized_name = re.sub(r'[<>:"/\\|?*]', "", folder_name) + # 将连续的空格替换为单个空格 + sanitized_name = re.sub(r"\s+", " ", sanitized_name) + return sanitized_name + + +def main(): + json_file = Path(sys.argv[1]) + mod_dir = get_mod_dir(json_file) + + with open(json_file, "r", encoding="utf-8") as f: + data = json.load(f) + + target_dir = Path(__file__).parent / sanitize_folder_name(data["SaveName"]) + thumbnail = json_file.parent / (json_file.stem + ".png") + copy_file(thumbnail, mod_dir, target_dir) + copy_file(json_file, mod_dir, target_dir) + urls = get_all_urls(data) + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [ + executor.submit(process_url, url, key, mod_dir, target_dir) + for url, key in urls.items() + ] + for future in concurrent.futures.as_completed(futures): + future.result() + + +if __name__ == "__main__": + main()
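
A usage note for tts_backup.py above (not part of the gist): the script takes a single argument, the path to a Tabletop Simulator save JSON located somewhere below a directory literally named "Mods", and it imports requests and magic (the python-magic package), so both must be installed. It copies the save JSON, the .png thumbnail with the same stem (which must exist next to the JSON), and every referenced asset it can find locally or download into a folder named after the save's SaveName, created next to the script itself. An illustrative invocation, with an example path only:

    python tts_backup.py "Tabletop Simulator/Mods/Workshop/123456789.json"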