diff --git a/seeker/report.txt b/seeker/report.txt index 48a3a4cf..afd23a9a 100644 --- a/seeker/report.txt +++ b/seeker/report.txt @@ -1,3 +1,51 @@ +-------------------------------------------------------------------------------- + 2024-08-29 17:11:54.148933 +-------------------------------------------------------------------------------- + On branch main +Your branch is up to date with 'origin/main'. + +Changes not staged for commit: + (use "git add/rm ..." to update what will be committed) + (use "git restore ..." to discard changes in working directory) + deleted: snippet/3proxy.sh + deleted: snippet/Merge Cells in Word using Python.py + deleted: snippet/Welcome-to-bulkOS.sh + deleted: snippet/airwork_test.py + deleted: snippet/airwrk.py + deleted: snippet/alpine-container.sh + deleted: snippet/arch-container.sh + deleted: snippet/asdf.py + deleted: snippet/bill_req_patient.go + deleted: snippet/code.py + deleted: snippet/git checkout-all-branches.sh + deleted: snippet/install.sh + deleted: snippet/neovim.sh + deleted: snippet/pomo.py + deleted: snippet/sharepoint_connection.py + deleted: snippet/slotVerse1withgui.py + deleted: snippet/slotVerse2nogui.py + deleted: snippet/ssacli_to_json.py + deleted: snippet/thirty_seven.java + deleted: snippet/ubuntu-container.sh + +Untracked files: + (use "git add ..." to include in what will be committed) + snippet/WikiLocation.java + snippet/WikiParser.java + snippet/adb-shell-as-root-dump-vendor.sh + snippet/chat.py + snippet/error-handling-flask.py + snippet/flask.py + snippet/pipes_1.py + snippet/repack.sh + snippet/repackepub.sh + snippet/runner.go + snippet/spbpu_schedule_to_google_calendar.py + snippet/vendor_sleuth.sh + snippet/vlm_rag.py + +no changes added to commit (use "git add" and/or "git commit -a") + -------------------------------------------------------------------------------- 2024-08-28 17:11:23.503361 -------------------------------------------------------------------------------- diff --git a/seeker/snippet/3proxy.sh b/seeker/snippet/3proxy.sh deleted file mode 100644 index f54b75c4..00000000 --- a/seeker/snippet/3proxy.sh +++ /dev/null @@ -1,144 +0,0 @@ -#date: 2024-08-27T17:07:26Z -#url: https://api.github.com/gists/bd0eb83b0ac018f200299aa140fbff55 -#owner: https://api.github.com/users/tuwibu - -#!/bin/sh -random() { - tr proxy.txt <$WORKDIR/data.txt -gen_iptables >$WORKDIR/boot_iptables.sh -gen_ifconfig >$WORKDIR/boot_ifconfig.sh -chmod +x ${WORKDIR}/boot_*.sh /etc/rc0.d - -gen_3proxy >/etc/3proxy/3proxy.cfg -ulimit -S -n 4096 -/etc/init.d/3proxy start - -gen_proxy_file_for_user - -#upload_proxy -install_jq && upload_2file \ No newline at end of file diff --git a/seeker/snippet/Merge Cells in Word using Python.py b/seeker/snippet/Merge Cells in Word using Python.py deleted file mode 100644 index 3a20539e..00000000 --- a/seeker/snippet/Merge Cells in Word using Python.py +++ /dev/null @@ -1,51 +0,0 @@ -#date: 2024-08-27T16:59:28Z -#url: https://api.github.com/gists/7f7b7249869b6a11fbba48c57cff19e1 -#owner: https://api.github.com/users/aspose-com-kb - -import aspose.words as aw -import aspose.pydrawing as drawing - -def mergeCells(startCell: aw.tables.Cell, endCell: aw.tables.Cell): - - parentTable = startCell.parent_row.parent_table - - # Find the start and end cell position - startCellPos = drawing.Point(startCell.parent_row.index_of(startCell), parentTable.index_of(startCell.parent_row)) - endCellPos = drawing.Point(endCell.parent_row.index_of(endCell), parentTable.index_of(endCell.parent_row)) - - # Create a range of cells - mergeRange = drawing.Rectangle( - min(startCellPos.x, endCellPos.x), - min(startCellPos.y, endCellPos.y), - abs(endCellPos.x - startCellPos.x) + 1, - abs(endCellPos.y - startCellPos.y) + 1) - - for row in parentTable.rows: - row = row.as_row() - for cell in row.cells: - cell = cell.as_cell() - currentPos = drawing.Point(row.index_of(cell), parentTable.index_of(row)) - - # Merge the cell if inside the range - if mergeRange.contains(currentPos): - cell.cell_format.horizontal_merge = aw.tables.CellMerge.FIRST if currentPos.x == mergeRange.x else aw.tables.CellMerge.PREVIOUS - cell.cell_format.vertical_merge = aw.tables.CellMerge.FIRST if currentPos.y == mergeRange.y else aw.tables.CellMerge.PREVIOUS - -# Load the license -wordLic = aw.License() -wordLic.set_license("license.lic") - -tableDoc = aw.Document("Table.docx") - -table = tableDoc.first_section.body.tables[0] - -# Define start and end cell for the range -cellStartRange = table.rows[0].cells[0] -cellEndRange = table.rows[1].cells[1] - -# Merge cells -mergeCells(cellStartRange, cellEndRange) - -tableDoc.save("output.docx") - -print ("Table cells merged successfully") \ No newline at end of file diff --git a/seeker/snippet/Welcome-to-bulkOS.sh b/seeker/snippet/Welcome-to-bulkOS.sh deleted file mode 100644 index 180114c8..00000000 --- a/seeker/snippet/Welcome-to-bulkOS.sh +++ /dev/null @@ -1,10 +0,0 @@ -#date: 2024-08-27T17:05:27Z -#url: https://api.github.com/gists/e4b4992a3ea35e5fe5edb687c82b906e -#owner: https://api.github.com/users/rmassaroni - -echo " ________ __ __ __ __ __ _______ _______ " -echo "| | | |.-----.| |.----.-----.--------.-----. | |_.-----. | |--.--.--.| | |--.| | __|" -echo "| | | || -__|| || __| _ | | -__| | _| _ | | _ | | || | < | - |__ |" -echo "|________||_____||__||____|_____|__|__|__|_____| |____|_____| |_____|_____||__|__|__||_______|_______|" -echo -n "by RJM---------------------------------------------------------------------------------------" ; echo "$(date "+%a %b %d %Y")" -echo \ No newline at end of file diff --git a/seeker/snippet/WikiLocation.java b/seeker/snippet/WikiLocation.java new file mode 100644 index 00000000..4bf32c46 --- /dev/null +++ b/seeker/snippet/WikiLocation.java @@ -0,0 +1,88 @@ +//date: 2024-08-29T17:03:49Z +//url: https://api.github.com/gists/a52fb5d70dcef6d431363b0f431de4e6 +//owner: https://api.github.com/users/pagetronic + +package live.page.wiki; + +import live.page.hubd.system.json.Json; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class WikiLocation { + public static List findLocation(Json data) { + for (String key : data.keySet()) { + if (key.matches("object location|object location dec|location|location dec|camera location|camera location dec")) { + List coordinates = convertCoordinates(data.getList(key)); + if (coordinates != null) { + return coordinates; + } + } + if (Json.class.isAssignableFrom(data.get(key).getClass())) { + List coordinates = findLocation(data.getJson(key)); + if (coordinates != null) { + return coordinates; + } + } + if (List.class.isAssignableFrom(data.get(key).getClass()) && + !data.getList(key).isEmpty()) { + for (Object item : data.getList(key)) { + if (item != null && Json.class.isAssignableFrom(item.getClass())) { + List coordinates = findLocation((Json) item); + if (coordinates != null) { + return coordinates; + } + } + } + } + } + return null; + } + + + private static List convertCoordinates(List coordinates) { + if (coordinates == null) { + return null; + } + coordinates = new ArrayList<>(coordinates); + for (int key : new int[]{8, 2}) { + if (coordinates.size() > key) { + for (String start : new String[]{ + "source", "alt", "type", + "heading", "region", "zoom", "scale", + "...", "sl", "dim", "view"}) { + if (coordinates.get(key).trim().toLowerCase().startsWith(start) || coordinates.get(key).trim().isEmpty()) { + coordinates.remove(key); + break; + } + } + } + } + + try { + if (coordinates.size() >= 8 && + (coordinates.get(3).equals("N") || coordinates.get(3).equals("S")) && + (coordinates.get(7).equals("E") || coordinates.get(7).equals("W")) + ) { + return Arrays.asList( + convertCoordinates(Double.parseDouble(coordinates.get(0)), Double.parseDouble(coordinates.get(1)), Double.parseDouble(coordinates.get(2)), coordinates.get(3)), + convertCoordinates(Double.parseDouble(coordinates.get(4)), Double.parseDouble(coordinates.get(5)), Double.parseDouble(coordinates.get(6)), coordinates.get(7))); + } else { + return Arrays.asList(Double.parseDouble(coordinates.get(0)), Double.parseDouble(coordinates.get(1))); + } + } catch (Exception e) { + e.printStackTrace(); + } + return null; + } + + private static double convertCoordinates(double degree, double minute, double second, String heading) { + double decimalDegrees = degree + (minute / 60.0) + (second / 3600.0); + if ("W".equals(heading) || "S".equals(heading)) { + decimalDegrees = -decimalDegrees; + } + + return decimalDegrees; + } +} diff --git a/seeker/snippet/WikiParser.java b/seeker/snippet/WikiParser.java new file mode 100644 index 00000000..4df00038 --- /dev/null +++ b/seeker/snippet/WikiParser.java @@ -0,0 +1,117 @@ +//date: 2024-08-29T17:03:49Z +//url: https://api.github.com/gists/a52fb5d70dcef6d431363b0f431de4e6 +//owner: https://api.github.com/users/pagetronic + +package live.page.wiki; + +import info.bliki.wiki.filter.PlainTextConverter; +import info.bliki.wiki.model.WikiModel; +import live.page.hubd.system.json.Json; +import live.page.hubd.system.utils.Fx; +import org.apache.commons.text.StringEscapeUtils; + +import java.io.IOException; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class WikiParser extends WikiModel { + + final Json data = new Json(); + + public WikiParser() { + super("", ""); + } + + public static Json getInfos(String title, String text) { + WikiParser wikiModel = new WikiParser(); + try { + wikiModel.render(new PlainTextConverter(), text, new StringBuilder(), true, true); + } catch (IOException e) { + Fx.log("\n#{" + url(title) + "}"); + } + + Json data = new Json(); + + if (wikiModel.data.containsKey("information")) { + + Json information = wikiModel.data.getJson("information"); + + Json description = new Json(); + if (information.containsKey("description") && String.class.isAssignableFrom(information.get("description").getClass())) { + description.put("int", information.getString("description")); + } else { + description = information.getJson("description"); + if (description != null && description.containsKey("langswitch")) { + description = description.getJson("langswitch"); + } + } + data.put("description", description); + + if (description == null || description.isEmpty()) { + Fx.log("\nD{" + url(title) + "}"); + data.put("description", StringEscapeUtils.unescapeHtml4(title).split("\\.")[0]); + } + if (information.containsKey("author")) { + data.put("author", information.get("author")); + } + + } + List coordinates = WikiLocation.findLocation(wikiModel.data); + if (coordinates == null) { + Fx.log("\nL{" + url(title) + "}"); + } + data.put("coordinates", coordinates); + data.put("data", wikiModel.data); + return data; + } + + private static String url(String title) { + return " https://commons.wikimedia.org/wiki/" + URLEncoder.encode(StringEscapeUtils.unescapeHtml4(title), StandardCharsets.UTF_8).replace("+", "%20") + " "; + } + + + @Override + public void substituteTemplateCall(String templateName, Map parameterMap, Appendable writer) throws IOException { + + writer.append("@@Template@").append(templateName.toLowerCase().trim()).append("@"); + + Json params = new Json(); + + for (String key : parameterMap.keySet()) { + + WikiParser model = new WikiParser(); + StringBuilder builder = new StringBuilder(); + model.render(new PlainTextConverter(), parameterMap.get(key), builder, true, false); + String str = builder.toString().replace("[\r\n ]+", " ").replaceAll(" +", " ").trim(); + writer.append(str); + Matcher match = Pattern.compile("@@Template@([^@]+)@", Pattern.MULTILINE).matcher(builder.toString()); + Json done = new Json(); + while (match.find()) { + if (data.containsKey(match.group(1).toLowerCase().trim())) { + done.put(match.group(1).toLowerCase().trim(), data.get(match.group(1).toLowerCase().trim())); + data.remove(match.group(1).toLowerCase().trim()); + } + } + if (!key.equals("prec") && !key.equals("wikidata")) { + if (!done.isEmpty()) { + params.put(key.toLowerCase().trim(), done); + } else if (!str.isEmpty()) { + params.put(key.toLowerCase().trim(), str); + } + } + } + + if (params.size() == 1 && params.containsKey("1")) { + data.put(templateName.toLowerCase().trim(), params.get("1")); + } else if (params.keySet().stream().allMatch(name -> name.matches("[0-9]+"))) { + data.put(templateName.toLowerCase().trim(), params.values().stream().toList()); + } else { + data.put(templateName.toLowerCase().trim(), params); + } + } + +} \ No newline at end of file diff --git a/seeker/snippet/adb-shell-as-root-dump-vendor.sh b/seeker/snippet/adb-shell-as-root-dump-vendor.sh new file mode 100644 index 00000000..92ebd37a --- /dev/null +++ b/seeker/snippet/adb-shell-as-root-dump-vendor.sh @@ -0,0 +1,5 @@ +#date: 2024-08-29T17:02:18Z +#url: https://api.github.com/gists/0cd89a25c730ac267559c44b5487c9ff +#owner: https://api.github.com/users/spezifisch + +tar -cvzf /sdcard/proprietary-files.tar.gz /odm/etc /product/app /product/priv-app /system/etc /system_ext/app /system_ext/bin /system_ext/etc /system_ext/framework /system_ext/lib64 /system_ext/priv-app /system/framework /vendor/app /vendor/bin /vendor/etc /vendor/firmware /vendor/gpu /vendor/lib64 /vendor/lib \ No newline at end of file diff --git a/seeker/snippet/airwork_test.py b/seeker/snippet/airwork_test.py deleted file mode 100644 index 125dca3b..00000000 --- a/seeker/snippet/airwork_test.py +++ /dev/null @@ -1,91 +0,0 @@ -#date: 2024-08-27T16:45:31Z -#url: https://api.github.com/gists/373dd8d79f9319eccd04c979cf2a9396 -#owner: https://api.github.com/users/nahid111 - -from typing import List - -""" -Problem 1: Add Digits -Given a non-negative integer num, repeatedly add all its digits until the result has only one digit. - -Example 1: - -Input: num = 38 -Output: 2 -Explanation: The process is as follows: 3 + 8 = 11, then 1 + 1 = 2. Since 2 has only one digit, 2 is returned. -""" - - -def add_digits(num: int) -> int: - if num == 0: - return num - - num = str(num) - - if len(num) == 1: - return int(num) - - res = 0 - for n in num: - res += int(n) - - return add_digits(res) - - -""" -Problem 2: Contains Duplicate -Given an integer array nums, return true if any value appears at least twice in the array, and return false if every element is distinct. - -Example 1: - -Input: nums = [1,2,3,1] -Output: true -Explanation: The value 1 appears twice in the array. -""" - - -def contains_duplicate(nums: List[int]) -> bool: - if not nums: - return False - - cache_ = {} - - for i in nums: - if i in cache_: - return True - else: - cache_[i] = 1 - - return False - - -""" -Problem 3: Reverse Vowels of a String -Write a function that takes a string as input and reverses only the vowels of a string. - -Example 1: - -Input: s = "algorithm" -Output: "ilgorathm" -Explanation: The vowels "e" and "o" are reversed. -""" - - -def reverse_vowels(word: str): - if word == "": - return "" - - word = list(word) - vowels = ['a', 'A', 'e', 'E', 'i', 'I', 'o', 'O', 'u', 'U'] - left, right = 0, len(word)-1 - - while left < right: - if word[left] in vowels: - for c in range(right, left, -1): - if word[c] in vowels: - word[left], word[c] = word[c], word[left] - right = c-1 - break - left += 1 - return "".join(word) - diff --git a/seeker/snippet/airwrk.py b/seeker/snippet/airwrk.py deleted file mode 100644 index 03660173..00000000 --- a/seeker/snippet/airwrk.py +++ /dev/null @@ -1,50 +0,0 @@ -#date: 2024-08-27T16:59:54Z -#url: https://api.github.com/gists/aa376db8b841b1d90d9e0c56f6509272 -#owner: https://api.github.com/users/ahmadalsajid - -# Problem 1 -def div_sum(n): - return sum([int(i) for i in list(str(n))]) - -def solution1(num): - while num > 9: - num = div_sum(num) - print(num) - -solution1(num = 38) - - -# Problem 2 -def solution2(nums): - _set = set(nums) - if len(_set) == len(nums): - print('false') - else: - print('true') - -solution2(nums = [1,1,1,3,3,4,3,2,4,2]) - - -# Problem 3 -def solution3(s): - vowels = 'aeiouAEIOU' - _temp_vowels = list() - _temp_string = list() - - for letter in s: - if letter in vowels: - _temp_vowels.append(letter) - _temp_string.append('_') - else: - _temp_string.append(letter) - - _temp_vowels.reverse() - _index = 0 - for _indx, letter in enumerate(_temp_string): - if letter == '_': - _temp_string[_indx] = _temp_vowels[_index] - _index = _index + 1 - - print(''.join(_temp_string)) - -solution3(s = 'algorithm') diff --git a/seeker/snippet/alpine-container.sh b/seeker/snippet/alpine-container.sh deleted file mode 100644 index b30b2e46..00000000 --- a/seeker/snippet/alpine-container.sh +++ /dev/null @@ -1,82 +0,0 @@ -#date: 2024-08-27T16:52:24Z -#url: https://api.github.com/gists/52bf52fa78a0aec017cd3dcefb9df41f -#owner: https://api.github.com/users/Lokawn - -#!/bin/bash -e -# Creates a systemd-nspawn container with Alpine - -MIRROR=http://dl-cdn.alpinelinux.org/alpine -VERSION=${VERSION:-v3.20} -APKTOOLS_VERSION=2.14.4-r0 - - -wget_or_curl () { - if command -v wget >/dev/null; then - wget -qO- "$1" - elif command -v curl >/dev/null; then - curl -Ls "$1" - else - echo "missing either curl or wget" >&2 - return 1 - fi -} - -if [ $UID -ne 0 ]; then - echo "run this script as root" >&2 - exit 1 -fi - -dest="$1" -if [ -z "$dest" ]; then - echo "Usage: $0 " >&2 - exit 0 -fi -if [ -e "$dest/usr/bin" ]; then - echo "destination already seems to contain a root file system" >&2 - exit 1 -fi - -if [[ "$(uname -m)" =~ ^i[3456]86|x86 ]]; then - toolarch=x86 - guestarch=$toolarch - [ "$(uname -m)" = x86_64 ] && guestarch=x86_64 -elif [[ "$(uname -m)" =~ ^arm|aarch64 ]]; then - toolarch=armv7 - guestarch=$toolarch - [ "$(uname -m)" = aarch64 ] && guestarch=aarch64 -else - echo "unsupported architecture" >&2 - exit 1 -fi -apkdir=$(mktemp -d) -trap 'rm -rf $apkdir' EXIT - -wget_or_curl "$MIRROR/latest-stable/main/$toolarch/apk-tools-static-$APKTOOLS_VERSION.apk" \ - | tar -xz -C $apkdir || \ - { echo "couldn't download apk-tools, the version might have changed..." >&2; exit 1; } - -$apkdir/sbin/apk.static \ - -X $MIRROR/$VERSION/main -U --arch $guestarch \ - --allow-untrusted --root "$dest" \ - --initdb add alpine-base - -mkdir -p "$dest"/{etc/apk,root} -# configure mirror -printf '%s/%s/main\n%s/%s/community\n' "$MIRROR" $VERSION "$MIRROR" $VERSION >"$dest"/etc/apk/repositories -for i in $(seq 0 10); do # https://github.com/systemd/systemd/issues/852 - echo "pts/$i" >>"$dest/etc/securetty" -done -# make console work -sed '/tty[0-9]:/ s/^/#/' -i "$dest"/etc/inittab -printf 'console::respawn:/sbin/getty 38400 console\n' >>"$dest"/etc/inittab -# minimal boot services -for s in hostname bootmisc syslog; do - ln -s /etc/init.d/$s "$dest"/etc/runlevels/boot/$s -done -for s in killprocs savecache; do - ln -s /etc/init.d/$s "$dest"/etc/runlevels/shutdown/$s -done - - -echo "" -echo "Alpine $VERSION $guestarch container was created successfully." diff --git a/seeker/snippet/arch-container.sh b/seeker/snippet/arch-container.sh deleted file mode 100644 index bb324807..00000000 --- a/seeker/snippet/arch-container.sh +++ /dev/null @@ -1,66 +0,0 @@ -#date: 2024-08-27T16:52:24Z -#url: https://api.github.com/gists/52bf52fa78a0aec017cd3dcefb9df41f -#owner: https://api.github.com/users/Lokawn - -#!/bin/bash -e -# Creates a systemd-nspawn container with Arch Linux - -MIRROR=http://mirror.fra10.de.leaseweb.net/archlinux -ISO_DATE=latest -PKG_GROUPS="base" - - -wget_or_curl () { - if command -v wget >/dev/null; then - wget "$1" -O "$2" - elif command -v curl >/dev/null; then - curl -L "$1" -o "$2" - else - echo "missing either curl or wget" >&2 - return 1 - fi -} - -if [ $UID -ne 0 ]; then - echo "run this script as root" >&2 - exit 1 -fi - -dest="$1" -if [ -z "$dest" ]; then - echo "Usage: $0 " >&2 - exit 0 -fi -if [ -e "$dest/usr/bin" ]; then - echo "destination already seems to contain a root file system" >&2 - exit 1 -fi - -[ "$(uname -m)" = x86_64 ] || { echo "unsupported architecture" >&2; exit 1; } -tarfile=$(mktemp) -trap 'rm $tarfile' EXIT - -wget_or_curl "$MIRROR/iso/$ISO_DATE/archlinux-bootstrap-x86_64.tar.gz" $tarfile - -mkdir -p "$dest" -tar -xzf $tarfile -C "$dest" --strip-components=1 --numeric-owner - -# configure mirror -printf 'Server = %s/$repo/os/$arch\n' "$MIRROR" >"$dest"/etc/pacman.d/mirrorlist -sed '/^root: "**********" -rm "$dest/etc/resolv.conf" # systemd configures this -# https://github.com/systemd/systemd/issues/852 -[ -f "$dest/etc/securetty" ] && \ - printf 'pts/%d\n' $(seq 0 10) >>"$dest/etc/securetty" -# seems to be this bug https://github.com/systemd/systemd/issues/3611 -systemd-machine-id-setup --root="$dest" -# install the packages -systemd-nspawn -q -D "$dest" sh -c " -pacman-key --init && pacman-key --populate -pacman -Sy --noconfirm --needed ${PKG_GROUPS} -" - - -echo "" -echo "Arch Linux container was created successfully (bootstrapped from $ISO_DATE)" -ssfully (bootstrapped from $ISO_DATE)" diff --git a/seeker/snippet/asdf.py b/seeker/snippet/asdf.py deleted file mode 100644 index 5b580538..00000000 --- a/seeker/snippet/asdf.py +++ /dev/null @@ -1,5 +0,0 @@ -#date: 2024-08-27T16:53:09Z -#url: https://api.github.com/gists/f8841f027353551fe345fc9b09512701 -#owner: https://api.github.com/users/KeyZox71 - -from math import * diff --git a/seeker/snippet/bill_req_patient.go b/seeker/snippet/bill_req_patient.go deleted file mode 100644 index 8d07403e..00000000 --- a/seeker/snippet/bill_req_patient.go +++ /dev/null @@ -1,87 +0,0 @@ -//date: 2024-08-27T16:59:43Z -//url: https://api.github.com/gists/42d5d3e80b62c284432d3437c3dd2493 -//owner: https://api.github.com/users/ziyuji-pillpack - -package ncpdptypes - -import ( - "time" -) - -// BillReqPatient stores information about the patient that can be provided in a B1 (billing) request. -// See NCPDP D.0, p 67-68. -type BillReqPatient struct { - ID *PatientID `qualifier:"331-CX X(2) optional" field:"332-CY X(20) optional"` - DateOfBirth time.Time `field:"304-C4 9(8)"` - Gender GenderCode `field:"305-C5 9(1)"` - FirstName *string `field:"310-CA X(12) optional"` - LastName string `field:"311-CB X(15)"` - StreetAddress *string `field:"322-CM X(30) optional"` - City *string `field:"323-CN X(20) optional"` - State *StateOrProvinceCode `field:"324-CO X(2) optional"` - Zip *string `field:"325-CP X(15) optional"` - PlaceOfService *PlaceOfServiceCode `field:"307-C7 9(2) optional"` - EmployerID *string `field:"333-CZ X(15) optional"` - PregnancyStatus *PregnancyStatusCode `field:"335-2C X(1) optional"` - PatientResidence *PatientResidenceCode `field:"384-4X 9(2) optional"` - // 326-CQ, 350-HN: not supported because we don't support phone numbers or email addresses - // Note: we don't attempt to validate whether or not EmployerID follows the IRS-specified format. -} - -// PatientID stores an ID for a patient with an associated Kind -type PatientID struct { - Kind PatientIDKind - ID string -} - -// PatientIDKind stores the value of the "Patient ID Qualifier" field (331-CX). -type PatientIDKind eclBase - -// PatientIDKinds stores all valid values of PatientIDKind -var PatientIDKinds = struct { - SSN, - AssignedByLTCFacility, - DriversLicenseNumber, - USMilitaryID, - AssignedByPlan, - AssignedByPlanSSNBased, - MedicaidID, - StateIssuedID, - PassportID, - MedicareBeneficiaryID, - AssignedByEmployer, - AssignedByPayer, - AlienNumber, - StudentVisaNumber, - IndialTribalID, - UPI, - LexID, - Other, - MedicalRecordID PatientIDKind -}{ - SSN: PatientIDKind{"01", "Social Security Number"}, - AssignedByLTCFacility: PatientIDKind{"1J", "Facility ID Number"}, - DriversLicenseNumber: PatientIDKind{"02", "Driver's License Number"}, - USMilitaryID: PatientIDKind{"03", "U.S. Military ID"}, - AssignedByPlan: PatientIDKind{"04", "Non-SSN-based patient identifier assigned by health plan"}, - AssignedByPlanSSNBased: PatientIDKind{"05", "SSN-based patient identifier assigned by health plan"}, - MedicaidID: PatientIDKind{"06", "Medicaid ID"}, - StateIssuedID: PatientIDKind{"07", "State Issued ID"}, - PassportID: PatientIDKind{"08", "Passport ID (or other ID assigned by a national government)"}, - MedicareBeneficiaryID: PatientIDKind{"09", "Medicare Beneficiary ID"}, - AssignedByEmployer: PatientIDKind{"10", "Employer Assigned ID"}, - AssignedByPayer: PatientIDKind{"11", "Payer/PBM Assigned ID"}, - AlienNumber: PatientIDKind{"12", "Alien Number (Government Permanent Residence Number)"}, - StudentVisaNumber: PatientIDKind{"13", "Government Student VISA Number"}, - IndialTribalID: PatientIDKind{"14", "Indian Tribal ID"}, - UPI: PatientIDKind{"15", "NCPDP Universal Patient Identifier (UPI)"}, - LexID: PatientIDKind{"16", "LexID Universal Patient Identifier (UPI)"}, - Other: PatientIDKind{"99", "Other"}, - MedicalRecordID: PatientIDKind{"EA", "Medical Record Identification Number (EHR)"}, -} - -// Values returns a list of all valid values of this type -func (PatientIDKind) Values() values { return valuesFromStruct(PatientIDKinds) } - -// Code returns the serialized value -func (c PatientIDKind) Code() string { return c.code } diff --git a/seeker/snippet/chat.py b/seeker/snippet/chat.py new file mode 100644 index 00000000..c1d5f4b4 --- /dev/null +++ b/seeker/snippet/chat.py @@ -0,0 +1,94 @@ +#date: 2024-08-29T16:50:18Z +#url: https://api.github.com/gists/e1cc87e813835f151f8a342a16764b25 +#owner: https://api.github.com/users/aphexlog + +import json +import boto3 +import logging +import pyaudio +from botocore.exceptions import ClientError +from mypy_boto3_bedrock_runtime.client import BedrockRuntimeClient as BedrockClient +from typing import cast +from pydub import AudioSegment +from io import BytesIO + +# Setup logging +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +class DragonChatHandler: + def __init__(self, region: str): + self.client = cast(BedrockClient, boto3.client("bedrock-runtime", region_name=region)) + self.polly_client = boto3.client('polly', region_name=region) + + "**********" "**********" "**********" "**********" "**********"d "**********"e "**********"f "**********" "**********"g "**********"e "**********"n "**********"e "**********"r "**********"a "**********"t "**********"e "**********"_ "**********"m "**********"e "**********"s "**********"s "**********"a "**********"g "**********"e "**********"( "**********"s "**********"e "**********"l "**********"f "**********", "**********" "**********"m "**********"o "**********"d "**********"e "**********"l "**********"_ "**********"i "**********"d "**********", "**********" "**********"s "**********"y "**********"s "**********"t "**********"e "**********"m "**********"_ "**********"p "**********"r "**********"o "**********"m "**********"p "**********"t "**********", "**********" "**********"m "**********"e "**********"s "**********"s "**********"a "**********"g "**********"e "**********"s "**********", "**********" "**********"m "**********"a "**********"x "**********"_ "**********"t "**********"o "**********"k "**********"e "**********"n "**********"s "**********") "**********": "**********" + body = json.dumps({ + "anthropic_version": "bedrock-2023-05-31", + "max_tokens": "**********" + "system": system_prompt, + "messages": messages + }) + + try: + response = self.client.invoke_model(body=body, modelId=model_id) + response_body = json.loads(response.get('body').read()) + return response_body + except ClientError as err: + message = err.response["Error"]["Message"] + logger.error("A client error occurred: %s", message) + raise + + def send_message(self, message, model="anthropic.claude-3-haiku-20240307-v1:0"): + user_message = {"role": "user", "content": message} + messages = [user_message] + system_prompt = "Please respond to the user's message." + max_tokens = "**********" + + return self.generate_message(model, system_prompt, messages, max_tokens) + + def start_conversation(self, initial_message): + return self.send_message(initial_message) + + def continue_conversation(self, message): + return self.send_message(message) + + def speak_response(self, response_string): + response = self.polly_client.synthesize_speech( + Text=response_string, + OutputFormat='mp3', + VoiceId='Joanna' + ) + + if "AudioStream" in response: + # Convert MP3 to PCM using pydub + audio_stream = response['AudioStream'].read() + sound = AudioSegment.from_mp3(BytesIO(audio_stream)) + raw_data = sound.raw_data + sample_width = sound.sample_width + channels = sound.channels + frame_rate = sound.frame_rate + + # Play the audio + p = pyaudio.PyAudio() + stream = p.open(format=p.get_format_from_width(sample_width), + channels=channels, + rate=frame_rate, + output=True) + + stream.write(raw_data) + + stream.stop_stream() + stream.close() + p.terminate() + +if __name__ == "__main__": + chat_handler = DragonChatHandler("us-east-1") + + # Continue the conversation in a loop + while True: + user_input = input("Input message: ") + response = chat_handler.continue_conversation(user_input) + response_string = response["content"][0]["text"] + + print(f"{response_string}\n") + chat_handler.speak_response(response_string) diff --git a/seeker/snippet/code.py b/seeker/snippet/code.py deleted file mode 100644 index 1a404da4..00000000 --- a/seeker/snippet/code.py +++ /dev/null @@ -1,47 +0,0 @@ -#date: 2024-08-27T16:47:35Z -#url: https://api.github.com/gists/abf1e2bfe7149309acc3606cc70619a7 -#owner: https://api.github.com/users/ikeji - -print("Starting") - -from kmk.keys import KC -from kmk.kmk_keyboard import KMKKeyboard as _KMKKeyboard -from kmk.modules.layers import Layers -from kmk.modules.sticky_keys import StickyKeys -from kmk.scanners.keypad import KeysScanner -import board - -_PINS = [ - board.GP2, board.GP6, board.GP10, board.GP13, - board.GP3, board.GP7, board.GP11, board.GP12, -] - -class KMKKeyboard(_KMKKeyboard): - coord_mapping = [ - 0, 1, 2, 3, - 4, 5, 6, 7, - ] - - def __init__(self): - self.matrix = KeysScanner(_PINS) - -keyboard = KMKKeyboard() - -sticky_keys = StickyKeys() - -keyboard.modules.append(Layers()) -keyboard.modules.append(sticky_keys) - -keyboard.keymap = [ - [ - KC.SK(KC.LCTL), KC.LT(1, KC.B), KC.C, KC.D, - KC.E, KC.F, KC.G, KC.H, - ], - [ - KC.I, KC.TRNS, KC.K, KC.L, - KC.M, KC.N, KC.O, KC.P, - ], -] - -if __name__ == '__main__': - keyboard.go() diff --git a/seeker/snippet/error-handling-flask.py b/seeker/snippet/error-handling-flask.py new file mode 100644 index 00000000..de88dde6 --- /dev/null +++ b/seeker/snippet/error-handling-flask.py @@ -0,0 +1,15 @@ +#date: 2024-08-29T16:56:54Z +#url: https://api.github.com/gists/f31e7d620f9525d0e6e7dd81e3d47be9 +#owner: https://api.github.com/users/docsallover + +from flask import Flask, render_template + +app = Flask(__name__) + +@app.errorhandler(404) +def page_not_found(error): + return render_template('404.html'), 404 + +@app.errorhandler(500) +def internal_server_error(error): + return render_template('500.html'), 500 \ No newline at end of file diff --git a/seeker/snippet/flask.py b/seeker/snippet/flask.py new file mode 100644 index 00000000..7c292ac7 --- /dev/null +++ b/seeker/snippet/flask.py @@ -0,0 +1,12 @@ +#date: 2024-08-29T16:52:07Z +#url: https://api.github.com/gists/5290fa55e63c46023f99260af082a329 +#owner: https://api.github.com/users/docsallover + +from flask import render_template +from flask_sqlalchemy import Pagination + +@app.route('/users') +def users(): + page = request.args.get('page', 1, type=int) + pagination = User.query.paginate(page, per_page=10) + return render_template('users.html', pagination=pagination) \ No newline at end of file diff --git a/seeker/snippet/git checkout-all-branches.sh b/seeker/snippet/git checkout-all-branches.sh deleted file mode 100644 index ce024f40..00000000 --- a/seeker/snippet/git checkout-all-branches.sh +++ /dev/null @@ -1,12 +0,0 @@ -#date: 2024-08-27T17:06:09Z -#url: https://api.github.com/gists/d6a4751a624b92d826c13624930f3354 -#owner: https://api.github.com/users/thiagomiranda3 - -#!/bin/bash - -#Whenever you clone a repo, you do not clone all of its branches by default. -#If you wish to do so, use the following script: - -for branch in `git branch -a | grep remotes | grep -v HEAD | grep -v master `; do - git branch --track ${branch#remotes/origin/} $branch -done \ No newline at end of file diff --git a/seeker/snippet/install.sh b/seeker/snippet/install.sh deleted file mode 100644 index 08db4ec2..00000000 --- a/seeker/snippet/install.sh +++ /dev/null @@ -1,19 +0,0 @@ -#date: 2024-08-27T17:06:11Z -#url: https://api.github.com/gists/231ab631fa3f46152a2db92910d38d4b -#owner: https://api.github.com/users/thatrandomperson5 - -echo "Installing VSCode CLI in $PREFIX/bin" - -ARCHITECTURE=$(lscpu | grep "Architecture" | tr -d " \t\n\r" | cut -c 14-) # Extract the architecture -URL="" - -if ["$ARCHITECTURE" = "arm64"] || ["$ARCHITECTURE" = "aarch64"]; then - URL="https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-arm64" -elif ["$ARCHITECTURE" = "arm32"]; then - URL="https://code.visualstudio.com/sha/download?build=stable&os=cli-linux-armhf" -elif ["$ARCHITECTURE" = "x64"] || ["$ARCHITECTURE" = "x86_64"] || ["$ARCHITECTURE" = "amd64"]; then - URL="https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-x64" -fi - -curl -sSfL "$URL" -o "$PREFIX/bin/vscode.tar.gz" - diff --git a/seeker/snippet/neovim.sh b/seeker/snippet/neovim.sh deleted file mode 100644 index c45c1f54..00000000 --- a/seeker/snippet/neovim.sh +++ /dev/null @@ -1,17 +0,0 @@ -#date: 2024-08-27T16:54:10Z -#url: https://api.github.com/gists/8fcc46f5c40c11636b086b1420c71802 -#owner: https://api.github.com/users/JanGalek - -#!/usr/bin/env sh - -sudo apt-get install ripgrep - -# installs nvm (Node Version Manager) -curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.0/install.sh | bash - -wget https://github.com/neovim/neovim/releases/download/v0.10.1/nvim-linux64.tar.gz -tar tar xzvf nvim-linux64.tar.gz -cd nvim-linux64 -sudo cp -R * /usr - -git clone https://github.com/NvChad/starter ~/.config/nvim && nvim \ No newline at end of file diff --git a/seeker/snippet/pipes_1.py b/seeker/snippet/pipes_1.py new file mode 100644 index 00000000..303349e4 --- /dev/null +++ b/seeker/snippet/pipes_1.py @@ -0,0 +1,50 @@ +#date: 2024-08-29T17:03:30Z +#url: https://api.github.com/gists/f52bd8bf0c1090729519f5f426bf2264 +#owner: https://api.github.com/users/officialdun + +from collections import deque + +def main(filename): + grid = {} + source = None + + with open(filename) as file: + for line in file: + char, x, y = line.strip().split() + x, y = int(x), int(y) + if char == '*': + source = (x, y) + grid[(x, y)] = char + + # BFS setup + queue = deque([source]) + visited = set([source]) + connected = set() + + # Directions for adjacent cells + directions = [(0, 1, '║╝╚╩╣╠╬*', '║╗╔╦╣╠╬', 'up'), # Going up + (1, 0, '═╔╚╦╩╠╬*', '═╗╝╣╩╦╬', 'right'), # Going right + (0, -1, '║╗╔╦╣╠╬*', '║╝╚╩╣╠╬', 'down'), # Going down + (-1, 0, '═╗╝╣╩╦╬*', '═╔╚╦╩╠╬','left')] # Going left + + # BFS traversal + while queue: + x, y = queue.popleft() + for dx, dy, cFrom, cTo, direction in directions: + nx, ny = x + dx, y + dy + if (nx, ny) in visited: + continue + if (nx, ny) in grid: + fromCell = grid[(x,y)] + cell = grid[(nx, ny)] + doesConnect = fromCell in cFrom and (cell.isalpha() or cell in cTo) + # print(f'{fromCell} → {cell} going {direction}. connected: {doesConnect}') + if cell == '*' or doesConnect: + if cell.isalpha(): + connected.add(cell) + queue.append((nx, ny)) + visited.add((nx, ny)) + + return ''.join(sorted(connected)) + +print(main('coding_qual_input.txt')) diff --git a/seeker/snippet/pomo.py b/seeker/snippet/pomo.py deleted file mode 100644 index 04d51991..00000000 --- a/seeker/snippet/pomo.py +++ /dev/null @@ -1,85 +0,0 @@ -#date: 2024-08-27T16:59:34Z -#url: https://api.github.com/gists/fbf95eab2ac2c6fa692603254e28aa45 -#owner: https://api.github.com/users/YigitChanson - -import tkinter as tk -from tkinter import messagebox -from tkinter import ttk # tkinter'in standart ttk modülü -import time - -WORK_TIME = 25 * 60 -SHORT_BREAK_TIME = 5 * 60 -LONG_BREAK_TIME = 15 * 60 - -class PomodoroTimer: - def __init__(self): - self.root = tk.Tk() - self.root.geometry("300x300") - self.root.title("Pomodoro Timer") - - self.timer_label = tk.Label(self.root, text="00:00", font=("TkDefaultFont", 40)) - self.timer_label.pack(pady=20) - - self.start_button = ttk.Button(self.root, text="Start", command=self.start_timer) - self.start_button.pack(pady=5) - - self.stop_button = ttk.Button(self.root, text="Stop", command=self.stop_timer, state=tk.DISABLED) - self.stop_button.pack(pady=5) - - self.reset_button = ttk.Button(self.root, text="Reset", command=self.reset_timer, state=tk.DISABLED) - self.reset_button.pack(pady=5) - - self.work_time, self.break_time = WORK_TIME, SHORT_BREAK_TIME - self.is_work_time, self.pomodoros_completed, self.is_running = True, 0, False - - self.root.mainloop() - - def start_timer(self): - self.start_button.config(state=tk.DISABLED) - self.stop_button.config(state=tk.NORMAL) - self.reset_button.config(state=tk.NORMAL) - self.is_running = True - self.update_timer() - - def stop_timer(self): - self.start_button.config(state=tk.NORMAL) - self.stop_button.config(state=tk.DISABLED) - self.reset_button.config(state=tk.NORMAL) - self.is_running = False - - def reset_timer(self): - self.is_running = False - self.start_button.config(state=tk.NORMAL) - self.stop_button.config(state=tk.DISABLED) - self.reset_button.config(state=tk.DISABLED) - self.work_time, self.break_time = WORK_TIME, SHORT_BREAK_TIME - self.is_work_time = True - self.timer_label.config(text="00:00") - - def update_timer(self): - if self.is_running: - if self.is_work_time: - self.work_time -= 1 - minutes, seconds = divmod(self.work_time, 60) - self.timer_label.config(text=f"{minutes:02}:{seconds:02}") - if self.work_time == 0: - self.is_work_time = False - self.pomodoros_completed += 1 - self.break_time = LONG_BREAK_TIME if self.pomodoros_completed % 4 == 0 else SHORT_BREAK_TIME - messagebox.showinfo( - "Good job!", - "Take a long break to rest your mind." if self.pomodoros_completed % 4 == 0 else "Take a short break and stretch your legs" - ) - else: - self.break_time -= 1 - minutes, seconds = divmod(self.break_time, 60) - self.timer_label.config(text=f"{minutes:02}:{seconds:02}") - if self.break_time == 0: - self.is_work_time = True - self.work_time = WORK_TIME - messagebox.showinfo("Break over!", "Time to get back to work!") - - self.root.after(1000, self.update_timer) - - -PomodoroTimer() \ No newline at end of file diff --git a/seeker/snippet/repack.sh b/seeker/snippet/repack.sh new file mode 100644 index 00000000..058a12da --- /dev/null +++ b/seeker/snippet/repack.sh @@ -0,0 +1,56 @@ +#date: 2024-08-29T17:01:50Z +#url: https://api.github.com/gists/a0b52ac833a4692bb0e9ab214f09afbe +#owner: https://api.github.com/users/kenvandine + +#!/bin/bash + +dir=$(dirname $(realpath $0)) +in=$1 + +if [ $UID != 0 ]; +then + echo "Must be run with root privileges, for example with sudo" + exit +fi + +if [ $# -lt 1 ]; +then + echo "USAGE: sudo $0 SOURCE_ISO" + exit +fi + +if [ -d $dir/out ]; +then + rm $dir/out/* 2>/dev/null +else + mkdir $dir/out +fi + +if [ ! -d $dir/debs ]; +then + mkdir $dir/debs +fi + +date=$(date "+%Y%m%d-%H%M") + +# Output file should be NAME-UBUNTUVERSION-DATE-HOUR:MINUTE-ARCH.iso +out=$(echo "${in//ubuntu/NAME}") +out=$(echo "${out//base/$date}") + +echo "Fetching local debian packages" +wget -O $dir/debs/google-chrome-stable_current_amd64.deb https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb + +cd $dir +echo $out > iso-version + +echo "Creating $out" +echo "Adding local debs to pool" +livefs-editor $in out/repack.iso --add-debs-to-pool debs/*.deb +echo "Copying in autoinstall.yaml" +livefs-editor out/repack.iso out/repack2.iso --cp $PWD/autoinstall.yaml new/iso/autoinstall.yaml +rm -f out/repack.iso +livefs-editor out/repack2.iso out/repack3.iso --cp $PWD/iso-version new/iso/iso-version +rm -f out/repack2.iso +mv out/repack3.iso $out + +echo "$out created" diff --git a/seeker/snippet/repackepub.sh b/seeker/snippet/repackepub.sh new file mode 100644 index 00000000..241c1a26 --- /dev/null +++ b/seeker/snippet/repackepub.sh @@ -0,0 +1,5 @@ +#date: 2024-08-29T16:55:08Z +#url: https://api.github.com/gists/11e26db32d5c7a870476954a1cd40666 +#owner: https://api.github.com/users/pa-0 + +zip -rX "../$(basename "$(realpath .)").epub" mimetype $(ls|xargs echo|sed 's/mimetype//g') \ No newline at end of file diff --git a/seeker/snippet/runner.go b/seeker/snippet/runner.go new file mode 100644 index 00000000..8551000b --- /dev/null +++ b/seeker/snippet/runner.go @@ -0,0 +1,112 @@ +//date: 2024-08-29T16:54:35Z +//url: https://api.github.com/gists/c69cdc91a4dbf731bee5aba1696e00da +//owner: https://api.github.com/users/semenovdev + +package main + +import ( + "context" + "fmt" + "io" + "log" + "log/slog" + "time" + + "awesomeProject/runner" +) + +type Service1 struct { + io.Closer +} + +func (s *Service1) Run(ctx context.Context) error { + fmt.Println("start executing of Service1") + time.Sleep(time.Minute * 10) + fmt.Printf("Service1 is running (%s)\n", ctx.Value("Привет")) + return nil +} + +func (s *Service1) Close() error { + fmt.Println("close service1") + return nil +} + +type Service2 struct { + io.Closer + + Cancel func() +} + +func (s *Service2) Run(ctx context.Context) error { + time.Sleep(time.Second * 2) + fmt.Printf("Service2 is running (%s)\n", ctx.Value("Привет")) + time.Sleep(time.Minute) + s.Cancel() + return nil +} + +func (s *Service2) Close() error { + fmt.Println("close service2") + return nil +} + +type Service3 struct { + io.Closer +} + +func (s *Service3) Run(ctx context.Context) error { + time.Sleep(time.Second * 3) + fmt.Printf("Service3 is running (%s)\n", ctx.Value("Привет")) + select {} + return nil +} + +func (s *Service3) Close() error { + fmt.Println("close service3") + return nil +} + +type Service4 struct { + io.Closer +} + +func (s *Service4) Run(ctx context.Context) error { + fmt.Printf("Service4 with error (%s)\n", ctx.Value("Привет")) + return fmt.Errorf("error in service 4") +} + +func (s *Service4) Close() error { + fmt.Println("close service4") + return nil +} + +func main() { + ctx := context.Background() + ctx = context.WithValue(ctx, "Привет", "Мир") + ctx, cancel := context.WithCancel(ctx) + + service1 := &Service1{} // завершится по таймауту и не затронет систему + service2 := &Service2{ // упадёт через минуту после старта и утянет за собой всё + Cancel: cancel, + } + service3 := &Service3{} // работал бы бесконечно, если бы не Service2 + service4 := &Service4{} // завершится с ошибкой в кроне и запишет её в лог + + app := runner.New( + runner.WithContext(ctx), + runner.WithCronJobTimeout(time.Second), + runner.WithErrorLogger(slog.Default()), + ) + err := app.AddCronJob("* * * * *", service1) + if err != nil { + log.Fatal(err) + } + err = app.AddCronJob("* * * * *", service4) + if err != nil { + log.Fatal(err) + } + + app.RegisterService(service2) + app.RegisterService(service3) + app.Run() +} diff --git a/seeker/snippet/sharepoint_connection.py b/seeker/snippet/sharepoint_connection.py deleted file mode 100644 index 4fda16b8..00000000 --- a/seeker/snippet/sharepoint_connection.py +++ /dev/null @@ -1,93 +0,0 @@ -#date: 2024-08-27T16:59:37Z -#url: https://api.github.com/gists/f22337768cf3a0f9bb74fca1f8c096b0 -#owner: https://api.github.com/users/luisdelatorre012 - -""" -dynaconf settings - -# settings.toml -[default] -site_name = "Your SharePoint Site Name" - -# .secrets.toml -[default] -client_id = "your_client_id_here" -client_secret = "**********" -tenant_id = "your_tenant_id_here" -""" - -import httpx -from msal import ConfidentialClientApplication -from typing import Optional -from dynaconf import Dynaconf - -# Initialize Dynaconf -settings = Dynaconf( - settings_files= "**********" - environments=True, - load_dotenv=True, -) - -def get_access_token(client_id: "**********": str, tenant_id: str) -> Optional[str]: - authority = f"https://login.microsoftonline.com/{tenant_id}" - app = ConfidentialClientApplication( - client_id, - authority=authority, - client_credential= "**********" - ) - - scopes = ["https://graph.microsoft.com/.default"] - result = "**********"=None) - - if not result: - result = "**********"=scopes) - - "**********" "**********" "**********" "**********" "**********"i "**********"f "**********" "**********"" "**********"a "**********"c "**********"c "**********"e "**********"s "**********"s "**********"_ "**********"t "**********"o "**********"k "**********"e "**********"n "**********"" "**********" "**********"i "**********"n "**********" "**********"r "**********"e "**********"s "**********"u "**********"l "**********"t "**********": "**********" - return result["access_token"] - else: - print(result.get("error")) - print(result.get("error_description")) - print(result.get("correlation_id")) - return None - -def get_sharepoint_site_id(access_token: "**********": str) -> Optional[str]: - headers = { - "Authorization": "**********" - "Content-Type": "application/json" - } - - url = f"https://graph.microsoft.com/v1.0/sites?search={site_name}" - - with httpx.Client() as client: - response = client.get(url, headers=headers) - - if response.status_code == 200: - sites = response.json().get("value", []) - if sites: - return sites[0]["id"] - - print(f"Error: {response.status_code}") - print(response.text) - return None - -def main() -> None: - # Read configuration from Dynaconf - client_id = settings.client_id - client_secret = "**********" - tenant_id = settings.tenant_id - site_name = settings.site_name - - access_token = "**********" - - "**********" "**********" "**********" "**********" "**********"i "**********"f "**********" "**********"a "**********"c "**********"c "**********"e "**********"s "**********"s "**********"_ "**********"t "**********"o "**********"k "**********"e "**********"n "**********": "**********" - site_id = "**********" - if site_id: - print(f"Successfully connected to SharePoint site. Site ID: {site_id}") - else: - print("Failed to retrieve SharePoint site ID.") - else: - print("Failed to acquire access token.") - -if __name__ == "__main__": - main() - diff --git a/seeker/snippet/slotVerse1withgui.py b/seeker/snippet/slotVerse1withgui.py deleted file mode 100644 index f4b5aeeb..00000000 --- a/seeker/snippet/slotVerse1withgui.py +++ /dev/null @@ -1,117 +0,0 @@ -#date: 2024-08-27T17:07:07Z -#url: https://api.github.com/gists/93e7e976f915d42bdfc710f9d38ae691 -#owner: https://api.github.com/users/YigitChanson - -import tkinter as tk -from tkinter import messagebox -import random - -class CasinoAccount: - def __init__(self): - self.balance = 0 - - def deposit(self, amount): - self.balance += amount - - def get_balance(self): - return self.balance - -account = CasinoAccount() - -ROWS = 3 -COLS = 3 - -symbol_count = { - "A": 2, - "B": 4, - "C": 6, - "D": 8 -} - -def get_slot_machine_spin(rows, cols, symbols): - all_symbols = [] - for symbol, symbol_count in symbols.items(): - for i in range(symbol_count): - all_symbols.append(symbol) - - slots = [] - for col in range(cols): - slot = [] - current_symbols = all_symbols[:] - for _ in range(rows): - value = random.choice(current_symbols) - current_symbols.remove(value) - slot.append(value) - - slots.append(slot) - - return slots - -def deposit(): - while True: - amount = input("What would you like to deposit? (Maximum $300): ") - if amount.isdigit(): - amount = int(amount) - if 0 < amount <= 300: - account.deposit(amount) - print(f"Deposited ${amount}. Your current balance is ${account.get_balance()}.") - break - else: - print("Invalid amount.") - else: - print("Invalid input. Please enter a number.") - return account.get_balance() - -def update_balance_label(): - balance_label.config(text=f"Balance: ${account.get_balance()}") - -def place_bet(): - lines = int(lines_var.get()) - bet = int(bet_entry.get()) - total_bet = bet * lines - - if total_bet > account.get_balance(): - messagebox.showerror("Error", "You do not have enough balance to place this bet.") - return - - account.balance -= total_bet - update_balance_label() - - columns = get_slot_machine_spin(ROWS, COLS, symbol_count) - for i in range(ROWS): - for j in range(COLS): - slot_labels[i][j].config(text=columns[j][i]) - - # Slot sonuçlarını kontrol et (Bu kısım geliştirilebilir) - messagebox.showinfo("Result", "Good luck next time!") - -# Tkinter GUI -root = tk.Tk() -root.title("Casino Slot Machine") - -balance_label = tk.Label(root, text=f"Balance: ${account.get_balance()}") -balance_label.pack() - -deposit_button = tk.Button(root, text="Deposit $100", command=lambda: [account.deposit(100), update_balance_label()]) -deposit_button.pack() - -lines_var = tk.StringVar(value="1") -lines_label = tk.Label(root, text="Number of lines to bet on (1-3):") -lines_label.pack() -lines_entry = tk.Entry(root, textvariable=lines_var) -lines_entry.pack() - -bet_label = tk.Label(root, text="Bet amount per line:") -bet_label.pack() -bet_entry = tk.Entry(root) -bet_entry.pack() - -spin_button = tk.Button(root, text="Spin", command=place_bet) -spin_button.pack() - -slot_labels = [[tk.Label(root, text="") for _ in range(COLS)] for _ in range(ROWS)] -for row in slot_labels: - for label in row: - label.pack() - -root.mainloop() diff --git a/seeker/snippet/slotVerse2nogui.py b/seeker/snippet/slotVerse2nogui.py deleted file mode 100644 index cb547b00..00000000 --- a/seeker/snippet/slotVerse2nogui.py +++ /dev/null @@ -1,107 +0,0 @@ -#date: 2024-08-27T17:08:41Z -#url: https://api.github.com/gists/5e1491aee45e618ca0422cd92fecda53 -#owner: https://api.github.com/users/YigitChanson - -import random - -class CasinoAccount: - def __init__(self): - self.balance = 0 - - def deposit(self, amount): - self.balance += amount - - def get_balance(self): - return self.balance - -# Sınıfın dışına çıkarıldı -account = CasinoAccount() - -ROWS = 3 -COLS = 3 - -symbol_count = { - "A": 2, - "B": 4, - "C": 6, - "D": 8 -} - -def get_slot_machine_spin(rows, cols, symbols): - all_symbols = [] - for symbol, symbol_count in symbols.items(): - for i in range(symbol_count): - all_symbols.append(symbol) - - slots = [] - for col in range(cols): - slot = [] - current_symbols = all_symbols[:] - for _ in range(rows): - value = random.choice(current_symbols) - current_symbols.remove(value) - slot.append(value) - - slots.append(slot) - - return slots - -def print_slot_machine(slots): - for row in range(len(slots[0])): - for i, slot in enumerate(slots): - if i != len(slots) - 1: - print(slot[row], "|", end=" ") - else: - print(slot[row]) - -def deposit(): - while True: - amount = input("What would you like to deposit? (Maximum $300): ") - if amount.isdigit(): - amount = int(amount) - if 0 < amount <= 300: - account.deposit(amount) - print(f"Deposited ${amount}. Your current balance is ${account.get_balance()}.") - break - else: - print("Invalid amount.") - else: - print("Invalid input. Please enter a number.") - return account.get_balance() - -def get_number_of_lines(): - while True: - lines = input("How many lines would you like to bet on? (1-3): ") - if lines.isdigit() and 1 <= int(lines) <= 3: - lines = int(lines) - return lines - else: - print("Invalid input. Please enter a number between 1 and 3.") - -def get_bet(): - while True: - amount = input("What would you like to bet on each line? ($): ") - if amount.isdigit(): - amount = int(amount) - if amount >= 1: - return amount - else: - print("The minimum bet is $1.") - else: - print("Invalid input. Please enter a number.") - -def main(): - balance = deposit() - lines = get_number_of_lines() - bet = get_bet() - total_bet = bet * lines - if total_bet > balance: - print(f"You do not have enough balance to place this bet. Your current balance is ${balance}.") - else: - print(f"You are betting ${bet} on {lines} lines. Total bet is ${total_bet}.") - print(f"Your remaining balance after this bet is ${balance - total_bet}.") - - columns = get_slot_machine_spin(ROWS, COLS, symbol_count) - print_slot_machine(columns) - -main() \ No newline at end of file diff --git a/seeker/snippet/spbpu_schedule_to_google_calendar.py b/seeker/snippet/spbpu_schedule_to_google_calendar.py new file mode 100644 index 00000000..b5013d3a --- /dev/null +++ b/seeker/snippet/spbpu_schedule_to_google_calendar.py @@ -0,0 +1,262 @@ +#date: 2024-08-29T17:04:19Z +#url: https://api.github.com/gists/335367f343d28ca3f6612f965b33bbd3 +#owner: https://api.github.com/users/iwsylit + +""" +Simple script for copying SPbPU lessons to Google Calendar. + +Before usage: +- find the id of your group +- get Google Calendar API credentials +- fill env variables (GROUP_ID, GOOGLE_CALENDAR_ID, GOOGLE_SECRETS_FILE, GOOGLE_CREDENTIALS_FILE) +- pip install requests==2.32.3 google-api-python-client==2.142.0 google-auth-oauthlib==2.0.0 +""" + +import logging +import os +import pickle +from abc import ABC, abstractmethod +from datetime import datetime, timedelta +from operator import itemgetter +from typing import Any, Self +from zoneinfo import ZoneInfo + +import requests +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build + +logging.basicConfig(format="%(asctime)s %(levelname)-8s %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S") + + +def getenv(key: str) -> str: + var = os.getenv(key) + + if var is None: + raise ValueError(f"There is no env variable {key}") + + return var + + +class DateTime(datetime): + def __new__(cls, *args: Any, timezone: ZoneInfo = ZoneInfo("Etc/GMT-3"), **kwargs: Any) -> Self: + instance = super().__new__(cls, *args, **kwargs) + + return instance.replace(tzinfo=timezone) + + @classmethod + def from_datetime(cls, datetime: datetime) -> Self: + return cls( + datetime.year, + datetime.month, + datetime.day, + datetime.hour, + datetime.minute, + datetime.second, + datetime.microsecond, + ) + + @classmethod + def from_iso(cls, date: str) -> Self: + return cls.from_datetime(datetime.fromisoformat(date)) + + @classmethod + def from_date_time(cls, date: str, time: str) -> Self: + return cls.from_datetime(datetime.strptime(f"{date} {time}", "%Y-%m-%d %H:%M")) + + @classmethod + def today(cls) -> Self: + return cls.now().replace(hour=0, minute=0, second=0, microsecond=0) + + def monday(self) -> Self: + days_to_monday = timedelta(days=self.weekday()) + monday = self - days_to_monday + + return monday.replace(hour=0, minute=0, second=0, microsecond=0) + + def isodateformat(self) -> str: + return self.strftime("%Y-%m-%d") + + +class Event(ABC): + def __init__(self, id: str, name: str, location: str, description: str, start: DateTime, end: DateTime) -> None: + self.id = id + self.name = name + self.location = location + self.description = description + self.start = start + self.end = end + + @abstractmethod + def construct(cls, event: dict) -> Self: + pass + + def googleformat(self) -> dict: + return { + "summary": self.name, + "location": self.location, + "description": self.description, + "start": { + "dateTime": self.start.isoformat(), + }, + "end": { + "dateTime": self.end.isoformat(), + }, + } + + def __eq__(self, value: object) -> bool: + return self.__hash__() == value.__hash__() + + def __hash__(self) -> int: + return hash((self.name, self.location, self.description, self.start, self.end)) + + def __repr__(self) -> str: + return f"{self.name}; {self.start.time()}-{self.end.time()}; {self.description}" + + +class GoogleEvent(Event): + @classmethod + def construct(cls, event: dict) -> Self: + return cls( + id=event["id"], + name=event["summary"], + location=event["location"], + description=event["description"], + start=DateTime.from_iso(event["start"]["dateTime"]), + end=DateTime.from_iso(event["end"]["dateTime"]), + ) + + +class PolyEvent(Event): + @classmethod + def construct(cls, event: dict) -> Self: + auditory = event["auditories"][0] + + teacher = ", ".join(map(itemgetter("full_name"), event["teachers"])) + lms = "LMS: " + event["lms_url"] if event["lms_url"] else "" + webinar = "Webinar: " + event["webinar_url"] if event["webinar_url"] else "" + + return cls( + id="", + name=event["subject"], + location=f"{auditory['building']['name']}, ауд. {auditory['name']}", + description="\n".join([teacher, lms, webinar]).strip(), + start=DateTime.from_date_time(event["date"], event["time_start"]), + end=DateTime.from_date_time(event["date"], event["time_end"]), + ) + + +class Calendar(ABC): + def __init__(self) -> None: + super().__init__() + logging.info(f"Connecting to {self.__class__.__name__}") + + @abstractmethod + def list_week_events(self, start: DateTime) -> set[Event]: + pass + + +class GoogleCalendar(Calendar): + _scopes = ["https://www.googleapis.com/auth/calendar"] + _secrets_file = "**********" + _credentials_file = getenv("GOOGLE_CREDENTIALS_FILE") + _calendar_id = getenv("GOOGLE_CALENDAR_ID") + + def __init__(self) -> None: + super().__init__() + + if not os.path.exists(self._credentials_file): + flow = "**********" + creds = flow.run_local_server(port=0) + + with open(self._credentials_file, "wb") as f: + pickle.dump(creds, f) + else: + with open(self._credentials_file, "rb") as f: + creds = pickle.load(f) + + self.api = build("calendar", "v3", credentials=creds) + + def list_week_events(self, start: DateTime) -> set[Event]: + end = start + timedelta(days=6) + + events = ( + self.api.events() + .list( + calendarId=self._calendar_id, + timeMin=start.isoformat(), + timeMax=end.isoformat(), + ) + .execute() + )["items"] + + return set(map(GoogleEvent.construct, events)) + + def create(self, event: Event) -> None: + logging.info(f"Create event {event}") + self.api.events().insert(calendarId=self._calendar_id, body=event.googleformat()).execute() + + def remove(self, event: Event) -> None: + logging.info(f"Remove event {event}") + self.api.events().delete(calendarId=self._calendar_id, eventId=event.id).execute() + + +class PolyCalendar(Calendar): + _group_id = getenv("GROUP_ID") + + def list_week_events(self, start: DateTime) -> set[Event]: + response = requests.get(self._url(start)) + response.raise_for_status() + schedule = response.json() + + events = [] + + for day in schedule["days"]: + for event in day["lessons"]: + event["date"] = day["date"] + + events.append(PolyEvent.construct(event)) + + return set(events) + + def _url(self, start: DateTime) -> str: + return f"https://ruz.spbstu.ru/api/v1/ruz/scheduler/{self._group_id}?date={start.isodateformat()}" + + +if __name__ == "__main__": + logging.info("Begin working") + + poly_calendar = PolyCalendar() + google_calendar = GoogleCalendar() + + for week in range(4): + start = DateTime.today().monday() + timedelta(days=7 * week) + + logging.info(f"Parse {start.isodateformat()} week") + + poly_events = poly_calendar.list_week_events(start) + google_events = google_calendar.list_week_events(start) + + new_events = poly_events.difference(google_events) + expired_events = google_events.difference(poly_events) + + logging.debug(f"Poly events: {list(poly_events)}") + logging.debug(f"Google events: {list(google_events)}") + logging.debug(f"New events: {list(new_events)}") + logging.debug(f"Expired events: {list(expired_events)}") + + if not new_events and not expired_events: + logging.info("There is no updates") + elif not new_events: + logging.info("There is no new events") + elif not expired_events: + logging.info("There is no expired events") + + for event in expired_events: + google_calendar.remove(event) + + for event in new_events: + google_calendar.create(event) +nt) + + for event in new_events: + google_calendar.create(event) diff --git a/seeker/snippet/ssacli_to_json.py b/seeker/snippet/ssacli_to_json.py deleted file mode 100644 index fda59b46..00000000 --- a/seeker/snippet/ssacli_to_json.py +++ /dev/null @@ -1,52 +0,0 @@ -#date: 2024-08-27T16:54:55Z -#url: https://api.github.com/gists/bae3ba42d696e3d3d597237a680f4dff -#owner: https://api.github.com/users/IMpcuong - -#!/usr/bin/env python3 -import sys -import json - -def parse_value(value): - try: - # Try converting to integer - return int(value) - except ValueError: - try: - # Try converting to float - return float(value.strip(' C')) # Also strip ' C' in case of temperatures - except ValueError: - # Return original string if no conversion is possible - return value - -def parse_line(line): - if ':' in line: - key, value = line.split(':', 1) - return key.strip(), parse_value(value.strip()) - return None, None - -def parse_ssacli_output(ssacli_output): - data = {'Controller': {}, 'Ports': [], 'Drives': []} - current_entity = data['Controller'] - - for line in ssacli_output: - line = line.strip() - if "Port Name" in line: - current_entity = {} - data['Ports'].append(current_entity) - elif "physicaldrive" in line: - current_entity = {'ID': line.split()[-1]} - data['Drives'].append(current_entity) - else: - key, value = parse_line(line) - if key: - current_entity[key] = value - - return data - -def main(): - ssacli_output = sys.stdin.readlines() - parsed_data = parse_ssacli_output(ssacli_output) - print(json.dumps(parsed_data, indent=4, ensure_ascii=False)) - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/seeker/snippet/thirty_seven.java b/seeker/snippet/thirty_seven.java deleted file mode 100644 index 4fbb16b0..00000000 --- a/seeker/snippet/thirty_seven.java +++ /dev/null @@ -1,20 +0,0 @@ -//date: 2024-08-27T16:51:43Z -//url: https://api.github.com/gists/91aae8961d6274e074562132fc06edc0 -//owner: https://api.github.com/users/sasub-mlp - -import java.util.Scanner; - -public class thirty_seven { - public static void main(String[] args){ - Scanner scanner=new Scanner(System.in); - int fact=1; - System.out.println("Enter the number to find the factorial of it: "); - int n= scanner.nextInt(); - int m=n; - while(n!=0){ - fact*=n; - n--; - } - System.out.println("Factorial of "+m+" is : "+fact); - } -} diff --git a/seeker/snippet/ubuntu-container.sh b/seeker/snippet/ubuntu-container.sh deleted file mode 100644 index b15335ca..00000000 --- a/seeker/snippet/ubuntu-container.sh +++ /dev/null @@ -1,72 +0,0 @@ -#date: 2024-08-27T16:52:24Z -#url: https://api.github.com/gists/52bf52fa78a0aec017cd3dcefb9df41f -#owner: https://api.github.com/users/Lokawn - -#!/bin/bash -e -# Creates a systemd-nspawn container with Ubuntu - -CODENAME=${CODENAME:-noble} - -wget_or_curl () { - if command -v wget >/dev/null; then - wget "$1" -O "$2" - elif command -v curl >/dev/null; then - curl -L "$1" -o "$2" - else - echo "missing either curl or wget" >&2 - return 1 - fi -} - -if [ $UID -ne 0 ]; then - echo "run this script as root" >&2 - exit 1 -fi - -dest="$1" -if [ -z "$dest" ]; then - echo "Usage: $0 " >&2 - exit 0 -fi -if [ -e "$dest/usr/bin" ]; then - echo "destination already seems to contain a root file system" >&2 - exit 1 -fi - -if [ "$(uname -m)" = x86_64 ]; then - guestarch=amd64 -elif [ "$(uname -m)" = aarch64 ]; then - guestarch=arm64 -else - echo "unsupported architecture" >&2 - exit 1 -fi -rootfs=$(mktemp) -trap 'rm $rootfs' EXIT - -wget_or_curl "http://cloud-images.ubuntu.com/${CODENAME}/current/${CODENAME}-server-cloudimg-${guestarch}-root.tar.xz" $rootfs - -mkdir -p "$dest" -tar -xaf $rootfs -C "$dest" --numeric-owner - -sed '/^root: "**********" -rm "$dest/etc/resolv.conf" # systemd configures this -# https://github.com/systemd/systemd/issues/852 -[ -f "$dest/etc/securetty" ] && \ - printf 'pts/%d\n' $(seq 0 10) >>"$dest/etc/securetty" -# container needs no mounts ->"$dest/etc/fstab" -# disable services and uninstall packages -systemd-nspawn -q -D "$dest" sh -c ' -[ -s /etc/environment ] && . /etc/environment -for unit in ssh.service ssh.socket systemd-timesyncd systemd-networkd-wait-online systemd-resolved; do - systemctl is-enabled "$unit" && systemctl disable "$unit" -done -apt-get -qq satisfy -y --purge "Conflicts: lxcfs, lxd, snapd, cloud-init" || \ -apt-get -qq purge --autoremove snapd lxcfs lxd cloud-init -' - - -echo "" -echo "Ubuntu $CODENAME $guestarch container was created successfully" -ch container was created successfully" diff --git a/seeker/snippet/vendor_sleuth.sh b/seeker/snippet/vendor_sleuth.sh new file mode 100644 index 00000000..0e4b8bc5 --- /dev/null +++ b/seeker/snippet/vendor_sleuth.sh @@ -0,0 +1,31 @@ +#date: 2024-08-29T17:02:18Z +#url: https://api.github.com/gists/0cd89a25c730ac267559c44b5487c9ff +#owner: https://api.github.com/users/spezifisch + +export DEVICE_BASE=$HOME/android/lineage/device/xiaomi/veux +export VENDOR_BASE=$HOME/android/lineage/vendor/xiaomi/veux/proprietary +export STOCK_BASE=$HOME/Dumps/veux-stock-vendor-20240829-1 + +for x in $(cat "$DEVICE_BASE/proprietary-files.txt" | cut -d'|' -f1 | grep -Ev '(^#|^$)'); do + F="$VENDOR_BASE/$x" + + if [ -e "$F" ]; then + #echo "found $x in vendor" + + G="$STOCK_BASE/$x" + if [ -e "$G" ]; then + #echo "found $x in stock" + + if diff -q "$F" "$G" > /dev/null; then + # same files + echo "match-vendor/stock $x" + else + echo "mismatch-vendor/stock $x" + fi + else + echo "missing-compare $x" + fi + else + echo "extraneous $x" + fi +done \ No newline at end of file diff --git a/seeker/snippet/vlm_rag.py b/seeker/snippet/vlm_rag.py new file mode 100644 index 00000000..6596252c --- /dev/null +++ b/seeker/snippet/vlm_rag.py @@ -0,0 +1,319 @@ +#date: 2024-08-29T16:53:11Z +#url: https://api.github.com/gists/f4006d00cc1fcfa237d7f191c940011d +#owner: https://api.github.com/users/sovrasov + +import time +import torch +from torch.utils.data import DataLoader +from tqdm import tqdm +from transformers import AutoProcessor +from PIL import Image +from io import BytesIO + + +if torch.cuda.is_available(): + device = torch.device("cuda") + dtype = torch.bfloat16 +else: + device = torch.device("cpu") + dtype = torch.float32 + + +from torch import nn +from transformers.models.paligemma.modeling_paligemma import PaliGemmaForConditionalGeneration, PaliGemmaPreTrainedModel + +class ColPali(PaliGemmaPreTrainedModel): + def __init__(self, config): + super(ColPali, self).__init__(config=config) + self.model: PaliGemmaForConditionalGeneration = PaliGemmaForConditionalGeneration(config) + self.dim = 128 + self.custom_text_proj = nn.Linear(self.model.config.text_config.hidden_size, self.dim) + self.main_input_name = "doc_input_ids" + + def forward(self, *args, **kwargs): + """ + Forward pass through Llama and the linear layer for dimensionality reduction + + Args: + - input_ids (torch.LongTensor): "**********" + - attention_mask (torch.LongTensor): The attention mask tensor. + + Returns: + - torch.Tensor: "**********" + """ + outputs = self.model(*args, output_hidden_states=True, **kwargs) + last_hidden_states = outputs.hidden_states[-1] + proj = self.custom_text_proj(last_hidden_states) + # normalize l2 norm + proj = proj / proj.norm(dim=-1, keepdim=True) + proj = proj * kwargs["attention_mask"].unsqueeze(-1) + return proj + + + +model_name = "vidore/colpali" +model = ColPali.from_pretrained("google/paligemma-3b-mix-448", torch_dtype=dtype, device_map=device).eval() +model.load_adapter(model_name) +model.to(device) +processor = AutoProcessor.from_pretrained(model_name) + +#BERT_Article.pdf: https://arxiv.org/pdf/1810.04805 +#Transformers_Article.pdf: https://arxiv.org/pdf/1706.03762 + +pdfs = [{"file_name": "data/BERT_Article.pdf"}, {"file_name": "data/Transformers_Article.pdf"}] + + +import requests +from pdf2image import convert_from_path +from pypdf import PdfReader + +def preprocessing(pdfs): + documents = [] + images = [] + metadata = [] + for pdf in pdfs: + file_name = pdf["file_name"] + reader = PdfReader(file_name) + for page_number in range(len(reader.pages)): + page = reader.pages[page_number] + text = page.extract_text() + documents.append(text) + metadata.append({"page": page_number, "file_path": file_name}) + images_for_file = convert_from_path(file_name) + images += images_for_file + assert len(images) == len(documents) + assert len(metadata) == len(documents) + return documents, images, metadata + +documents, images, metadata = preprocessing(pdfs) + +from pdf2image import convert_from_path +from PIL import Image +from torch.utils.data import DataLoader +from tqdm import tqdm +from transformers import AutoProcessor + +def indexing(images): + ds = [] + dataloader = DataLoader( + images, + batch_size=1, + shuffle=False, + collate_fn=lambda x: process_images(processor, x), + ) + for batch_doc in tqdm(dataloader): + with torch.no_grad(): + batch_doc = {k: v.to(device) for k, v in batch_doc.items()} + embeddings_doc = model(**batch_doc) + ds.extend(list(torch.unbind(embeddings_doc.to("cpu")))) + return ds + +# Help function to process the images into the right (data) format +def process_images(processor, images, max_length: int = 50): + texts_doc = ["Describe the image."] * len(images) + images = [image.convert("RGB") for image in images] + + batch_doc = processor( + text=texts_doc, + images=images, + return_tensors="pt", + padding="longest", + max_length=max_length + processor.image_seq_length, + ) + return batch_doc + +index = indexing(images) + + +# The model requires a mock image to be added to the query. +mock_image = Image.new("RGB", (448, 448), (255, 255, 255)) + +def search(query: str, index, documents, images, metadata, k=5): + # text, images, and metadata are just passed without processing + qs = [] + with torch.no_grad(): + batch_query = process_queries(processor, [query], mock_image) + batch_query = {k: v.to(device) for k, v in batch_query.items()} + embeddings_query = model(**batch_query) + qs.extend(list(torch.unbind(embeddings_query.to("cpu")))) + # run evaluation + scores = evaluate_colbert(qs, index) + relevant_pages = torch.topk(scores, k, dim=1, largest=True).indices + relevant_pages = relevant_pages.squeeze() + result = [] + for p in relevant_pages: + result.append({"document": documents[p], "image": images[p], "score": scores[:,p].item(), "metadata": metadata[p]}) + return result + +# Help function to process the queries into the right (data) format +def process_queries(processor, queries, mock_image, max_length: int = 50): + texts_query = [] + for query in queries: + query = f"Question: {query}" + texts_query.append(query) + + batch_query = processor( + images=[mock_image.convert("RGB")] * len(texts_query), + # NOTE: the image is not used in batch_query but it is required for calling the processor + text=texts_query, + return_tensors="pt", + padding="longest", + max_length=max_length + processor.image_seq_length, + ) + del batch_query["pixel_values"] + + batch_query["input_ids"] = batch_query["input_ids"][..., processor.image_seq_length :] + batch_query["attention_mask"] = batch_query["attention_mask"][..., processor.image_seq_length :] + return batch_query + +# Help function to calculate the scores between queries and documents +def evaluate_colbert(qs, ps, batch_size=128) -> torch.Tensor: + scores = [] + for i in range(0, len(qs), batch_size): + scores_batch = [] + qs_batch = torch.nn.utils.rnn.pad_sequence(qs[i : i + batch_size], batch_first=True, padding_value=0).to(device) + for j in range(0, len(ps), batch_size): + ps_batch = torch.nn.utils.rnn.pad_sequence( + ps[j : j + batch_size], batch_first=True, padding_value=0 + ).to(device) + scores_batch.append(torch.einsum("bnd,csd->bcns", qs_batch, ps_batch).max(dim=3)[0].sum(dim=2)) + scores_batch = torch.cat(scores_batch, dim=1).cpu() + scores.append(scores_batch) + scores = torch.cat(scores, dim=0) + return scores + +# Function for image processing +def scale_image(image: Image.Image, new_height: int = 1024) -> Image.Image: + """ + Scale an image to a new height while maintaining the aspect ratio. + """ + # Calculate the scaling factor + width, height = image.size + aspect_ratio = width / height + new_width = int(new_height * aspect_ratio) + + # Resize the image + scaled_image = image.resize((new_width, new_height)) + + return scaled_image + +query = "How many transformers blocks in BERT Base? Justify your answer." +retrieved_documents = search(query=query, index=index, documents=documents, images=images, metadata=metadata, k=3) + + +from IPython.display import display, HTML +import io +import base64 + +def display_images(retrieved_documents): + html = "" + + for r in retrieved_documents: + img = r["image"] # Assuming this is a PIL Image object + title1 = f"File: {r['metadata']['file_path']}" # Extracting the title from metadata + title2 = f"Page: {r['metadata']['page']}" # Extracting the title from metadata + title3 = f"Score: {r['score']}" # Extracting the title from metadata + + # Save the image to a BytesIO object + img_byte_arr = io.BytesIO() + img.save(img_byte_arr, format='PNG') # Save as PNG or any other format + img_byte_arr.seek(0) # Move to the beginning of the BytesIO object + img_data = img_byte_arr.getvalue() + img_base64 = base64.b64encode(img_data).decode('utf-8') # Encode to base64 + + # Create HTML for image with titles above + html += f""" + + """ + + html += "
+
+ {title1}
{title2}
{title3} +
+ +
" + display(HTML(html)) + +# Example usage +#display_images(retrieved_documents) + +import base64 +import io + +# Function to process images +def get_base64_image(img: str | Image.Image, add_url_prefix: bool = True) -> str: + """ + Convert an image (from a filepath or a PIL.Image object) to a JPEG-base64 string. + """ + if isinstance(img, str): + img = Image.open(img) + elif isinstance(img, Image.Image): + pass + else: + raise ValueError("`img` must be a path to an image or a PIL Image object.") + + buffered = io.BytesIO() + img.save(buffered, format="jpeg") + b64_data = base64.b64encode(buffered.getvalue()).decode("utf-8") + + return f"data:image/jpeg;base64,{b64_data}" if add_url_prefix else b64_data + + +# Format the images in the right format for the prompt +def convert_documents_to_prompt(retrieved_documents): + images_for_vlm = [] + for r in retrieved_documents: + images_for_vlm.append( + { + "type": "image_url", + "image_url": {"url": get_base64_image(r["image"])} + }) + return images_for_vlm + +images_for_vlm = convert_documents_to_prompt(retrieved_documents) + +images_raw = [r["image"] for r in retrieved_documents] + + +from openai import OpenAI + +# Visual Language Model +def vlm(prompt, retrieved_documents): + + images_for_vlm = convert_documents_to_prompt(retrieved_documents) + print(images_for_vlm) + print(prompt) + content = [{"type": "text", "text": prompt}] + images_for_vlm + + client = OpenAI() + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + { + "role": "user", + "content": content + } + ], + max_tokens= "**********" + ) + return response.choices[0].message.content + +from transformers import AutoProcessor, LlavaForConditionalGeneration + +start = time.time() + +model = LlavaForConditionalGeneration.from_pretrained("llava-hf/llava-1.5-7b-hf") +processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf") +prompt = f"USER: {''*len(images_raw)}\n{query} ASSISTANT:" +inputs = processor(text=prompt, images=images_raw, return_tensors="pt") +generate_ids = "**********"=100) +print(processor.batch_decode(generate_ids, skip_special_tokens= "**********"=False)[0]) +print(f"Elapsed {time.time() - start}") + + +#from IPython.display import display, Markdown +#result = vlm(prompt=query, retrieved_documents=retrieved_documents) +#print(result) +#display(Markdown(result))uery, retrieved_documents=retrieved_documents) +#print(result) +#display(Markdown(result)) \ No newline at end of file