2024-08-20 17:12:29.821398 new snippets

eduardocerqueira · Aug 20, 2024 · df69499 · df69499
1 parent 4cf0336
commit df69499
Show file tree

Hide file tree

Showing 13 changed files with 877 additions and 0 deletions.
diff --git a/seeker/report.txt b/seeker/report.txt
@@ -1,3 +1,26 @@
+--------------------------------------------------------------------------------
+ 2024-08-20 17:12:29.821398
+--------------------------------------------------------------------------------
+  On branch main
+Your branch is up to date with 'origin/main'.
+
+Untracked files:
+  (use "git add <file>..." to include in what will be committed)
+	snippet/dedup_roaster.py
+	snippet/file_to_env.sh
+	snippet/hw_m4_fake_math.py
+	snippet/hw_m4_true_math.py
+	snippet/hw_module_4_1.py
+	snippet/install-java.command
+	snippet/lab2.py
+	snippet/lem2gtif.py
+	snippet/lulu.py
+	snippet/new_PD.py
+	snippet/rockpaperscissors_infinite.py
+	snippet/sagemaker_studio_lifecycle_config.sh
+
+nothing added to commit but untracked files present (use "git add" to track)
+
 --------------------------------------------------------------------------------
  2024-08-19 17:13:16.613524
 --------------------------------------------------------------------------------

diff --git a/seeker/snippet/dedup_roaster.py b/seeker/snippet/dedup_roaster.py
@@ -0,0 +1,55 @@
+#date: 2024-08-20T16:46:29Z
+#url: https://api.github.com/gists/5ad11d4d71544e6eeb18901f262328ca
+#owner: https://api.github.com/users/datageek19
+
+import pandas as pd
+import glob
+import os
+
+# Columns that together identify one person in the roster files.
+DUPLICATE_KEYS = ['First_Name', 'Last_Name', 'Birthdate']
+
+
+def _month_year_key(path):
+    """Return the 'MM-YYYY' grouping key taken from a roster filename.
+
+    The name is expected to end in '-DD-MM-YYYY.csv'. The extension is
+    removed first so the key is '03-2024' rather than '03-2024.csv'.
+    """
+    stem = os.path.splitext(os.path.basename(path))[0]
+    date_part = stem.split('-')[-3:]  # ['01', '03', '2024']
+    return '-'.join(date_part[1:])
+
+
+# Step 1: List all relevant CSV files
+files = glob.glob("UHCCS-1-z-*.csv")  # Adjust the pattern to match your files
+
+# Step 2: Group the files by month-year
+file_groups = {}
+for file in files:
+    file_groups.setdefault(_month_year_key(file), []).append(file)
+
+# Step 3: Find the duplicated people inside each month's files.
+# The per-month results are kept in all_duplicates so the report step below
+# covers every month, not just the last group that was processed.
+all_duplicates = {}
+for month_year, file_list in file_groups.items():
+    combined_df = pd.concat(
+        [pd.read_csv(f) for f in file_list], ignore_index=True
+    )
+
+    # keep=False flags every member of a duplicate group, not only the repeats
+    duplicates = combined_df.duplicated(subset=DUPLICATE_KEYS, keep=False)
+    duplicates_df = combined_df[duplicates]
+
+    print(f"Duplicates for {month_year}:")
+    print(duplicates_df)
+
+    all_duplicates[month_year] = duplicates_df
+
+# Step 4: Write one sheet per month. Skipped when no input files matched,
+# so an empty workbook is not created.
+if all_duplicates:
+    # The context manager saves and closes the workbook on exit.
+    with pd.ExcelWriter('duplicate_report.xlsx', engine='xlsxwriter') as writer:
+        for month_year, duplicates_df in all_duplicates.items():
+            duplicates_df.to_excel(writer, sheet_name=month_year, index=False)
diff --git a/seeker/snippet/file_to_env.sh b/seeker/snippet/file_to_env.sh
@@ -0,0 +1,5 @@
+#date: 2024-08-20T17:11:25Z
+#url: https://api.github.com/gists/f8c8885b6c29d8968184402b47914f5c
+#owner: https://api.github.com/users/kaedonkers
+
+# Load the key from the file, then export it only if the read succeeded.
+API_KEY=$(cat ~/path/to/apikey_file) && export API_KEY
diff --git a/seeker/snippet/hw_m4_fake_math.py b/seeker/snippet/hw_m4_fake_math.py
@@ -0,0 +1,9 @@
+#date: 2024-08-20T16:51:02Z
+#url: https://api.github.com/gists/84b82cfd4ac148cd130e16cffe06af02
+#owner: https://api.github.com/users/PavelYar-1
+
+def divide(first, second):
+    """Divide ``first`` by ``second``.
+
+    Returns the quotient, or the string 'Ошибка' when ``second`` equals zero.
+    """
+    return 'Ошибка' if second == 0 else first / second
diff --git a/seeker/snippet/hw_m4_true_math.py b/seeker/snippet/hw_m4_true_math.py
@@ -0,0 +1,9 @@
+#date: 2024-08-20T16:51:02Z
+#url: https://api.github.com/gists/84b82cfd4ac148cd130e16cffe06af02
+#owner: https://api.github.com/users/PavelYar-1
+
+def divide(first, second):
+    """Divide ``first`` by ``second``.
+
+    Returns the quotient, or the string 'inf' when ``second`` equals zero.
+    """
+    return 'inf' if second == 0 else first / second
diff --git a/seeker/snippet/hw_module_4_1.py b/seeker/snippet/hw_module_4_1.py
@@ -0,0 +1,15 @@
+#date: 2024-08-20T16:49:51Z
+#url: https://api.github.com/gists/57c4aae920934d9c7d78a2b1df6de13c
+#owner: https://api.github.com/users/PavelYar-1
+
+from hw_m4_fake_math import divide as fake
+from hw_m4_true_math import  divide as true
+
+# Exercise both divide() variants, including a division by zero for each,
+# then print the four results one per line.
+result1, result2 = fake(69, 3), fake(3, 0)
+result3, result4 = true(49, 7), true(15, 0)
+print(result1, result2, result3, result4, sep='\n')
diff --git a/seeker/snippet/install-java.command b/seeker/snippet/install-java.command
@@ -0,0 +1,38 @@
+#date: 2024-08-20T16:45:31Z
+#url: https://api.github.com/gists/ff44e7ad3f67fc5b59a5fde56ba0f1ef
+#owner: https://api.github.com/users/ankitahuja0508
+
+# Usage: install-java.command [MAJOR_VERSION]
+# Installs the Oracle JDK cask with Homebrew and points JAVA_HOME at it in
+# ~/.zshrc. Without an argument the unversioned oracle-jdk cask is installed.
+
+# Succeeds only when $1 is a non-empty string of decimal digits.
+is_integer() {
+    case "$1" in
+        ''|*[!0-9]*) return 1 ;;
+        *) return 0 ;;
+    esac
+}
+
+# Validate the optional version argument before touching ~/.zshrc, so a bad
+# argument does not strip the existing JAVA_HOME and then exit.
+if [ -n "$1" ] && ! is_integer "$1"; then
+    echo "Error: The argument '$1' is not an integer. Please provide a valid Java version number." >&2
+    exit 1
+fi
+
+# Remove any existing JAVA_HOME entries in .zshrc
+echo "Removing old JAVA_HOME if any..."
+# sed fails on a missing file, so only edit .zshrc when it exists.
+if [ -f ~/.zshrc ]; then
+    sed -i '' '/export JAVA_HOME=/d' ~/.zshrc
+fi
+
+if [ -n "$1" ]; then
+    echo "Installing Oracle JDK version $1..."
+    # Stop here on failure so JAVA_HOME is never pointed at a JDK that was
+    # not installed.
+    brew install "oracle-jdk@$1" --cask || exit 1
+    echo "setting JAVA_HOME for version $1..."
+    # \$ keeps the command substitution literal: it is evaluated each time
+    # .zshrc is loaded, not now.
+    echo "export JAVA_HOME=\$(/usr/libexec/java_home -v $1)" >> ~/.zshrc
+else
+    echo "Installing the latest Oracle JDK..."
+    brew install oracle-jdk --cask || exit 1
+    echo "setting JAVA_HOME for the latest version..."
+    echo "export JAVA_HOME=\$(/usr/libexec/java_home)" >> ~/.zshrc
+fi
+
+# Apply changes to .zshrc (this only affects the shell running this script)
+source ~/.zshrc
+
+# Check Java version
+echo "checking java version"
+java -version
diff --git a/seeker/snippet/lab2.py b/seeker/snippet/lab2.py
@@ -0,0 +1,32 @@
+#date: 2024-08-20T16:54:43Z
+#url: https://api.github.com/gists/b65cd558432b4ceb4707b9180c09ca7e
+#owner: https://api.github.com/users/sydseats
+
+#Class :CSE 1321L
+#Term: fall semester
+#Instructor:
+#Name: Sydney Green
+#Lab: 2a
+# Lab 2a: build a one-sentence story from four prompted words.
+name = input('enter a name:')
+name2 = input('enter another name:')
+verb = input('enter a verb:')
+adverb = input('enter an adverb:')
+print(f'Once upon a time, there was a person named {name} and they had a friend named {name2}. This friend would {verb} {adverb} while complaining to other people.')
+
+# Lab 2b: print a diamond, one row per line.
+diamond_rows = (
+    '___*___',
+    '__*_*__',
+    '_*_*_*_',
+    '*_*_*_*',
+    '_*_*_*_',
+    '__*_*__',
+    '___*___',
+)
+for row in diamond_rows:
+    print(row)
+
+
+
+# Area and perimeter of a rectangle from user-supplied dimensions.
+width = float(input("Enter a width: "))
+height = float(input("Enter a height: "))
+area = height * width
+perimeter = (width + height) * 2
+print("The area is", area)
+print("The perimeter is", perimeter)
diff --git a/seeker/snippet/lem2gtif.py b/seeker/snippet/lem2gtif.py
@@ -0,0 +1,140 @@
+#date: 2024-08-20T16:46:10Z
+#url: https://api.github.com/gists/e418fe42d33f953ac4a0d0cea933b18d
+#owner: https://api.github.com/users/todashuta
+
+import os
+import re
+import glob
+import time
+from osgeo import (gdal, osr)
+import numpy as np
+
+
+"""
+Script that converts lem files to GeoTIFF.
+
+lem file specification:
+https://www.gsi.go.jp/MAP/CD-ROM/dem5m/doc/info5m1.htm
+
+
+(For now) the script converts the lem files found in the folder that
+contains lem2gtif.py, so copy this file into the target folder and
+run it there.
+"""
+
+
+
+def main():
+    """Convert every ``*.lem`` file that sits next to this script.
+
+    A ValueError or FileNotFoundError raised for one file is printed and the
+    loop moves on to the next file.
+    """
+    # Scan the folder that contains this script for lem files
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    lem_pattern = os.path.join(script_dir, '*.lem')
+    for lem_filepath in glob.glob(lem_pattern):
+        try:
+            # Convert the lem file to GeoTIFF
+            lem_to_gtif(lem_filepath)
+        except (ValueError, FileNotFoundError) as err:
+            print(f"Error: {err}")
+            print()
+
+
+
+def lem_to_gtif(lem_filepath):
+    """Convert one lem file to a single-band Float32 GeoTIFF.
+
+    The ``.tif`` is written beside the input file. Raster size, resolution,
+    origin and EPSG code come from the ``.csv`` header file that shares the
+    lem file's base name.
+
+    Args:
+        lem_filepath: Path of the input file; must end in ``.lem``.
+
+    Raises:
+        ValueError: If the path does not end in ``.lem``, the header file
+            cannot be parsed, or a data field is not an integer.
+        FileNotFoundError: If the lem file or its csv header is missing.
+    """
+    if not re.search(r'\.lem$', lem_filepath):
+        raise ValueError("lem ファイル以外が指定されました。")
+
+    # The csv header is expected next to the lem file; the tif goes there too
+    csv_filepath = re.sub(r'\.lem$', '.csv', lem_filepath)
+    tif_filepath = re.sub(r'\.lem$', '.tif', lem_filepath)
+
+    lem_filename = os.path.basename(lem_filepath)
+    csv_filename = os.path.basename(csv_filepath)
+    tif_filename = os.path.basename(tif_filepath)
+
+    now = time.strftime('%H:%M:%S', time.localtime())
+    print(f"[{now}] convert to {tif_filename}")
+
+    # Make sure both input files exist
+    if not os.path.exists(lem_filepath):
+        raise FileNotFoundError(f"{lem_filename} がみつかりません。")
+    if not os.path.exists(csv_filepath):
+        raise FileNotFoundError(f"{csv_filename} がみつかりません。")
+
+    # Read the header file
+    header = read_header_file(csv_filepath)
+    if header is None:  # defensive: guards against a parser that returns None
+        raise ValueError(f"{csv_filename} のパースに失敗しました。")
+
+    # Create the GeoTIFF file
+    nodata = -9999.0
+    cols = header['cols']
+    srs = osr.SpatialReference()
+    srs.ImportFromEPSG(header['crs'])
+    driver = gdal.GetDriverByName('GTiff')
+    raster = driver.Create(tif_filepath, cols, header['rows'], 1, gdal.GDT_Float32)
+    # Origin is the (left, top) corner; the y step is negative so rows run
+    # downwards from the top edge.
+    raster.SetGeoTransform((header['left'], header['xres'], 0, header['top'], 0, -header['yres']))
+    raster.SetProjection(srs.ExportToWkt())
+    band = raster.GetRasterBand(1)
+    band.SetNoDataValue(nodata)
+
+    with open(lem_filepath) as lem_f:
+        row = 0
+        # Read one line of the lem file and write one raster row
+        for line in lem_f:
+            if not line.strip():
+                # A blank (e.g. trailing) line carries no data; without this
+                # it would abort the conversion with int('').
+                continue
+            values = []
+            for i in range(cols):
+                # Data fields are 5 characters wide, after a 10-character prefix
+                field = line[5 * i + 10:5 * i + 15]
+                # Both sentinels map to nodata. The comparison is made per
+                # field so digits that straddle two fields are never rewritten.
+                if field in ('-1111', '-9999'):
+                    values.append(nodata)
+                else:
+                    values.append(int(field) / 10)
+            band.WriteArray(np.array([values]), yoff=row)
+            row += 1
+        band.FlushCache()
+
+
+
+def _zone_to_epsg(value):
+    """Turn the coordinate-system number from the header into an EPSG code."""
+    # NOTE(review): presumably zone n maps to EPSG 6668 + n (JGD2011 plane
+    # rectangular CS) - confirm against the data's datum.
+    return int(value) + 6668
+
+
+def _hundredths(value):
+    """Scale an integer coordinate string by 1/100."""
+    return int(value) / 100
+
+
+# Header key -> (name in the parsed dict, converter for the raw value).
+# Coordinate keys appear with both ASCII and full-width X/Y spellings.
+_HEADER_FIELDS = {
+    '東西方向の点数': ('cols', int),
+    '南北方向の点数': ('rows', int),
+    '東西方向のデータ間隔': ('xres', float),
+    '南北方向のデータ間隔': ('yres', float),
+    '平面直角座標系番号': ('crs', _zone_to_epsg),
+    '座標系番号': ('crs', _zone_to_epsg),
+    '区画左下X座標': ('bottom', _hundredths),
+    '区画左下Ｘ座標': ('bottom', _hundredths),
+    '区画左下Y座標': ('left', _hundredths),
+    '区画左下Ｙ座標': ('left', _hundredths),
+    '区画右上X座標': ('top', _hundredths),
+    '区画右上Ｘ座標': ('top', _hundredths),
+    '区画右上Y座標': ('right', _hundredths),
+    '区画右上Ｙ座標': ('right', _hundredths),
+}
+
+# (parsed name, label used in the error message), in the order checked.
+_REQUIRED_FIELDS = (
+    ('cols', '東西方向の点数'),
+    ('rows', '南北方向の点数'),
+    ('xres', '東西方向のデータ間隔'),
+    ('yres', '南北方向のデータ間隔'),
+    ('crs', '座標系番号'),
+    ('bottom', '区画左下Ｘ座標'),
+    ('left', '区画左下Ｙ座標'),
+    ('top', '区画右上Ｘ座標'),
+    ('right', '区画右上Ｙ座標'),
+)
+
+
+def read_header_file(csv_filepath):
+    """Parse the cp932-encoded ``key,value`` header file of a lem tile.
+
+    Lines without a comma and keys that are not recognised are ignored.
+    Only the first value after the key is used, so extra columns on a line
+    do not break parsing.
+
+    Args:
+        csv_filepath: Path to the header csv file.
+
+    Returns:
+        dict with ``cols`` and ``rows`` (int), ``xres`` and ``yres`` (float),
+        ``crs`` (int EPSG code) and ``bottom``, ``left``, ``top``, ``right``
+        (float, header value / 100).
+
+    Raises:
+        ValueError: If a required entry is missing or a recognised value is
+            not numeric.
+    """
+    csv_filename = os.path.basename(csv_filepath)
+    header = {}
+
+    # Read the header file
+    with open(csv_filepath, 'r', encoding='cp932') as f:
+        for line in f:
+            fields = line.strip().split(',')
+            if len(fields) < 2:
+                # Blank line or a line with no value: nothing to parse
+                continue
+            spec = _HEADER_FIELDS.get(fields[0])
+            if spec is not None:
+                name, convert = spec
+                header[name] = convert(fields[1])
+
+    # Raise if any required entry could not be read
+    for name, label in _REQUIRED_FIELDS:
+        if name not in header:
+            raise ValueError(f"{csv_filename} に「{label}」がありません。")
+
+    return header
+
+
+
+# Run the conversion only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()