Skip to content

Commit

Permalink
2024-08-20 17:12:29.821398 new snippets
Browse files Browse the repository at this point in the history
  • Loading branch information
eduardocerqueira committed Aug 20, 2024
1 parent 4cf0336 commit df69499
Show file tree
Hide file tree
Showing 13 changed files with 877 additions and 0 deletions.
23 changes: 23 additions & 0 deletions seeker/report.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
--------------------------------------------------------------------------------
2024-08-20 17:12:29.821398
--------------------------------------------------------------------------------
On branch main
Your branch is up to date with 'origin/main'.

Untracked files:
(use "git add <file>..." to include in what will be committed)
snippet/dedup_roaster.py
snippet/file_to_env.sh
snippet/hw_m4_fake_math.py
snippet/hw_m4_true_math.py
snippet/hw_module_4_1.py
snippet/install-java.command
snippet/lab2.py
snippet/lem2gtif.py
snippet/lulu.py
snippet/new_PD.py
snippet/rockpaperscissors_infinite.py
snippet/sagemaker_studio_lifecycle_config.sh

nothing added to commit but untracked files present (use "git add" to track)

--------------------------------------------------------------------------------
2024-08-19 17:13:16.613524
--------------------------------------------------------------------------------
Expand Down
55 changes: 55 additions & 0 deletions seeker/snippet/dedup_roaster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#date: 2024-08-20T16:46:29Z
#url: https://api.github.com/gists/5ad11d4d71544e6eeb18901f262328ca
#owner: https://api.github.com/users/datageek19

import pandas as pd
import glob
import os

# Columns that together identify one person on the roster.
DUPLICATE_KEY_COLUMNS = ['First_Name', 'Last_Name', 'Birthdate']


def group_files_by_month(files):
    """Group roster CSV paths by the month-year encoded in their file names.

    File names are expected to end in ``-DD-MM-YYYY.csv`` (for example
    ``UHCCS-1-z-01-03-2024.csv``).  The extension is stripped before the
    name is split, so the keys look like ``'03-2024'`` rather than
    ``'03-2024.csv'``.

    Args:
        files: iterable of file paths.

    Returns:
        dict mapping ``'MM-YYYY'`` to the list of paths for that month, in
        the order the paths were supplied.
    """
    file_groups = {}
    for file in files:
        stem = os.path.splitext(os.path.basename(file))[0]
        date_part = stem.split('-')[-3:]      # ['01', '03', '2024']
        month_year = '-'.join(date_part[1:])  # '03-2024'
        file_groups.setdefault(month_year, []).append(file)
    return file_groups


def find_duplicates(combined_df):
    """Return every row of *combined_df* whose identity columns repeat.

    ``keep=False`` marks all members of a duplicate group, not only the
    second and later occurrences.
    """
    mask = combined_df.duplicated(subset=DUPLICATE_KEY_COLUMNS, keep=False)
    return combined_df[mask]


def main():
    """Find duplicate people per month and write them to an Excel report."""
    # Step 1: List all relevant CSV files
    files = glob.glob("UHCCS-1-z-*.csv")  # Adjust the pattern to match your files

    # Steps 2-4: group by month, load each group, and collect its duplicates.
    all_duplicates = {}
    for month_year, file_list in group_files_by_month(files).items():
        combined_df = pd.concat(
            [pd.read_csv(f) for f in file_list], ignore_index=True
        )
        duplicates_df = find_duplicates(combined_df)

        print(f"Duplicates for {month_year}:")
        print(duplicates_df)
        all_duplicates[month_year] = duplicates_df

    # NOTE: the pasted dask fragments were dropped.  They only re-read the
    # file list left over from the last loop iteration, and their result was
    # never used by the report below.

    if not all_duplicates:
        # Nothing matched the glob pattern; do not create an empty workbook.
        print("No matching files found; no report written.")
        return

    # One sheet per month.  The context manager saves and closes the workbook
    # (ExcelWriter.save() no longer exists in pandas 2.x).
    with pd.ExcelWriter('duplicate_report.xlsx', engine='xlsxwriter') as writer:
        for month_year, duplicates_df in all_duplicates.items():
            duplicates_df.to_excel(writer, sheet_name=month_year, index=False)


if __name__ == "__main__":
    main()
5 changes: 5 additions & 0 deletions seeker/snippet/file_to_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#date: 2024-08-20T17:11:25Z
#url: https://api.github.com/gists/f8c8885b6c29d8968184402b47914f5c
#owner: https://api.github.com/users/kaedonkers

# Load the key from its file into API_KEY; export it only if the read succeeded.
API_KEY="$(cat ~/path/to/apikey_file)" && export API_KEY
9 changes: 9 additions & 0 deletions seeker/snippet/hw_m4_fake_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#date: 2024-08-20T16:51:02Z
#url: https://api.github.com/gists/84b82cfd4ac148cd130e16cffe06af02
#owner: https://api.github.com/users/PavelYar-1

def divide(first, second):
    """Divide *first* by *second*, reporting division by zero as text.

    Returns:
        ``first / second``, or the string ``'Ошибка'`` when *second* is 0.
    """
    if second == 0:
        return 'Ошибка'
    return first / second
9 changes: 9 additions & 0 deletions seeker/snippet/hw_m4_true_math.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#date: 2024-08-20T16:51:02Z
#url: https://api.github.com/gists/84b82cfd4ac148cd130e16cffe06af02
#owner: https://api.github.com/users/PavelYar-1

def divide(first, second):
    """Divide *first* by *second*, reporting division by zero as ``'inf'``.

    Returns:
        ``first / second``, or the string ``'inf'`` when *second* is 0.
    """
    if second == 0:
        return 'inf'
    return first / second
15 changes: 15 additions & 0 deletions seeker/snippet/hw_module_4_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#date: 2024-08-20T16:49:51Z
#url: https://api.github.com/gists/57c4aae920934d9c7d78a2b1df6de13c
#owner: https://api.github.com/users/PavelYar-1

from hw_m4_fake_math import divide as fake
from hw_m4_true_math import divide as true

# Exercise both divide() implementations: one ordinary division and one
# division by zero each, then print the four results in call order.
result1, result2 = fake(69, 3), fake(3, 0)
result3, result4 = true(49, 7), true(15, 0)
for outcome in (result1, result2, result3, result4):
    print(outcome)
38 changes: 38 additions & 0 deletions seeker/snippet/install-java.command
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#date: 2024-08-20T16:45:31Z
#url: https://api.github.com/gists/ff44e7ad3f67fc5b59a5fde56ba0f1ef
#owner: https://api.github.com/users/ankitahuja0508

# Succeed (exit status 0) only when the first argument is something `test`
# accepts as an integer.
is_integer() {
    # -eq is a numeric comparison, so it fails for non-integer operands;
    # the resulting complaint on stderr is discarded.
    test "$1" -eq "$1" 2>/dev/null
}

# Drop any JAVA_HOME export that is already present in .zshrc
# (sed -i '' is the BSD/macOS spelling of in-place editing).
echo "Removing old JAVA_HOME if any..."
sed -i '' '/export JAVA_HOME=/d' ~/.zshrc

# The optional first argument selects the Java version to install.
if [ -z "$1" ]; then
    # No version requested: install the unversioned oracle-jdk cask.
    echo "Installing the latest Oracle JDK..."
    brew install oracle-jdk --cask
    echo "setting JAVA_HOME for the latest version..."
    # \$ keeps the command substitution literal so it is evaluated each
    # time .zshrc is sourced, not now.
    echo export "JAVA_HOME=\$(/usr/libexec/java_home)" >> ~/.zshrc
elif is_integer "$1"; then
    # A numeric version was given: install and pin exactly that version.
    echo "Installing Oracle JDK version $1..."
    brew install oracle-jdk@$1 --cask
    echo "setting JAVA_HOME for version $1..."
    echo export "JAVA_HOME=\$(/usr/libexec/java_home -v $1)" >> ~/.zshrc
else
    # Anything that is not an integer is rejected before brew is invoked.
    echo "Error: The argument '$1' is not an integer. Please provide a valid Java version number." >&2
    exit 1
fi

# Re-read .zshrc so the new JAVA_HOME applies to the rest of this script
source ~/.zshrc

# Show which Java is now active
echo "checking java version"
java -version
32 changes: 32 additions & 0 deletions seeker/snippet/lab2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#date: 2024-08-20T16:54:43Z
#url: https://api.github.com/gists/b65cd558432b4ceb4707b9180c09ca7e
#owner: https://api.github.com/users/sydseats

#Class :CSE 1321L
#Term: fall semester
#Instructor:
#Name: Sydney Green
#Lab: 2a
# Lab 2a: collect four words and drop them into a one-sentence story.
name = input('enter a name:')
name2 = input('enter another name:')
verb = input('enter a verb:')
adverb = input('enter an adverb:')
story = (
    f'Once upon a time, there was a person named {name} '
    f'and they had a friend named {name2}. '
    f'This friend would {verb} {adverb} while complaining to other people.'
)
print(story)

#lab2b
# Print a 7-character-wide diamond: each row holds 1..4..1 asterisks joined
# by underscores and centred on an underscore background.
for stars in (1, 2, 3, 4, 3, 2, 1):
    print('_'.join('*' * stars).center(7, '_'))



# Rectangle calculator: read both sides, then report area and perimeter.
width = float(input("Enter a width: "))
height = float(input("Enter a height: "))
area = width * height
perimeter = 2 * (width + height)
for label, value in (("area", area), ("perimeter", perimeter)):
    print(f"The {label} is", value)
140 changes: 140 additions & 0 deletions seeker/snippet/lem2gtif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#date: 2024-08-20T16:46:10Z
#url: https://api.github.com/gists/e418fe42d33f953ac4a0d0cea933b18d
#owner: https://api.github.com/users/todashuta

import glob
import os
import re
import time
import unicodedata

import numpy as np
from osgeo import gdal, osr


"""
lem ファイルを GeoTIFF に変換するスクリプトです。
lem ファイルの仕様
https://www.gsi.go.jp/MAP/CD-ROM/dem5m/doc/info5m1.htm
(現時点では) lem2gtif.py が存在するフォルダ内の lem ファイルを
GeoTIFF に変換するようになっているので、このファイルを対象の
フォルダにコピーし、実行してください。
"""



def main():
    """Convert every ``*.lem`` file located next to this script to GeoTIFF.

    A ``ValueError`` or ``FileNotFoundError`` raised for one file is printed
    and the remaining files are still processed.
    """
    # Scan the folder that contains this script for lem files.  The list is
    # sorted so the processing order does not depend on the file system.
    basedir = os.path.dirname(os.path.abspath(__file__))
    for lem_filepath in sorted(glob.glob(os.path.join(basedir, '*.lem'))):
        try:
            # Convert the lem file to GeoTIFF
            lem_to_gtif(lem_filepath)
        except (ValueError, FileNotFoundError) as err:
            print(f"Error: {err}")
        # Blank line between the log output of consecutive files.
        print()



def lem_to_gtif(lem_filepath):
    """Convert one lem elevation file into a GeoTIFF written next to it.

    The grid size, resolution, origin and coordinate system are read from
    the csv header file that shares the lem file's base name.

    Args:
        lem_filepath: path ending in ``.lem``.

    Raises:
        ValueError: the path does not end in ``.lem``, the header cannot be
            parsed, the GeoTIFF cannot be created, or a data field is not
            numeric.
        FileNotFoundError: the lem file or its csv header does not exist.
    """
    if not re.search(r'\.lem$', lem_filepath):
        raise ValueError("lem ファイル以外が指定されました。")

    # The csv header is expected beside the lem file; the tif is written there too.
    csv_filepath = re.sub(r'\.lem$', '.csv', lem_filepath)
    tif_filepath = re.sub(r'\.lem$', '.tif', lem_filepath)

    lem_filename = os.path.basename(lem_filepath)
    csv_filename = os.path.basename(csv_filepath)
    tif_filename = os.path.basename(tif_filepath)

    now = time.strftime('%H:%M:%S', time.localtime())
    print(f"[{now}] convert to {tif_filename}")

    # Make sure both input files exist
    if not os.path.exists(lem_filepath):
        raise FileNotFoundError(f"{lem_filename} がみつかりません。")
    if not os.path.exists(csv_filepath):
        raise FileNotFoundError(f"{csv_filename} がみつかりません。")

    # Read the header file
    header = read_header_file(csv_filepath)
    if header is None:
        raise ValueError(f"{csv_filename} のパースに失敗しました。")

    nodata = -9999.0
    cols = header['cols']

    # Create the single-band Float32 GeoTIFF
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(header['crs'])
    driver = gdal.GetDriverByName('GTiff')
    raster = driver.Create(tif_filepath, cols, header['rows'], 1, gdal.GDT_Float32)
    if raster is None:
        # Without GDAL exceptions enabled a failed Create() returns None;
        # raise something main() reports rather than an AttributeError.
        raise ValueError(f"{tif_filename} を作成できませんでした。")
    # Negative y resolution: raster rows run downwards from header['top'].
    raster.SetGeoTransform(
        (header['left'], header['xres'], 0, header['top'], 0, -header['yres'])
    )
    raster.SetProjection(srs.ExportToWkt())
    band = raster.GetRasterBand(1)
    band.SetNoDataValue(nodata)

    with open(lem_filepath) as lem_f:
        # Read one line from the lem file and write it as one raster row.
        for row, line in enumerate(lem_f):
            # '-1111' is treated as a second "no data" marker.
            line = line.replace('-1111', '-9999')
            # Skip the first 10 characters; after that every value is 5
            # characters wide and is divided by 10 on output.
            fields = (line[5 * i + 10:5 * i + 15] for i in range(cols))
            values = [
                nodata if field == '-9999' else int(field) / 10
                for field in fields
            ]
            band.WriteArray(np.array([values]), yoff=row)
    band.FlushCache()



def read_header_file(csv_filepath):
    """Parse the cp932-encoded csv header that accompanies a lem file.

    Each line is read as ``key,value``.  Lines whose key is not recognised
    (including blank lines and lines with extra commas) are ignored.

    Args:
        csv_filepath: path of the header file.

    Returns:
        dict with the keys ``cols``, ``rows`` (int), ``xres``, ``yres``
        (float), ``crs`` (int EPSG code) and ``bottom``, ``left``, ``top``,
        ``right`` (header value divided by 100).

    Raises:
        ValueError: a required entry is missing or its value is not numeric.
    """
    csv_filename = os.path.basename(csv_filepath)

    def to_epsg(value):
        # Offset the header's coordinate system number by 6668 to get an EPSG
        # code; presumably zone 1-19 -> EPSG:6669-6687 (JGD2011 plane
        # rectangular CS) -- TODO confirm.
        return int(value) + 6668

    def hundredth(value):
        # Coordinates are stored as integers and divided by 100 on read.
        return int(value) / 100

    # Header key (after NFKC normalisation) -> (result field, converter).
    parsers = {
        '東西方向の点数': ('cols', int),
        '南北方向の点数': ('rows', int),
        '東西方向のデータ間隔': ('xres', float),
        '南北方向のデータ間隔': ('yres', float),
        '平面直角座標系番号': ('crs', to_epsg),
        '座標系番号': ('crs', to_epsg),
        '区画左下X座標': ('bottom', hundredth),
        '区画左下Y座標': ('left', hundredth),
        '区画右上X座標': ('top', hundredth),
        '区画右上Y座標': ('right', hundredth),
    }

    header = {}
    # Read the header file
    with open(csv_filepath, 'r', encoding='cp932') as f:
        for line in f:
            # partition() tolerates blank lines and values containing commas,
            # which a bare two-way unpack of split(',') does not.
            k, _, v = line.strip().partition(',')
            # The coordinate keys were each compared twice -- apparently
            # full-width and half-width X/Y spellings.  NFKC folds the
            # full-width letters to ASCII so either spelling matches.
            k = unicodedata.normalize('NFKC', k)
            if k in parsers:
                field, convert = parsers[k]
                header[field] = convert(v)

    # Raise if any required piece of information could not be read
    required = (
        ('cols', '東西方向の点数'),
        ('rows', '南北方向の点数'),
        ('xres', '東西方向のデータ間隔'),
        ('yres', '南北方向のデータ間隔'),
        ('crs', '座標系番号'),
        ('bottom', '区画左下X座標'),
        ('left', '区画左下Y座標'),
        ('top', '区画右上X座標'),
        ('right', '区画右上Y座標'),
    )
    for field, label in required:
        if field not in header:
            raise ValueError(f"{csv_filename} に「{label}」がありません。")

    return header



# Entry point: the call is unguarded, so the conversion runs whenever this
# module is executed or imported.
main()
Loading

0 comments on commit df69499

Please sign in to comment.