-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
2024-08-20 17:12:29.821398 new snippets
- Loading branch information
1 parent
4cf0336
commit df69499
Showing
13 changed files
with
877 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#date: 2024-08-20T16:46:29Z | ||
#url: https://api.github.com/gists/5ad11d4d71544e6eeb18901f262328ca | ||
#owner: https://api.github.com/users/datageek19 | ||
|
||
import pandas as pd
import glob
import os

# Columns that together decide whether two rows count as duplicates.
DUPLICATE_KEYS = ('First_Name', 'Last_Name', 'Birthdate')


def month_year_of(path):
    """Return the 'MM-YYYY' part of a file name shaped like '...-DD-MM-YYYY.csv'.

    The extension is stripped before splitting; otherwise the last dash
    separated piece would be '2024.csv' and the key would read '03-2024.csv'.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    date_part = stem.split('-')[-3:]  # e.g. ['01', '03', '2024']
    return '-'.join(date_part[1:])    # e.g. '03-2024'


def group_files_by_month(paths):
    """Map each 'MM-YYYY' key to the list of paths whose name carries it."""
    groups = {}
    for path in paths:
        groups.setdefault(month_year_of(path), []).append(path)
    return groups


def find_duplicates(frame, subset=DUPLICATE_KEYS):
    """Return every row of *frame* whose *subset* columns occur more than once.

    keep=False marks all members of a duplicate group, not just the repeats.
    """
    return frame[frame.duplicated(subset=list(subset), keep=False)]


# Step 1: List all relevant CSV files
files = glob.glob("UHCCS-1-z-*.csv")  # Adjust the pattern to match your files

# Step 2: Group files by month
file_groups = group_files_by_month(files)

# Per-month duplicate frames, kept so a report can be written after the loop.
all_duplicates = {}

# Step 3: Process each group of files
for month_year, file_list in file_groups.items():
    # Read all files for the given month into one DataFrame
    month_df_list = [pd.read_csv(f) for f in file_list]
    combined_df = pd.concat(month_df_list, ignore_index=True)

    # Step 4: Check for duplicates based on First_Name, Last_Name, Birthdate
    duplicates_df = find_duplicates(combined_df)
    all_duplicates[month_year] = duplicates_df

    # Print the duplicates found for this month
    print(f"Duplicates for {month_year}:")
    print(duplicates_df)

    # Optional: Save duplicates to a CSV file
    # duplicates_df.to_csv(f'duplicates_{month_year}.csv', index=False)
# +++++++++++++++++++++++++++ | ||
import dask.dataframe as dd

# Dask variant of the duplicate check for one month's files. `file_list` is
# expected to hold the CSV paths of a single month (it is the loop variable of
# the per-month loop earlier in this script).
# NOTE(review): dask DataFrames do not appear to provide `duplicated`, so the
# lazily concatenated frame is materialised as a pandas DataFrame first; the
# files are still read through dask. Confirm against the installed dask version.
combined_df = (
    dd.concat([dd.read_csv(f) for f in file_list])
    .compute()
    .reset_index(drop=True)  # per-file indexes all restart at 0
)
duplicates_df = combined_df[
    combined_df.duplicated(subset=['First_Name', 'Last_Name', 'Birthdate'], keep=False)
]
|
||
|
||
# Write one worksheet per month into a single workbook.
# `all_duplicates` is expected to map a month-year label to its DataFrame of
# duplicate rows. The context manager saves and closes the workbook on exit;
# `ExcelWriter.save()` no longer exists in pandas 2.x.
with pd.ExcelWriter('duplicate_report.xlsx', engine='xlsxwriter') as writer:
    for month_year, duplicates_df in all_duplicates.items():
        duplicates_df.to_excel(writer, sheet_name=month_year, index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#date: 2024-08-20T17:11:25Z | ||
#url: https://api.github.com/gists/f8c8885b6c29d8968184402b47914f5c | ||
#owner: https://api.github.com/users/kaedonkers | ||
|
||
# Read the key from a file and export it only if the read succeeded.
API_KEY="$(cat ~/path/to/apikey_file)" && export API_KEY
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#date: 2024-08-20T16:51:02Z | ||
#url: https://api.github.com/gists/84b82cfd4ac148cd130e16cffe06af02 | ||
#owner: https://api.github.com/users/PavelYar-1 | ||
|
||
def divide(first, second):
    """Return first / second, or the string 'Ошибка' when second is zero.

    'Ошибка' is Russian for "Error"; it is returned as a value, not raised.
    """
    if second == 0:
        return 'Ошибка'
    return first / second
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#date: 2024-08-20T16:51:02Z | ||
#url: https://api.github.com/gists/84b82cfd4ac148cd130e16cffe06af02 | ||
#owner: https://api.github.com/users/PavelYar-1 | ||
|
||
def divide(first, second):
    """Return first / second, or the string 'inf' when second is zero.

    Note that the zero-divisor result is the text 'inf', not a float.
    """
    if second == 0:
        return 'inf'
    return first / second
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#date: 2024-08-20T16:49:51Z | ||
#url: https://api.github.com/gists/57c4aae920934d9c7d78a2b1df6de13c | ||
#owner: https://api.github.com/users/PavelYar-1 | ||
|
||
from hw_m4_fake_math import divide as fake
from hw_m4_true_math import divide as true

# Call both divide implementations with a regular divisor and with a zero
# divisor, then print the four results in order.
result1 = fake(69, 3)
result2 = fake(3, 0)
result3 = true(49, 7)
result4 = true(15, 0)
print(result1)
print(result2)
print(result3)
print(result4)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#date: 2024-08-20T16:45:31Z | ||
#url: https://api.github.com/gists/ff44e7ad3f67fc5b59a5fde56ba0f1ef | ||
#owner: https://api.github.com/users/ankitahuja0508 | ||
|
||
# Function to check if a value is an integer.
# `-eq` only accepts integer operands, so the test fails (quietly, because
# stderr is discarded) for anything else.
is_integer() {
    [ "$1" -eq "$1" ] 2>/dev/null
}

# Validate the optional version argument before touching ~/.zshrc, so that a
# bad argument cannot leave the file with JAVA_HOME removed and nothing added.
if [ -n "$1" ] && ! is_integer "$1"; then
    echo "Error: The argument '$1' is not an integer. Please provide a valid Java version number." >&2
    exit 1
fi

# Remove any existing JAVA_HOME entries in .zshrc
# (`sed -i ''` is the BSD/macOS form of in-place editing.)
echo "Removing old JAVA_HOME if any..."
sed -i '' '/export JAVA_HOME=/d' ~/.zshrc

# Check if an argument is passed (the Java version)
if [ -n "$1" ]; then
    echo "Installing Oracle JDK version $1..."
    brew install "oracle-jdk@$1" --cask
    echo "setting JAVA_HOME for version $1..."
    # The escaped \$ keeps the command substitution literal, so it is
    # evaluated each time .zshrc is loaded rather than once here.
    echo "export JAVA_HOME=\$(/usr/libexec/java_home -v $1)" >> ~/.zshrc
else
    echo "Installing the latest Oracle JDK..."
    brew install oracle-jdk --cask
    echo "setting JAVA_HOME for the latest version..."
    echo "export JAVA_HOME=\$(/usr/libexec/java_home)" >> ~/.zshrc
fi

# Apply changes to .zshrc
source ~/.zshrc

# Check Java version
echo "checking java version"
java -version
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#date: 2024-08-20T16:54:43Z | ||
#url: https://api.github.com/gists/b65cd558432b4ceb4707b9180c09ca7e | ||
#owner: https://api.github.com/users/sydseats | ||
|
||
# Class: CSE 1321L
# Term: fall semester
# Instructor:
# Name: Sydney Green
# Lab: 2a

# Collect four words and drop them into a fixed one-sentence story.
name = input('enter a name:')
name2 = input('enter another name:')
verb = input('enter a verb:')
adverb = input('enter an adverb:')
print(
    f'Once upon a time, there was a person named {name}'
    f' and they had a friend named {name2}.'
    f' This friend would {verb} {adverb} while complaining to other people.'
)

# Lab 2b: print a diamond made of asterisks and underscores.
print('___*___')
print('__*_*__')
print('_*_*_*_')
print('*_*_*_*')
print('_*_*_*_')
print('__*_*__')
print('___*___')

# Read a rectangle's sides and report its area and perimeter.
width = float(input("Enter a width: "))
height = float(input("Enter a height: "))
area = height * width
perimeter = (width + height) * 2
print("The area is", area)
print("The perimeter is", perimeter)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#date: 2024-08-20T16:46:10Z | ||
#url: https://api.github.com/gists/e418fe42d33f953ac4a0d0cea933b18d | ||
#owner: https://api.github.com/users/todashuta | ||
|
||
import os | ||
import re | ||
import glob | ||
import time | ||
from osgeo import (gdal, osr) | ||
import numpy as np | ||
|
||
|
||
""" | ||
Script that converts lem files to GeoTIFF.
lem file specification:
https://www.gsi.go.jp/MAP/CD-ROM/dem5m/doc/info5m1.htm
(For now) it converts the lem files found in the folder that contains
lem2gtif.py, so copy this file into the target folder
and run it there.
""" | ||
|
||
|
||
|
||
def main():
    """Convert every .lem file that sits next to this script to GeoTIFF.

    A ValueError or FileNotFoundError raised for one file is printed and the
    loop moves on to the next file.
    """
    # Scan the folder that contains this script for lem files
    basedir = os.path.dirname(os.path.abspath(__file__))
    for lem_filepath in glob.glob(os.path.join(basedir, '*.lem')):
        try:
            # Convert the lem file to GeoTIFF
            lem_to_gtif(lem_filepath)
        except (ValueError, FileNotFoundError) as err:
            print(f"Error: {err}")
        # Blank line between the output of consecutive files
        print()
|
||
|
||
|
||
def lem_to_gtif(lem_filepath):
    """Convert one lem file to a single-band Float32 GeoTIFF beside it.

    The header is read from a .csv file with the same base name and the
    result is written to a .tif file with the same base name.

    Raises:
        ValueError: the path does not end in '.lem', or the header file
            could not be parsed.
        FileNotFoundError: the lem file or its csv header file is missing.
    """
    if not re.search(r'\.lem$', lem_filepath):
        raise ValueError("lem ファイル以外が指定されました。")

    # The csv header file is expected next to the lem file
    csv_filepath = re.sub(r'\.lem$', '.csv', lem_filepath)
    # The converted TIFF is written next to the lem file
    tif_filepath = re.sub(r'\.lem$', '.tif', lem_filepath)

    lem_filename = os.path.basename(lem_filepath)
    csv_filename = os.path.basename(csv_filepath)
    tif_filename = os.path.basename(tif_filepath)

    now = time.strftime('%H:%M:%S', time.localtime())
    print(f"[{now}] convert to {tif_filename}")

    # Check that both input files exist
    if not os.path.exists(lem_filepath):
        raise FileNotFoundError(f"{lem_filename} がみつかりません。")
    if not os.path.exists(csv_filepath):
        raise FileNotFoundError(f"{csv_filename} がみつかりません。")

    # Read the header file
    header = read_header_file(csv_filepath)
    if header is None:
        raise ValueError(f"{csv_filename} のパースに失敗しました。")
    cols = header['cols']

    # Create the GeoTIFF file
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(header['crs'])
    driver = gdal.GetDriverByName('GTiff')
    raster = driver.Create(tif_filepath, cols, header['rows'], 1, gdal.GDT_Float32)
    # Origin is (left, top); the negative y resolution makes rows run downward.
    raster.SetGeoTransform((header['left'], header['xres'], 0, header['top'], 0, -header['yres']))
    raster.SetProjection(srs.ExportToWkt())
    band = raster.GetRasterBand(1)
    band.SetNoDataValue(-9999.0)

    with open(lem_filepath) as lem_f:
        row = 0
        # Each lem line becomes one raster row
        for line in lem_f:
            if not line.strip():
                # A blank (e.g. trailing) line carries no raster row
                continue
            # Fold the -1111 marker into the -9999 nodata value
            # NOTE(review): the meaning of -1111 is defined by the lem spec - confirm
            line = line.replace('-1111', '-9999')
            values = []
            for i in range(cols):
                # Fixed-width layout: 10 leading characters, then 5 per cell
                field = line[5 * i + 10:5 * i + 15]
                # Cells are integers scaled by 10 (presumably tenths of a metre)
                values.append(-9999.0 if field == '-9999' else int(field) / 10)
            band.WriteArray(np.array([values]), yoff=row)
            row += 1
    band.FlushCache()

    # Drop the GDAL references so the dataset is flushed and closed
    band = None
    raster = None
|
||
|
||
|
||
def read_header_file(csv_filepath):
    """Parse a lem header file (cp932-encoded 'label,value' lines) into a dict.

    Returns a dict with the keys 'cols', 'rows' (int), 'xres', 'yres' (float),
    'crs' (int, the file's coordinate-system number plus 6668) and 'bottom',
    'left', 'top', 'right' (the integer coordinates divided by 100).

    Lines without a comma (e.g. blank lines) and unknown labels are ignored.
    Fields after the second one on a line are ignored as well.

    Raises:
        ValueError: a required entry is missing or a value is not numeric.
    """
    csv_filename = os.path.basename(csv_filepath)

    def crs_code(text):
        # NOTE(review): presumably maps the plane rectangular zone number onto
        # its EPSG code - confirm
        return int(text) + 6668

    def scaled(text):
        # Coordinates are stored as integers in hundredths
        return int(text) / 100

    # Header label -> (result key, converter).
    # NOTE(review): the original compared each coordinate label against two
    # spellings that are identical here; the second was presumably the
    # full-width X / Y form, so both are accepted - confirm.
    fields = {
        '東西方向の点数': ('cols', int),
        '南北方向の点数': ('rows', int),
        '東西方向のデータ間隔': ('xres', float),
        '南北方向のデータ間隔': ('yres', float),
        '平面直角座標系番号': ('crs', crs_code),
        '座標系番号': ('crs', crs_code),
        '区画左下X座標': ('bottom', scaled),
        '区画左下Ｘ座標': ('bottom', scaled),
        '区画左下Y座標': ('left', scaled),
        '区画左下Ｙ座標': ('left', scaled),
        '区画右上X座標': ('top', scaled),
        '区画右上Ｘ座標': ('top', scaled),
        '区画右上Y座標': ('right', scaled),
        '区画右上Ｙ座標': ('right', scaled),
    }

    header = {}

    # Read the header file
    with open(csv_filepath, 'r', encoding='cp932') as f:
        for line in f:
            parts = line.strip().split(',')
            if len(parts) < 2:
                continue
            entry = fields.get(parts[0])
            if entry is not None:
                key, convert = entry
                header[key] = convert(parts[1])

    # Raise if any required entry could not be read
    required = (
        ('cols', '東西方向の点数'),
        ('rows', '南北方向の点数'),
        ('xres', '東西方向のデータ間隔'),
        ('yres', '南北方向のデータ間隔'),
        ('crs', '座標系番号'),
        ('bottom', '区画左下X座標'),
        ('left', '区画左下Y座標'),
        ('top', '区画右上X座標'),
        ('right', '区画右上Y座標'),
    )
    for key, label in required:
        if key not in header:
            raise ValueError(f"{csv_filename} に「{label}」がありません。")

    return header
|
||
|
||
|
||
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
Oops, something went wrong.