
I am working on an internship project training a model that detects distresses on roads. I have multiple images that I labeled myself and have written training code for them, but it does not work properly, so I would like some guidance. #1224

Open
aditya2k4anu opened this issue Jul 11, 2024 · 0 comments


import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Embedding, LSTM, TimeDistributed, RepeatVector
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.patches as patches

Step 1: Load the data from the directories

image_dir = 'D:/Image Classification using CNN/1'
text_dir = 'D:/Image Classification using CNN/1-Labled'

image_files = sorted([f for f in os.listdir(image_dir) if f.endswith('.jpg')])
image_paths = [os.path.join(image_dir, f) for f in image_files]
text_paths = [os.path.join(text_dir, f.replace('.jpg', '.txt')) for f in image_files]

images = []
labels = []

Function to load labels from a text file

def load_labels(label_path):
    with open(label_path, 'r') as file:
        return [float(x) for x in file.read().strip().split()]
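
For reference, a minimal sketch of what load_labels returns, using a hypothetical one-line label file (the file name and numbers below are made up for illustration):

# Quick check of load_labels on a hypothetical label file
with open('example_label.txt', 'w') as f:
    f.write('0 0.45 0.32 0.20 0.10')
print(load_labels('example_label.txt'))  # -> [0.0, 0.45, 0.32, 0.2, 0.1]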

target_size = (150, 150) # Define your target size here

for img_path, txt_path in zip(image_paths, text_paths):
    try:
        img = load_img(img_path, target_size=target_size)  # Load and resize to target_size
        img = img_to_array(img)
        img = img / 255.0  # Normalize to [0, 1]

        lbl = load_labels(txt_path)

        images.append(img)
        labels.append(lbl)
    except Exception as e:
        print(f"Error loading {img_path} or {txt_path}: {e}")

Step 2: Pad the labels to the maximum length

max_label_length = max(len(label) for label in labels)
labels = pad_sequences(labels, maxlen=max_label_length, padding='post', dtype='float32')

Convert lists to numpy arrays

images = np.array(images)
labels = np.array(labels)
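
For reference, pad_sequences with padding='post' appends zeros to the shorter lists so that all labels share the same length; the toy values below are just for illustration:

# Toy example of post-padding ragged float lists to a common length
toy = [[0.1, 0.2], [0.3, 0.4, 0.5, 0.6]]
print(pad_sequences(toy, maxlen=4, padding='post', dtype='float32'))
# [[0.1 0.2 0.  0. ]
#  [0.3 0.4 0.5 0.6]]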

Split the data into training and validation sets

x_train, x_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42)

Step 3: Build the model

Image model

image_input = Input(shape=(target_size[0], target_size[1], 3)) # Use target_size dimensions
x = Conv2D(32, (3, 3), activation='relu')(image_input)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = MaxPooling2D(2, 2)(x)
x = Flatten()(x)
x = Dense(512, activation='relu')(x)
x = Dropout(0.5)(x)
image_features = Dense(256, activation='relu')(x)

Label model

label_input = Input(shape=(max_label_length,))
embedded_labels = Embedding(input_dim=1000, output_dim=256, input_length=max_label_length)(label_input)
lstm_out = LSTM(256, return_sequences=True)(embedded_labels)

Repeat image features to match the label sequence length

repeated_image_features = RepeatVector(max_label_length)(image_features)

Combining repeated image features and LSTM output

combined = tf.keras.layers.Concatenate()([repeated_image_features, lstm_out])
output = TimeDistributed(Dense(1, activation='sigmoid'))(combined) # Adjust output for multi-label

model = Model(inputs=[image_input, label_input], outputs=output)
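
To double-check the wiring before compiling, I pass dummy tensors through the model; given the TimeDistributed Dense(1) head, the output shape should be (batch, max_label_length, 1):

# Shape check with dummy inputs (batch of 1)
dummy_img = tf.zeros((1, target_size[0], target_size[1], 3))
dummy_lbl = tf.zeros((1, max_label_length))
print(model([dummy_img, dummy_lbl]).shape)  # expected: (1, max_label_length, 1)
model.summary()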

Step 4: Compile the model

model.compile(optimizer='adam',
              loss='binary_crossentropy',  # Use binary cross-entropy for multi-label classification
              metrics=['accuracy', 'binary_accuracy'])  # Add binary_accuracy for evaluation

Step 5: Train the model

history = model.fit(
    [x_train, y_train], y_train,
    epochs=20,
    batch_size=20,
    validation_data=([x_val, y_val], y_val)
)
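
After training I also plot the loss curves from the History object to see whether the model is learning at all (this only uses what model.fit returns):

# Plot training vs. validation loss from the History object
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.xlabel('Epoch')
plt.ylabel('Binary cross-entropy loss')
plt.legend()
plt.show()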

Step 6: Evaluate the model

loss, accuracy, binary_accuracy = model.evaluate([x_val, y_val], y_val)
print(f"Validation accuracy: {accuracy100:.2f}%")
print(f"Validation binary accuracy: {binary_accuracy
100:.2f}%")

Step 7: Visualize predictions with annotations

def visualize_predictions(images, labels, predictions, file_names, target_size):
    num_images = len(images)
    for i in range(num_images):
        fig, ax = plt.subplots(figsize=(images[i].shape[1] / 100, images[i].shape[0] / 100))  # Use image dimensions for figsize
        ax.imshow(images[i])
        ax.set_title(f"Image: {file_names[i]}")

        # Draw rectangles based on predictions
        for j, pred in enumerate(predictions[i]):
            if pred > 0.5:  # Adjust threshold as needed
                # Calculate box dimensions based on label position and target_size
                box_x = target_size[1] * (j + 1) / (max_label_length + 1)
                box_y = target_size[0] * 0.05
                box_width = target_size[1] * 0.2
                box_height = target_size[0] * 0.1
                rect = patches.Rectangle((box_x, box_y), box_width, box_height, linewidth=1, edgecolor='r', facecolor='none')
                ax.add_patch(rect)

        ax.axis('off')
        plt.show()

Predict on validation data

predictions = model.predict([x_val, y_val])

Convert predictions to binary format for visualization

predictions_binary = (predictions > 0.5).astype(int)

Display some predictions with annotations

num_visualize = min(5, len(x_val)) # Visualize up to 5 images, adjust as needed
visualize_predictions(x_val[:num_visualize], y_val[:num_visualize], predictions_binary[:num_visualize], image_files[:num_visualize], target_size)
