Skip to content

Commit 4b27bb2

Browse files
authored
All the files regarding the project are uploaded.
Files include 1. tree.c - contains the Data Structure AVL Tree for efficiently maintaining the Genes data with fast query processing 2. tree.h - header file which contains details of functions used in tree.c 3. project.c - This is a driver program which takes the input from a file and calculates the match percentage with reference GENE. 4. database.txt - This file contains the sample database of 10 people with their name, age and GENE(1000 characters length)
1 parent fe551e3 commit 4b27bb2

File tree

4 files changed

+320
-0
lines changed

4 files changed

+320
-0
lines changed

database.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Michael 51 CGATAAGTACAGGCTGAGCTTTTGAGATTCCGAGATTAACTGCGAAATTCGGGGCCCCACTGCATCCCATACTGGAAGCTTAGCGGTACTAACGTTGGCCTCGAACACAACGCATTGTTTTCGAAAGTAGATCCTGCATCTTCACATCGCGTACTAAATCGAAGACGCTGTTCCAGGCATGAGTATGGCGCATAATGCATGAAATTGTCGTAGAGATGCAAGCAAAAGGTATCGCCTGATCAAGTCTAATTTTAGCCCTTAAGCCTATTACGTTTTTTACTCTTAAAAGGGGAATGGTACGTCCTGAAAGGGGCTTGGAAGAGGGTGATTACACCTCGTCCGTATGCACGCCGGCAGCAGCTCCCACCAGTCGAGGTCTTCATTCGAACGACTGATCGTCCGGCTGCACGTCTATTCATGGTGTGCTTACGCTTGGACAGGTGCCGCATCGCCGTTGTTTTATATCCCGCTCGTAGCGAGGACGGAGCTAACCGATCTCC
2+
Uwi 43 TTAAACTGCAGAGAGCGACAGGGGCCACGTACCATCGCCACCTGTGGGGAAGGAGGAACGCTATAAGCGTATTACTGCCGCTCATACCTAGACCTCCCGTACTTTCAAATCAATCTTCGTGGCCGCCAGTGATCTAGGCCAGTACTGTGGTCAAATTTAGCCGTACCTGGTCGATGGAGTCAGGTGGTATGCATTGGTGTAGGCAGCGCCCCTATCGGAAGCATCGATGACACATCTACAATCGCAAACTATAAGGGCCGTCCGTACCGGGCCAACTCTGAGAATGACCCGAGACCACAAGACTGTCAATCATCAGTCCCCCACGGGCGTAGGGGAGCAAAGGAGAGGTGTACGGCATATTTGCTAATACGACGGACGAAAGATTGATTGGGGACTAGCAGAGCATCTATCTCTATGATAACGCGGCATTTTTCATTAGCTAGAGGCTGCGTGTGGCGTGACTCTAACGTGGTACTCTCTGTCGGTTCCTGGAGCACGTA
3+
Gennady 27 GCGGTATTCGTAGAGATAAAGGTTTAGTGAAGTGCCGGCTGGTTCAGTCTACGTTAAGGAAAATATAAAACGCCATAAATGGGATGTTCCAACCCAGGGAGCGTCCCAAACGGGCAGGTCGGATCCTACAGTTCCAAACGACACCTCTTTGGTATCTTCACCGGAACCATGCACGGTCGAACCCACTGTAAAGAGGTTATGGTCCTTCATATTCATCTGGCGTTAGACTCTCGACGGATCAGAAAGGTCTTCGTACTTCAGGCAATGAAGCTGGATGAAGGCGTGTCTGTGAAGAAACATGTGGATGACACTGATAAAGCTGGTCGCAGTCTCGCTCCCTGCGTTGGAAAAGATCTGCAAGCTAAATCATCAATTCTCTAGGTGGATTTAAGAGCTTTACGGGTATAGTGTGTTTGGTAGGCGTTATGTGTTTGAACTACTACTTCTGGACGCGCCCCGGTCCGTGATTCATTCCTCGACTCACAGGAAGATTCGGGAAC
4+
Petr 43 CTATTTTCGCACATCGGCTGGTGACCAAGGAACGTTTTCATTGAGCCTACTCTGCGCTTCCCCTTCAGCCGCTGATTGCAGATGTCCTGACCCCATGTAGCGTACGCATGAAATACTATGGTTACTGATGTCGCGGCGGGCGAAACCGTTCATCTCTCCTCTACCGCGATTATAATAATCGTCTCGCGCCGTACAAGCGCCATATGACTCGCCGCGGCACAGGCAGGTCGTGAGGACTATGTCACGTGATCTATTACGTGCCGGGCCCTTAATTACTGCGGTTAATTAATGCATCTGATTTGACACATGGCTCTTACTAGATAAAGGAAACGGAACCACTCGGATAATTAACGTATCATTACTGACACACGGTGTGGCTCGTCGAGTGCATATTATCGATTATGCTGTGATGGGCAACACTGGTACCCAGAGCAGATAGGCGCGGTGTGAATGGTCTTCTTCATTGAAAATACTCATCGAAGTGTCACATCTATAATGTG
5+
Sumeet 24 CATCGGGCAAGAATACCAAATATAAACTTTGTTTTCAAGACTGCCTGGATCGTTCGCCCGGTTAATTGAAGTGCTTTCAACCCGCCTAACACGATACATTTGAACCGTAGGAACTCTTGATCTTCGCCTGAACCCCCTGAACAGCCTGCAGTCGCACTACCCCGCCCGGTTGAGGGCAGCACTAGGGAGGCCTCGACTACAAGGAGATGGTGACCCGGTACCCCCGAGGTGCTAGTACGTGTTGCCAACTATGAGCTAGATAGCTGTCTGTAATCAGCAGATTCCACAGTCTGTGGCGGAGGCGATCAAAATCAGGCCGTATAGATCTAATCCAGTGCATTGGTGACTTGCGTAACACGGTAATTCACGGCAGAACGCTTGTCCGGAATATCCGTGCACCAGCTGGAGCCATACATCCAACCGCTGAGGGGTCCTAGTAGTGCCGACGAGCACAAGGAGATTTTATTAAACCCGCTTTTATTGCAACATCCTAGAACCGG
6+
Evgeny 13 ATTGTCACTAGTCTCGCCGATGTATCAACTGAAGTCAACCTGTGCCGGAAGTTACCTGCCATCTTGGACGAGAGCGCATCCGGTCAAAGGAATCGTGTAGCATCCAGGGGCTTTGACCAATTAGAGCTAACTGCGGTATGCGTTGGAGATCTGTTGCCGAGTAATGTCCGATACCGTCGGATAGAATAAGGGTGGTAGACAGATCCTAGGCGGAAACACATGACATTGCCAGAACAGGCAGTCATTCGAAAGAAAACATAGAAGAACACGATAGGTCTAAGAGGCATAAGGCTGGTACCCAATATATTGAACGACCATTAACTCTAAGCCAGCCTCACGAATCGCAGCTAGCTCTAACGGTGAATTTAGCCTCTTTGACACAATGGGTGCGACCTCCGTCGAAGTCTTGAGCCGTCAAAATCGATGGGCATCTGCTAAATATAACTCACTACAACCGCCAGATAGGGCTCGTCTACAGGAGTACCTAAGGTGTTAGTGCA
7+
Pavel 71 GGGCGGCCTAATTGAGCCAGCGAGTCGCATAGGTCTGGGGTCCTCGGTTGTTGAGGATCAGTCTGCATCAGGCATTTGAAAGTACGATCAGCGAACCAGGATACCCCGGAAGAGAGGTTCGGGACCAGACAATAATACACGGATACAAACGAACCAATACCTGAGTGAGTAATGTATTCTAGAAGTTCCGGGTGTTCATAGTACCACGCATTTTTCAGTACCCCTGAAAGCTACCTAGGTTCTGTAGTTCCCTGGCAAATTGGCAGTAAGGAACCTCCGGGATTCTTATTCGCACGTCCACCGGCACACCCGGATTGCGCTTAGAAGGCGAAGTCTCGGCTCCTGAGACACAACAGGGCCCGTACAGCCACTTCACTATTATGCTATTCCGTTAATATTGACCGCCAAACTTCAAATAGATGCATAACCCTAGGTCCCAACCGCGCTCAGATCCTCGGGAGGTGACTCGAAGTTTCCTACTATATGTAATACAGCTCACC
8+
Sasha 57 TCACATGTCCGAAAACACGAGTGTCTCGGAGTGCTTCGAACACCGGGTACAGTTTTGTTGACTGAGAGTTAACAGCATTAGTATTTTAGTACATCATTCGGTGTGCCCAATGGTTGCTTGCGGAATTTCATCAGCAATTTCACAAGGTCCTATCAGACGATCTGTTCCGGGTAAGCTTTTTTCATGGAGACTCGTGGTTAGTTTAACTACACAGACGTTCAGCAGTCTCGTTGATGCTGATTTGTCGGAGCCCACGACTCCTAATTACTGTCAGGCACTACCTCGCGTCTTGCTTAATTGGCCGTTTTTGATCTCACTACGCTCCGTATACCTATTTGATCACATATTAGGGGTCTCTCGGGAACGGACATCCGCCAACCTAAATCAGGTATGGGGCTCCCAGGAAATAATGATTCTTTGTCCTTGGACCCGGGACAATCTTCTCTTTTGACAGTCAATATGCGGGAGTAGCAATTCGTTTCGGGGTTTTTGTGTTTAAG
9+
Ilya 64 AAGCCGTCGACGTAGTCCAGTACTTTTGCTCAGGTACAGTTCCGGATGGATGAACCTGATTCCGATGCCGACTTTCGCCAGAGTGGTAGAACAGAGGACGTAGGACGACTTTAGTAGCGCCAAGTCGAGACCCGATGAATAGCGCGTGGTGGAGAAAAGAGATACCATGTTCAGACCTGCATACTTAATGATGTCACTAGATCTATTAACACTCACCTACATCCAGAAACAAATACACGACGCGTCATTCACATAAGAATGATGCACTAAGACTCTATGGCGAGGGGGAGAAAGCTAGCCAAGCTGACTTGGGCTCGAGGTAGAACTTCAGGACTCATCAGCTTTTTGTCAATCCTATAGTGCTCTAGACGATCCGTTGAAGCCACACTGCCTCGAGGGTGCGCCTCGTGCCGCATCCCCCACTACACTGACTTAGATGTTTCTGTGCTCGAGTACTCCGATGTTCCAACCCTTATCAGAGAAGGCTCAGAGCGCCCGCT
10+
Andrey 21 CCCGCTAGAAGCTTTCACCTTAGGCATGAATATGGCGGACACGCCTGATGATAGGACTGGTGTATCATGCCGGCACTACGCTTCTTGCTTCAATAACCACATACTGAGAGGTAAAACCCTCAACTTGGGGCTAGCGAGTACCGTTACCCCACAATCTGCGACTCTCTGTCATCCAGGTGACTTTCGTTAACGAGGGCTAAGGAGCCGAACCACTGGGTTATACTGATGCGGTTCCCATGGTTAGGCAACCTAAAGGTTCTAAGCTTCGTCCGGCGGTCTTAAAGGGAGCTATCGAGTTATGATGCGATCGCCCGCTGTTTGGCTCATAGCACACACAGATAGTAGGGTTTGCACTGTGAATGTTCGTTTCATTTCTCCTCGGGACGGCAAATACAGGACCGAAGGTGGACCGGATATAGATGAATTTGTTGCCTGGGATGCGTCACCGAAAGCATTGTCGTGGCACATTTACTGCATCACGCTACCTAAGCTGTAATGCT

project.c

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
/*************************************************************DBMS PROJECT*************************************************************************
2+
* Project deals with database management of a person's GENES. The database stores each person's name, age and GENE information
3+
* (ACGT format).
4+
*
5+
* In the scientific researches GENES information is stored as well as their match percentage with the sample GENE to refine the searches accordingly.
6+
* A database is also created to get results and to find the region (range of percentages) in which match percentages occur most frequently.
7+
* This helps them to make medicines, perform experiments with minimum risks.
8+
*
9+
* The current program has the capacity to store and process the data of up to 1000 persons, each with a GENE of up to 500 characters.
10+
* The program also supports RANGE QUERIES, giving information about the users lying in the range together with their match percentage and age.
11+
*
12+
* This sample Database is created using rand() in C.
13+
*
14+
* To enable fast queries processing, program uses Levenshtein Matching algorithm to compute the match percentage between two strings and AVL Tree to
15+
* store the match percentages accordingly.
16+
***************************************************************************************************************************************************/
17+
18+
#include <stdio.h>
19+
#include <string.h>
20+
#include <stdlib.h>
21+
#include <math.h>
22+
#include "tree.h"
23+
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))
24+
25+
int levenshteinmatch(char* s1, char* s2);
26+
27+
int main(int argc, char const *argv[])
28+
{
29+
/* CODE */
30+
node* root = NULL;
31+
32+
FILE* f = fopen("database.txt","r");
33+
34+
// If no such file exists
35+
if(f == NULL) {
36+
printf("No such file exists in the directory\n");
37+
return 0;
38+
}
39+
40+
// Sample GENE from which each users GENE will be matched
41+
char sample[500] = "GCCCCCACAGCTGAAATGGGGGCCTTCCGCATACATGATTACATGTCGGCAGTTACTGCGCTTTCACCCGGGACCACTTGTGGAAAAACTCTCTGAGTATCCCTCCGGAAGACATACATGCTGAGCTAGCCGATAAGCCGCGTAATATGGAAACAAATACGGTGACAGTGTTCAATCTAATCTACCTATTTGCGTGGGACCTTGAATTTAAAAGATCAGTTGGGTGATTATGAAGTCATCTTATCCAGTAGGAAATCCTACCTTGCTCGAACATAACCCTATGAGCGAGTAGCGAGGTACGGTAGCTAGACTTCTGAAGCTCGCTTACCCTGAAGAGACGGATTAACAAGGGTGGATTAGCTAAGTTGTAAGGGTCTGTAATATGGATAACTCGCAGTATGCGGTCCTGTTCGTGTCCTTAAGGGCAGCATCGTATTTCAGCGCAGTATACTGAGCTACCTCTGTTCATGATCGTATTTCCGTACTAGCTCGCACAGAGG";
42+
43+
44+
// Input from the file e.g. Name Age Gene (all in a single line)
45+
char nam[100],genes[500];
46+
int new_age;
47+
48+
// Taking input from file until End Of File is not reached
49+
while( fscanf (f,"%s %d %s",nam,&new_age,genes) != EOF) {
50+
51+
// Memory alloted to a node and initialised accordingly
52+
node* new_node = getnode();
53+
strcpy(new_node->name,nam);
54+
strcpy(new_node->gene,genes);
55+
new_node->age = new_age;
56+
new_node->left = new_node->right = NULL;
57+
new_node->height = 1;
58+
59+
// levenshteinmatch(sample,genes) / 5 is done because to we are taking GENES length to be 500.
60+
// And to calculate % we multiply the number by 100. So 500 is reduced to 5. Thus computing the correct results.
61+
// To compute more refine results we can use FLOAT or DOUBLE data type to store the match percentage.
62+
new_node->matchpercentage = levenshteinmatch(sample,genes) / 5;
63+
64+
// Data is inserted in the tree.
65+
root = insert(root,new_node);
66+
}
67+
68+
// Takes the range of input in the form of L R where L <= R and L and R are two integers
69+
printf("Enter the range to receive all names with their ages having GENES matching percentage lying in the range\n");
70+
printf("Input in form e.g. 24 56\n");
71+
int left_bound, right_bound;
72+
scanf("%d %d",&left_bound, &right_bound);
73+
if(left_bound > right_bound || left_bound < 0 || right_bound > 100){
74+
printf("Invalid range\n");
75+
} else {
76+
printf("Type 1 for increasing order of GENE matching percentage\n"
77+
"Type 2 for decreasing order of GENE matching percentage\n");
78+
int order;
79+
scanf("%d", &order);
80+
if(order == 1){
81+
// Displays the data in the increasing order of match percentage
82+
printf("\nName Match - Percentage Age\n");
83+
printincreasing(root,left_bound,right_bound);
84+
} else if(order == 2) {
85+
// Displays the data in the decreasing order of match percentage
86+
printf("\nName Match - Percentage Age\n");
87+
printdecreasing(root,left_bound,right_bound);
88+
} else {
89+
printf("Invalid input\n");
90+
return 0;
91+
}
92+
}
93+
fclose(f);
94+
return 0;
95+
}
96+
97+
/*
 * Computes the Levenshtein (edit) distance between s1 and s2: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other.
 *
 * At most the first 500 characters of each string are considered. The length
 * of each input is the position of its first '\0' within those 500 bytes, or
 * 500 if there is none, so a full-length 500-character buffer without a
 * terminator is still handled.
 *
 * Only two rows of the dynamic-programming table are kept, so the function
 * needs a few kilobytes of stack instead of a 501 x 501 table.
 *
 * Returns the distance, a value between 0 and 500.
 */
int levenshteinmatch(char* s1, char* s2)
{
    enum { MAX_GENE_LENGTH = 500 };

    /* Bounded length lookup: never scans more than MAX_GENE_LENGTH bytes. */
    const char *end1 = memchr(s1, '\0', MAX_GENE_LENGTH);
    const char *end2 = memchr(s2, '\0', MAX_GENE_LENGTH);
    size_t len1 = end1 ? (size_t)(end1 - s1) : (size_t)MAX_GENE_LENGTH;
    size_t len2 = end2 ? (size_t)(end2 - s2) : (size_t)MAX_GENE_LENGTH;

    unsigned int prev[MAX_GENE_LENGTH + 1];  /* distances for s1[0..i-1) */
    unsigned int curr[MAX_GENE_LENGTH + 1];  /* distances for s1[0..i)   */
    size_t i, j;

    /* Turning the empty prefix of s1 into s2[0..j) takes j insertions. */
    for (j = 0; j <= len2; j++) {
        prev[j] = (unsigned int)j;
    }

    for (i = 1; i <= len1; i++) {
        /* Turning s1[0..i) into the empty string takes i deletions. */
        curr[0] = (unsigned int)i;

        for (j = 1; j <= len2; j++) {
            unsigned int substitute = prev[j - 1] + (s1[i - 1] == s2[j - 1] ? 0u : 1u);
            unsigned int remove_one = prev[j] + 1u;
            unsigned int insert_one = curr[j - 1] + 1u;

            unsigned int best = substitute;
            if (remove_one < best) {
                best = remove_one;
            }
            if (insert_one < best) {
                best = insert_one;
            }
            curr[j] = best;
        }

        memcpy(prev, curr, (len2 + 1) * sizeof prev[0]);
    }

    return (int)prev[len2];
}

tree.c

+174
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
#include <stdio.h>
2+
#include <string.h>
3+
#include <stdlib.h>
4+
#include <math.h>
5+
#include "tree.h"
6+
7+
// Memory alloted to a node
8+
node* getnode()
9+
{
10+
node* new_node;
11+
new_node = (node*)malloc(sizeof(node));
12+
return new_node;
13+
}
14+
15+
// Calculates the heigth of node
16+
int height(node* p)
17+
{
18+
if(p == NULL) {
19+
return 0;
20+
} else {
21+
return p->height;
22+
}
23+
}
24+
25+
// Returns the larger of two integers (b when they are equal).
int max(int a, int b)
{
    if (a > b) {
        return a;
    }
    return b;
}
30+
31+
// Compute the balance factor of any node
32+
int getBalance(node* p)
33+
{
34+
if(p == NULL) {
35+
return 0;
36+
} else {
37+
return height(p->left) - height(p->right);
38+
}
39+
}
40+
41+
// Rotates the node in left direction
42+
node* rotateleft(node* x)
43+
{
44+
node *y = x->right;
45+
node *T2 = y->left;
46+
47+
y->left = x;
48+
x->right = T2;
49+
50+
x->height = max(height(x->left), height(x->right))+1;
51+
y->height = max(height(y->left), height(y->right))+1;
52+
53+
return y;
54+
}
55+
56+
// Rotates the node in right direction
57+
node* rotateright(node* y)
58+
{
59+
node *x = y->left;
60+
node *T2 = x->right;
61+
62+
x->right = y;
63+
y->left = T2;
64+
65+
y->height = max(height(y->left), height(y->right))+1;
66+
x->height = max(height(x->left), height(x->right))+1;
67+
68+
return x;
69+
}
70+
71+
// Insert a node in the tree and balance the whole tree to maintain the height of tree in order of O(log n).
72+
node* insert(node* root, node* p)
73+
{
74+
if (root == NULL)
75+
return p;
76+
77+
if (p->matchpercentage < root->matchpercentage) {
78+
root->left = insert(root->left, p);
79+
} else if (p->matchpercentage > root->matchpercentage) {
80+
root->right = insert(root->right, p);
81+
} else if(p->matchpercentage == root->matchpercentage) {
82+
if(p->age < root->age) {
83+
root->left = insert(root->left, p);
84+
} else if(p->age > root->age) {
85+
root->right = insert(root->right, p);
86+
} else {
87+
return root;
88+
}
89+
} else {
90+
return root;
91+
}
92+
93+
root->height = 1 + max(height(root->left),height(root->right));
94+
95+
int balance = getBalance(root);
96+
97+
if (balance > 1 && p->matchpercentage < root->left->matchpercentage)
98+
return rotateright(root);
99+
100+
if (balance < -1 && p->matchpercentage > root->right->matchpercentage)
101+
return rotateleft(root);
102+
103+
if (balance > 1 && p->matchpercentage > root->left->matchpercentage)
104+
{
105+
root->left = rotateleft(root->left);
106+
return rotateright(root);
107+
}
108+
109+
if (balance < -1 && p->matchpercentage < root->right->matchpercentage)
110+
{
111+
root->right = rotateright(root->right);
112+
return rotateleft(root);
113+
}
114+
115+
return root;
116+
}
117+
118+
// Preorder representation of the tree
119+
void preorder(node* root)
120+
{
121+
if(root == NULL) {
122+
return;
123+
}
124+
printf("%s\n",root->name);
125+
preorder(root->left);
126+
preorder(root->right);
127+
}
128+
129+
// Print the details of the person whose match percentage lies in the range left_bound & right_bound in increasing order
130+
void printincreasing(node* root, int left_bound, int right_bound)
131+
{
132+
if(root == NULL){
133+
return;
134+
}
135+
printincreasing(root->left,left_bound,right_bound);
136+
if(root->matchpercentage >= left_bound && root->matchpercentage <= right_bound) {
137+
print_node(root);
138+
}
139+
printincreasing(root->right,left_bound,right_bound);
140+
}
141+
142+
// Print the details of the person whose match percentage lies in the range left_bound & right_bound in decreasing order
143+
void printdecreasing(node* root, int left_bound, int right_bound)
144+
{
145+
if(root == NULL) {
146+
return;
147+
}
148+
printdecreasing(root->right,left_bound,right_bound);
149+
if(root->matchpercentage >= left_bound && root->matchpercentage <= right_bound) {
150+
print_node(root);
151+
}
152+
printdecreasing(root->left,left_bound,right_bound);
153+
}
154+
155+
// Print the node in indented format to make user - understandable
156+
void print_node(node* person)
157+
{
158+
int len = strlen(person->name);
159+
printf("%s",person->name);
160+
int i = 0;
161+
for(i = 0;i < 15 - len; i++){
162+
printf(" ");
163+
}
164+
for(i = 0;i < 8; i++){
165+
printf(" ");
166+
}
167+
printf("%d",person->matchpercentage);
168+
if(person->matchpercentage < 10){
169+
printf(" ");
170+
} else {
171+
printf(" ");
172+
}
173+
printf("%d\n",person->age);
174+
}

tree.h

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#ifndef TREE_H
#define TREE_H

// Node stores the information about one person: name, age, GENE and the
// match percentage computed for that GENE, plus the AVL tree bookkeeping
// (subtree height and child links).
typedef struct Node {
    int height;             // height of the subtree rooted at this node (a leaf is 1)
    int age;
    int matchpercentage;    // primary ordering key of the tree
    char name[100];         // NUL-terminated, at most 99 characters
    char gene[501];         // NUL-terminated, room for a full 500-character GENE
    struct Node* left;
    struct Node* right;
}node;

// Allocates one node; the contents are left for the caller to initialise.
node* getnode(void);

// Height stored in p, or 0 when p is NULL.
int height(node* p);

// Larger of a and b.
int max(int a, int b);

// Height of p's left subtree minus height of its right subtree (0 for NULL).
int getBalance(node* p);

// Single rotations; each returns the new root of the rotated subtree.
node* rotateleft(node* x);
node* rotateright(node* y);

// Inserts p into the tree rooted at root and returns the new root.
node* insert(node* root, node* p);

// Prints every name in preorder.
void preorder(node* root);

// Print the persons whose match percentage lies in [left_bound, right_bound],
// in increasing / decreasing order of match percentage.
void printincreasing(node* root, int left_bound, int right_bound);
void printdecreasing(node* root, int left_bound, int right_bound);

// Prints one person as a formatted table row.
void print_node(node* person);

#endif /* TREE_H */

0 commit comments

Comments
 (0)