Tuesday, May 12, 2015

Write a program to compare two files A and B and report the following: lines present in either file A or B = A U B, lines common to both A and B = A & B, lines present in A but not in B = A - B, lines present in B but not in A = B - A.

#!/usr/local/bin/perl

##Script to compare 2 NEWS files
## Generates 3 files
## FILE_1_only: This file contains lines present only in first file
## FILE_2_only: This file contains lines present only in second file
## FILE1_2: This file contains lines common to both files

use strict;
use warnings;

# Line tables for the two input files. Both are tied to Tie::IxHash so that
# iterating over their keys yields the lines in insertion order.
use Tie::IxHash;

tie my %hash1, 'Tie::IxHash';
tie my %hash2, 'Tie::IxHash';

# trim($string)
# Returns a copy of $string with whitespace stripped from both ends.
# Whitespace inside the string is left untouched; the argument itself is
# not modified.
sub trim {
    my ($text) = @_;

    # One pass: drop the leading run, then the trailing run.
    $text =~ s/^\s+|\s+$//g;

    return $text;
}

# ---------------------------------------------------------------------------
# Main script body.
#
# Takes exactly two file names on the command line, trims surrounding
# whitespace from every line, and writes three reports into the current
# directory:
#   FILE1_2      - lines found in both files
#   FILE_1_only  - lines found only in the first file
#   FILE_2_only  - lines found only in the second file
# Each distinct line is written once, in the key order of %hash1 / %hash2.
#
# Exits with status 1 (after printing usage to STDERR) when the argument
# count is wrong; dies if any file cannot be opened or closed.
# ---------------------------------------------------------------------------
if (@ARGV != 2) {
    print STDERR "Please pass the files to be compared as command line arguments \n";
    print STDERR "Usage: $0 file1 file2 \n";
    print STDERR "Ex: $0 NEWS_5_0_1 NEWS_3_15_0 \n";
    print STDERR "O/P: FILE_1_only, FILE_2_only and FILE1_2 \n";

    # Stop here: without two file names there is nothing to compare.
    exit 1;
}

my ($first_file, $second_file) = @ARGV;

# Three-argument open with lexical handles: the file name is taken literally
# and is never parsed for mode characters such as '>' or '|'.
open(my $first_in, '<', $first_file)
    or die "Couldnot open file $first_file, $!";
open(my $second_in, '<', $second_file)
    or die "Couldnot open file $second_file, $!";

# Read both inputs completely before any report file is created, so that
# passing an earlier report (e.g. FILE1_2) as input does not truncate it
# before it has been read.
while (my $line = <$first_in>) {
    chomp $line;
    $hash1{ trim($line) }++;
}
close($first_in);

while (my $line = <$second_in>) {
    chomp $line;
    $hash2{ trim($line) }++;
}
close($second_in);

open(my $common_out, '>', 'FILE1_2')
    or die "Could not open file FILE1_2 for writing, $!";
open(my $first_only_out, '>', 'FILE_1_only')
    or die "Could not open file FILE_1_only for writing, $!";
open(my $second_only_out, '>', 'FILE_2_only')
    or die "Could not open file FILE_2_only for writing, $!";

# Lines of the first file go either to the "common" or the "first only"
# report. exists() tests key presence directly rather than relying on the
# stored count being true.
foreach my $line (keys %hash1) {
    my $out = exists $hash2{$line} ? $common_out : $first_only_out;
    print {$out} "$line\n";
}

# Lines of the second file that the first file does not contain.
foreach my $line (keys %hash2) {
    next if exists $hash1{$line};
    print {$second_only_out} "$line\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close($common_out)      or die "Could not close file FILE1_2, $!";
close($first_only_out)  or die "Could not close file FILE_1_only, $!";
close($second_only_out) or die "Could not close file FILE_2_only, $!";




Usage: ./compare_NEWS.pl NEWS_5.0.1 NEWS_3.15.0

O/P:
FILE_1_only: This file contains lines present only in first file
FILE_2_only: This file contains lines present only in second file
FILE1_2: This file contains lines common to both files

No comments:

Post a Comment