Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
package com.thealgorithms.compression;

/**
 * An implementation of the Run-Length Encoding (RLE) algorithm.
 *
 * <p>Run-Length Encoding is a simple form of lossless data compression in which
 * runs of data (sequences in which the same data value occurs in many
 * consecutive data elements) are stored as a single data value and count,
 * rather than as the original run.
 *
 * <p>This implementation provides methods for both compressing and decompressing
 * a string. For example:
 * <ul>
 * <li>Compressing "AAAABBBCCDAA" results in "4A3B2C1D2A".</li>
 * <li>Decompressing "4A3B2C1D2A" results in "AAAABBBCCDAA".</li>
 * </ul>
 *
 * <p>Format: each run is written as its decimal count (ASCII digits {@code 0-9})
 * immediately followed by the repeated character. Because counts and payload
 * share the same alphabet, an input that itself contains ASCII digits cannot be
 * round-tripped (e.g. "111" compresses to "31", which decompresses to an empty
 * string). All other characters, including non-ASCII Unicode digits, are treated
 * as ordinary symbols.
 *
 * <p>Time Complexity: O(n) for both compression and decompression, where n is the
 * length of the input string (decompression is additionally linear in the size
 * of the produced output).
 *
 * <p>References:
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Run-length_encoding">Wikipedia: Run-length encoding</a></li>
 * </ul>
 */
public final class RunLengthEncoding {

    /**
     * Private constructor to prevent instantiation of this utility class.
     */
    private RunLengthEncoding() {
    }

    /**
     * Compresses a string using the Run-Length Encoding algorithm.
     *
     * @param text The string to be compressed. May be null.
     * @return The compressed string, or an empty string if the input is null or empty.
     */
    public static String compress(String text) {
        if (text == null || text.isEmpty()) {
            return "";
        }

        StringBuilder compressed = new StringBuilder();
        int count = 1;
        final int lastIndex = text.length() - 1;

        for (int i = 0; i <= lastIndex; i++) {
            char current = text.charAt(i);
            // A run ends at the last character or when the next character differs.
            if (i == lastIndex || current != text.charAt(i + 1)) {
                compressed.append(count).append(current);
                count = 1; // Start counting the next run
            } else {
                count++;
            }
        }
        return compressed.toString();
    }

    /**
     * Decompresses a string that was compressed using the Run-Length Encoding algorithm.
     *
     * <p>Only ASCII digits {@code 0-9} are interpreted as part of a run count; every
     * other character is a run symbol. A symbol that is not preceded by a count is
     * emitted zero times, and digits that are not followed by a symbol are ignored.
     *
     * @param compressedText The compressed string. May be null.
     * @return The original, uncompressed string, or an empty string if the input is null or empty.
     */
    public static String decompress(String compressedText) {
        if (compressedText == null || compressedText.isEmpty()) {
            return "";
        }

        StringBuilder decompressed = new StringBuilder();
        int count = 0;

        for (int i = 0; i < compressedText.length(); i++) {
            char ch = compressedText.charAt(i);
            if (isAsciiDigit(ch)) {
                // Build the number for runs of 10 or more (e.g., "12A")
                count = count * 10 + (ch - '0');
            } else {
                // Append the character 'count' times; Math.max guards against a
                // negative count, which can only arise from int overflow.
                decompressed.append(String.valueOf(ch).repeat(Math.max(0, count)));
                count = 0; // Reset count for the next sequence
            }
        }
        return decompressed.toString();
    }

    /**
     * Tells whether the character is one of the ASCII digits that compress() emits for counts.
     *
     * <p>Character.isDigit is intentionally not used: it also accepts non-ASCII
     * decimal digits, for which {@code ch - '0'} is not the digit value and which
     * compress() writes as literal symbols.
     */
    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }
}
159 changes: 159 additions & 0 deletions src/main/java/com/thealgorithms/compression/ShannonFano.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
package com.thealgorithms.compression;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Generates prefix codes for the characters of a string with the Shannon-Fano algorithm.
 *
 * <p>Shannon-Fano coding is an entropy encoding technique for lossless data
 * compression and a precursor to Huffman coding. Frequent symbols receive short
 * codes and rare symbols receive longer ones.
 *
 * <p>Steps performed by this implementation:
 * <ol>
 * <li>Count how often each character occurs in the input.</li>
 * <li>Sort the symbols by descending frequency.</li>
 * <li>Split the sorted range into two consecutive parts whose frequency sums
 * are as close to each other as possible.</li>
 * <li>Append '0' to the codes of the first part and '1' to the codes of the
 * second part.</li>
 * <li>Recurse into each part until it holds a single symbol.</li>
 * </ol>
 *
 * <p>Time Complexity: O(n^2) in this implementation due to the partitioning logic,
 * or O(n log n) if a more optimized partitioning strategy is used.
 * Sorting takes O(n log n), where n is the number of unique symbols.
 *
 * <p>References:
 * <ul>
 * <li><a href="https://en.wikipedia.org/wiki/Shannon%E2%80%93Fano_coding">Wikipedia: Shannon&ndash;Fano coding</a></li>
 * </ul>
 */
public final class ShannonFano {

    /**
     * Utility class; not meant to be instantiated.
     */
    private ShannonFano() {
    }

    /**
     * A character paired with its occurrence count and, once assigned, its code.
     * The natural ordering places higher frequencies first.
     */
    private static class Symbol implements Comparable<Symbol> {
        final char character;
        final int frequency;
        String code = "";

        Symbol(char character, int frequency) {
            this.character = character;
            this.frequency = frequency;
        }

        @Override
        public int compareTo(Symbol other) {
            // Arguments are swapped on purpose to obtain a descending order.
            return Integer.compare(other.frequency, this.frequency);
        }
    }

    /**
     * Generates Shannon-Fano codes for the symbols in a given text.
     *
     * @param text The input string for which to generate codes. A null or empty
     *             string yields an empty map.
     * @return A map from each distinct character of the text to its code. A text
     *         with a single distinct character maps that character to "0".
     */
    public static Map<Character, String> generateCodes(String text) {
        if (text == null || text.isEmpty()) {
            return Collections.emptyMap();
        }

        // Step 1: tally the occurrences of every character.
        Map<Character, Integer> occurrences = new HashMap<>();
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            occurrences.put(c, occurrences.getOrDefault(c, 0) + 1);
        }

        // Step 2: wrap the tallies into symbols and order them by frequency.
        List<Symbol> ranked = new ArrayList<>();
        occurrences.forEach((character, frequency) -> ranked.add(new Symbol(character, frequency)));
        Collections.sort(ranked);

        // Step 3: assign codes. A lone symbol cannot be split, so it gets "0" directly.
        if (ranked.size() > 1) {
            buildCodeTree(ranked, 0, ranked.size() - 1, "");
        } else {
            ranked.get(0).code = "0";
        }

        return ranked.stream().collect(Collectors.toMap(symbol -> symbol.character, symbol -> symbol.code));
    }

    /**
     * Recursively assigns codes to the symbols in the inclusive index range
     * {@code [start, end]} by repeatedly splitting the range in two.
     *
     * @param symbols The sorted list of symbols to be processed.
     * @param start The start index of the current partition.
     * @param end The end index of the current partition (inclusive).
     * @param prefix The code bits accumulated so far for every symbol in this partition.
     */
    private static void buildCodeTree(List<Symbol> symbols, int start, int end, String prefix) {
        if (start != end) {
            int lastOfLeft = findSplitIndex(symbols, start, end);
            // The left half extends the prefix with '0', the right half with '1'.
            buildCodeTree(symbols, start, lastOfLeft, prefix + "0");
            buildCodeTree(symbols, lastOfLeft + 1, end, prefix + "1");
            return;
        }
        // A single symbol remains: the accumulated prefix is its final code.
        symbols.get(start).code = prefix;
    }

    /**
     * Finds the split of the inclusive range {@code [start, end]} whose two parts
     * have the smallest absolute difference in total frequency. When several
     * splits are equally balanced, the earliest one is returned.
     *
     * @param symbols The sorted list of symbols.
     * @param start The start index of the range.
     * @param end The end index of the range (inclusive).
     * @return The index of the last element in the first partition.
     */
    private static int findSplitIndex(List<Symbol> symbols, int start, int end) {
        long rangeTotal = 0;
        for (Symbol symbol : symbols.subList(start, end + 1)) {
            rangeTotal += symbol.frequency;
        }

        int best = start;
        long smallestGap = Long.MAX_VALUE;
        long running = 0;

        // Candidate split points are start..end-1 so that the right part is never empty.
        for (int candidate = start; candidate < end; candidate++) {
            running += symbols.get(candidate).frequency;
            // left - right == running - (rangeTotal - running)
            long gap = Math.abs(running - (rangeTotal - running));
            if (gap < smallestGap) {
                smallestGap = gap;
                best = candidate;
            }
        }
        return best;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package com.thealgorithms.compression;

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class RunLengthEncodingTest {

    @Test
    void testNullInputs() {
        // Both directions map a null argument to the empty string.
        assertEquals("", RunLengthEncoding.compress(null));
        assertEquals("", RunLengthEncoding.decompress(null));
    }

    @Test
    void testCompressionSimple() {
        // Several runs of different lengths, including a repeated symbol ('A') in two separate runs.
        assertEquals("4A3B2C1D2A", RunLengthEncoding.compress("AAAABBBCCDAA"));
    }

    @Test
    void testCompressionWithNoRuns() {
        // Every character differs from its neighbour, so each run has length 1.
        assertEquals("1A1B1C1D1E", RunLengthEncoding.compress("ABCDE"));
    }

    @Test
    void testCompressionEdgeCases() {
        // Empty input
        assertEquals("", RunLengthEncoding.compress(""));

        // Input consisting of exactly one character
        assertEquals("1A", RunLengthEncoding.compress("A"));

        // One run whose length needs two digits
        assertEquals("10Z", RunLengthEncoding.compress("ZZZZZZZZZZ"));
    }

    @Test
    void testDecompressionSimple() {
        // Inverse of the typical compression example.
        assertEquals("AAAABBBCCDAA", RunLengthEncoding.decompress("4A3B2C1D2A"));
    }

    @Test
    void testDecompressionWithNoRuns() {
        // Every count is 1, so the output is just the symbols in order.
        assertEquals("ABCDE", RunLengthEncoding.decompress("1A1B1C1D1E"));
    }

    @Test
    void testDecompressionWithMultiDigitCount() {
        // "12A" must be read as twelve 'A's, not as one '2' followed by 'A'.
        assertEquals("AAAAAAAAAAAABCCC", RunLengthEncoding.decompress("12A1B3C"));
    }

    @Test
    void testDecompressionEdgeCases() {
        // Empty input
        assertEquals("", RunLengthEncoding.decompress(""));

        // A single run of length one
        assertEquals("A", RunLengthEncoding.decompress("1A"));
    }

    @Test
    void testSymmetry() {
        // compress followed by decompress must reproduce the original text.
        String[] originals = {
            "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB",
            "A",
        };
        for (String original : originals) {
            String roundTripped = RunLengthEncoding.decompress(RunLengthEncoding.compress(original));
            assertEquals(original, roundTripped);
        }
    }
}
Loading