Add libfec as external project

The CMake build system will first try to locate libfec in the system. If
it is not available, it will build the copy of libfec that is bundled with
the gr-satnogs source code. During installation, libfec is installed
in the system, so future builds will use it and will not
rebuild it from source.
This commit is contained in:
Manolis Surligas 2018-01-31 19:18:45 +02:00
parent 02801c9a3f
commit f33f46edb9
126 changed files with 16966 additions and 2 deletions

View File

@ -131,7 +131,6 @@ find_package(Volk REQUIRED)
find_package(OggVorbis REQUIRED)
find_package(PNG REQUIRED)
find_package(png++ REQUIRED)
find_package(Fec REQUIRED)
########################################################################
# Include or not into the module blocks for debugging
@ -150,6 +149,45 @@ if(${INCLUDE_DEBUG_BLOCKS})
endif()
endif()
########################################################################
# Search for the libfec if it is already installed in the system
# If not, install the internal one.
########################################################################
# Prefer a libfec that is already installed; otherwise build the copy that
# lives in ./libfec as an external project and install it with gr-satnogs.
find_package(Fec)
if(NOT FEC_FOUND)
  message(WARNING "libfec is not installed. The internal libfec will be automatically build and install.")
  include(ExternalProject)
  # INSTALL_COMMAND is empty on purpose: the header and the library are
  # installed by the install() rules further down, not by the sub-build.
  ExternalProject_Add(FEC_EXTERNAL
    SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/libfec
    BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/libfec
    CMAKE_ARGS "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
               "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
               "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
    INSTALL_COMMAND ""
  )
  ExternalProject_Get_Property(FEC_EXTERNAL binary_dir)

  # The sub-build names its shared library "fec"; compose the file name from
  # the platform prefix/suffix instead of hard-coding "libfec.so".
  set(FEC_LIBRARIES
    "${binary_dir}/${CMAKE_SHARED_LIBRARY_PREFIX}fec${CMAKE_SHARED_LIBRARY_SUFFIX}")
  set(FEC_INCLUDE_DIRS "${CMAKE_CURRENT_SOURCE_DIR}/libfec")

  add_library(fec SHARED IMPORTED)
  # The library is produced in the external project's build directory. The
  # previous code expanded ${install_dir}, which was never fetched from the
  # external project, so the imported location pointed at "/libfec.so".
  set_property(TARGET fec PROPERTY IMPORTED_LOCATION "${FEC_LIBRARIES}")
  add_dependencies(fec FEC_EXTERNAL)

  # Install the header and the library in the standard places
  install(FILES
    "${FEC_INCLUDE_DIRS}/fec.h"
    DESTINATION "include"
  )
  install(FILES
    ${FEC_LIBRARIES}
    DESTINATION lib${LIB_SUFFIX}
  )
else()
  # System libfec found: provide an empty `fec` target so that
  # add_dependencies(<target> fec) elsewhere still has something to refer to.
  add_library(fec INTERFACE)
endif()
# Search for GNU Radio and its components and versions. Add any
# components required to the list of GR_REQUIRED_COMPONENTS (in all
# caps such as FILTER or FFT) and change the version to the minimum

View File

@ -0,0 +1,310 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
##################################################
# GNU Radio Python Flow Graph
# Title: Debug Afsk Transceiver Osmocom
# Generated: Mon Jun 13 20:30:12 2016
##################################################
# When executed as a script on Linux, load libX11 and call XInitThreads()
# before the Qt/GNU Radio imports that follow. A failure is only reported on
# stdout; it never aborts the program.
if __name__ == '__main__':
import ctypes
import sys
if sys.platform.startswith('linux'):
try:
x11 = ctypes.cdll.LoadLibrary('libX11.so')
x11.XInitThreads()
# Bare except: any error from loading the library or calling the symbol
# is reduced to the warning below.
except:
print "Warning: failed to XInitThreads()"
from PyQt4 import Qt
from gnuradio import analog
from gnuradio import audio
from gnuradio import blocks
from gnuradio import eng_notation
from gnuradio import filter
from gnuradio import gr
from gnuradio import qtgui
from gnuradio.eng_option import eng_option
from gnuradio.filter import firdes
from gnuradio.qtgui import Range, RangeWidget
from optparse import OptionParser
import math
import numpy
import satnogs
import sip
import sys
# Top block + Qt widget for the "Debug Afsk Transceiver Osmocom" flow graph.
# NOTE(review): the file header says this is a generated GNU Radio flow graph,
# so manual changes are presumably lost when it is regenerated - confirm.
class debug_afsk_transceiver_osmocom(gr.top_block, Qt.QWidget):
# Builds the Qt window, the flow-graph variables, all blocks and their
# connections. Takes no arguments; every parameter is a hard-coded default.
def __init__(self):
gr.top_block.__init__(self, "Debug Afsk Transceiver Osmocom")
Qt.QWidget.__init__(self)
self.setWindowTitle("Debug Afsk Transceiver Osmocom")
# The themed icon is optional; any failure to set it is ignored.
try:
self.setWindowIcon(Qt.QIcon.fromTheme('gnuradio-grc'))
except:
pass
# Window scaffolding: a frameless, resizable scroll area holds top_widget,
# whose vertical layout (top_layout) contains top_grid_layout.
self.top_scroll_layout = Qt.QVBoxLayout()
self.setLayout(self.top_scroll_layout)
self.top_scroll = Qt.QScrollArea()
self.top_scroll.setFrameStyle(Qt.QFrame.NoFrame)
self.top_scroll_layout.addWidget(self.top_scroll)
self.top_scroll.setWidgetResizable(True)
self.top_widget = Qt.QWidget()
self.top_scroll.setWidget(self.top_widget)
self.top_layout = Qt.QVBoxLayout(self.top_widget)
self.top_grid_layout = Qt.QGridLayout()
self.top_layout.addLayout(self.top_grid_layout)
# Restore the window geometry stored under the same QSettings keys that
# closeEvent() writes.
self.settings = Qt.QSettings("GNU Radio", "debug_afsk_transceiver_osmocom")
self.restoreGeometry(self.settings.value("geometry").toByteArray())
##################################################
# Variables
##################################################
self.samples_per_symbol_tx = samples_per_symbol_tx = 4
# sq_wave: tuple of samples_per_symbol_tx ones.
self.sq_wave = sq_wave = (1.0, ) * samples_per_symbol_tx
self.gaussian_taps = gaussian_taps = filter.firdes.gaussian(1.0, samples_per_symbol_tx, 1.0, 4*samples_per_symbol_tx)
self.deviation = deviation = 800
self.baud_rate = baud_rate = 1200
# tx_frequency is stored and exposed through get/set, but no block in this
# class reads it.
self.tx_frequency = tx_frequency = 145.835e6
self.samp_rate_tx = samp_rate_tx = 48e3
# modulation_index = deviation / (baud_rate / 2)
self.modulation_index = modulation_index = deviation / (baud_rate / 2.0)
# interp_taps: the Gaussian taps convolved with sq_wave; used as the taps of
# the interpolating FIR filter created below.
self.interp_taps = interp_taps = numpy.convolve(numpy.array(gaussian_taps), numpy.array(sq_wave))
self.atten = atten = 0.1
##################################################
# Blocks
##################################################
# "Attenuation" GUI control wired to set_atten().
self._atten_range = Range(0, 0.9, 0.01, 0.1, 200)
self._atten_win = RangeWidget(self._atten_range, self.set_atten, "Attenuation", "counter_slider", float)
self.top_layout.addWidget(self._atten_win)
# Message sources and the frame encoder they feed (see Connections).
self.satnogs_upsat_fsk_frame_encoder_0 = satnogs.upsat_fsk_frame_encoder([0x33]*8, [0x7A, 0x0E], False, False, False, True, True, "ABCD", 0, "UPSAT", 0, 1024)
self.satnogs_udp_msg_source_0 = satnogs.udp_msg_source("127.0.0.1", 16886, 1500)
self.satnogs_debug_msg_source_0 = satnogs.debug_msg_source("HELLO"*4, 1, True)
self.rational_resampler_xxx_0 = filter.rational_resampler_ccc(
interpolation=10,
decimation=1,
taps=None,
fractional_bw=None,
)
# Complex time sink; it is fed by the frequency modulator output.
self.qtgui_time_sink_x_0_0_0 = qtgui.time_sink_c(
1024, #size
samp_rate_tx, #samp_rate
"", #name
1 #number of inputs
)
self.qtgui_time_sink_x_0_0_0.set_update_time(0.10)
self.qtgui_time_sink_x_0_0_0.set_y_axis(-1, 1)
self.qtgui_time_sink_x_0_0_0.set_y_label("Amplitude", "")
self.qtgui_time_sink_x_0_0_0.enable_tags(-1, True)
self.qtgui_time_sink_x_0_0_0.set_trigger_mode(qtgui.TRIG_MODE_FREE, qtgui.TRIG_SLOPE_POS, 0.0, 0, 0, "")
self.qtgui_time_sink_x_0_0_0.enable_autoscale(False)
self.qtgui_time_sink_x_0_0_0.enable_grid(False)
self.qtgui_time_sink_x_0_0_0.enable_control_panel(True)
# Constant-false condition: disable_legend() is never called here.
if not True:
self.qtgui_time_sink_x_0_0_0.disable_legend()
# Per-line appearance tables, indexed by line number.
labels = ["", "", "", "", "",
"", "", "", "", ""]
widths = [1, 1, 1, 1, 1,
1, 1, 1, 1, 1]
colors = ["blue", "red", "green", "black", "cyan",
"magenta", "yellow", "dark red", "dark green", "blue"]
styles = [1, 1, 1, 1, 1,
1, 1, 1, 1, 1]
markers = [2, -1, -1, -1, -1,
-1, -1, -1, -1, -1]
alphas = [1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0]
# Two lines for the single complex input: even index gets the "Re" label,
# odd index the "Im" label. All entries in `labels` are empty, so the
# generated labels are always used.
for i in xrange(2*1):
if len(labels[i]) == 0:
if(i % 2 == 0):
self.qtgui_time_sink_x_0_0_0.set_line_label(i, "Re{{Data {0}}}".format(i/2))
else:
self.qtgui_time_sink_x_0_0_0.set_line_label(i, "Im{{Data {0}}}".format(i/2))
else:
self.qtgui_time_sink_x_0_0_0.set_line_label(i, labels[i])
self.qtgui_time_sink_x_0_0_0.set_line_width(i, widths[i])
self.qtgui_time_sink_x_0_0_0.set_line_color(i, colors[i])
self.qtgui_time_sink_x_0_0_0.set_line_style(i, styles[i])
self.qtgui_time_sink_x_0_0_0.set_line_marker(i, markers[i])
self.qtgui_time_sink_x_0_0_0.set_line_alpha(i, alphas[i])
# Wrap the sink's widget so it can be added to the Qt layout.
self._qtgui_time_sink_x_0_0_0_win = sip.wrapinstance(self.qtgui_time_sink_x_0_0_0.pyqwidget(), Qt.QWidget)
self.top_layout.addWidget(self._qtgui_time_sink_x_0_0_0_win)
# Float time sink; it is fed by the VCO output.
self.qtgui_time_sink_x_0_0 = qtgui.time_sink_f(
1024, #size
samp_rate_tx, #samp_rate
"", #name
1 #number of inputs
)
self.qtgui_time_sink_x_0_0.set_update_time(0.10)
self.qtgui_time_sink_x_0_0.set_y_axis(-1, 1)
self.qtgui_time_sink_x_0_0.set_y_label("Amplitude", "")
self.qtgui_time_sink_x_0_0.enable_tags(-1, True)
self.qtgui_time_sink_x_0_0.set_trigger_mode(qtgui.TRIG_MODE_FREE, qtgui.TRIG_SLOPE_POS, 0.0, 0, 0, "")
self.qtgui_time_sink_x_0_0.enable_autoscale(False)
self.qtgui_time_sink_x_0_0.enable_grid(False)
self.qtgui_time_sink_x_0_0.enable_control_panel(True)
# Constant-false condition: disable_legend() is never called here.
if not True:
self.qtgui_time_sink_x_0_0.disable_legend()
# Same appearance tables as for the complex sink above.
labels = ["", "", "", "", "",
"", "", "", "", ""]
widths = [1, 1, 1, 1, 1,
1, 1, 1, 1, 1]
colors = ["blue", "red", "green", "black", "cyan",
"magenta", "yellow", "dark red", "dark green", "blue"]
styles = [1, 1, 1, 1, 1,
1, 1, 1, 1, 1]
markers = [2, -1, -1, -1, -1,
-1, -1, -1, -1, -1]
alphas = [1.0, 1.0, 1.0, 1.0, 1.0,
1.0, 1.0, 1.0, 1.0, 1.0]
# Single line for the single float input.
for i in xrange(1):
if len(labels[i]) == 0:
self.qtgui_time_sink_x_0_0.set_line_label(i, "Data {0}".format(i))
else:
self.qtgui_time_sink_x_0_0.set_line_label(i, labels[i])
self.qtgui_time_sink_x_0_0.set_line_width(i, widths[i])
self.qtgui_time_sink_x_0_0.set_line_color(i, colors[i])
self.qtgui_time_sink_x_0_0.set_line_style(i, styles[i])
self.qtgui_time_sink_x_0_0.set_line_marker(i, markers[i])
self.qtgui_time_sink_x_0_0.set_line_alpha(i, alphas[i])
self._qtgui_time_sink_x_0_0_win = sip.wrapinstance(self.qtgui_time_sink_x_0_0.pyqwidget(), Qt.QWidget)
self.top_layout.addWidget(self._qtgui_time_sink_x_0_0_win)
# Signal-processing blocks. Several of them use the literal 48e3/48000
# rather than samp_rate_tx, so set_samp_rate_tx() does not reach them.
self.interp_fir_filter_xxx_0 = filter.interp_fir_filter_fff(samples_per_symbol_tx, (interp_taps))
self.interp_fir_filter_xxx_0.declare_sample_delay(0)
self.blocks_vco_f_0 = blocks.vco_f(48e3, -48e3, 1.0)
self.blocks_multiply_const_vxx_0 = blocks.multiply_const_vff((atten, ))
self.audio_sink_0_0 = audio.sink(48000, "", True)
self.analog_quadrature_demod_cf_0 = analog.quadrature_demod_cf(48e3/(2*math.pi*deviation/8.0))
self.analog_frequency_modulator_fc_0 = analog.frequency_modulator_fc((math.pi*modulation_index) / samples_per_symbol_tx)
##################################################
# Connections
##################################################
# Messages: both message sources feed the encoder's 'pdu' port.
# Streams: encoder -> interpolating FIR -> frequency modulator ->
# resampler -> quadrature demod -> multiply-by-atten -> audio sink.
# Taps: modulator output -> complex time sink; FIR output -> VCO ->
# float time sink.
self.msg_connect((self.satnogs_debug_msg_source_0, 'msg'), (self.satnogs_upsat_fsk_frame_encoder_0, 'pdu'))
self.msg_connect((self.satnogs_udp_msg_source_0, 'msg'), (self.satnogs_upsat_fsk_frame_encoder_0, 'pdu'))
self.connect((self.analog_frequency_modulator_fc_0, 0), (self.qtgui_time_sink_x_0_0_0, 0))
self.connect((self.analog_frequency_modulator_fc_0, 0), (self.rational_resampler_xxx_0, 0))
self.connect((self.analog_quadrature_demod_cf_0, 0), (self.blocks_multiply_const_vxx_0, 0))
self.connect((self.blocks_multiply_const_vxx_0, 0), (self.audio_sink_0_0, 0))
self.connect((self.blocks_vco_f_0, 0), (self.qtgui_time_sink_x_0_0, 0))
self.connect((self.interp_fir_filter_xxx_0, 0), (self.analog_frequency_modulator_fc_0, 0))
self.connect((self.interp_fir_filter_xxx_0, 0), (self.blocks_vco_f_0, 0))
self.connect((self.rational_resampler_xxx_0, 0), (self.analog_quadrature_demod_cf_0, 0))
self.connect((self.satnogs_upsat_fsk_frame_encoder_0, 0), (self.interp_fir_filter_xxx_0, 0))
# Qt close handler: save the window geometry, then accept the event.
def closeEvent(self, event):
self.settings = Qt.QSettings("GNU Radio", "debug_afsk_transceiver_osmocom")
self.settings.setValue("geometry", self.saveGeometry())
event.accept()
# --- Variable accessors -------------------------------------------------
# Each set_* stores the value and then pushes it into every dependent
# variable or block, so derived values stay consistent.
def get_samples_per_symbol_tx(self):
return self.samples_per_symbol_tx
# Recomputes gaussian_taps and sq_wave (each of which recomputes
# interp_taps) and updates the modulator sensitivity.
def set_samples_per_symbol_tx(self, samples_per_symbol_tx):
self.samples_per_symbol_tx = samples_per_symbol_tx
self.set_gaussian_taps(filter.firdes.gaussian(1.0, self.samples_per_symbol_tx, 1.0, 4*self.samples_per_symbol_tx))
self.set_sq_wave((1.0, ) * self.samples_per_symbol_tx)
self.analog_frequency_modulator_fc_0.set_sensitivity((math.pi*self.modulation_index) / self.samples_per_symbol_tx)
def get_sq_wave(self):
return self.sq_wave
# Recomputes interp_taps from the current gaussian_taps and the new sq_wave.
def set_sq_wave(self, sq_wave):
self.sq_wave = sq_wave
self.set_interp_taps(numpy.convolve(numpy.array(self.gaussian_taps), numpy.array(self.sq_wave)))
def get_gaussian_taps(self):
return self.gaussian_taps
# Recomputes interp_taps from the new gaussian_taps and the current sq_wave.
def set_gaussian_taps(self, gaussian_taps):
self.gaussian_taps = gaussian_taps
self.set_interp_taps(numpy.convolve(numpy.array(self.gaussian_taps), numpy.array(self.sq_wave)))
def get_deviation(self):
return self.deviation
# Updates modulation_index and the quadrature demodulator gain.
def set_deviation(self, deviation):
self.deviation = deviation
self.set_modulation_index(self.deviation / (self.baud_rate / 2.0))
self.analog_quadrature_demod_cf_0.set_gain(48e3/(2*math.pi*self.deviation/8.0))
def get_baud_rate(self):
return self.baud_rate
# Updates modulation_index.
def set_baud_rate(self, baud_rate):
self.baud_rate = baud_rate
self.set_modulation_index(self.deviation / (self.baud_rate / 2.0))
def get_tx_frequency(self):
return self.tx_frequency
# Only stores the value; no block is updated.
def set_tx_frequency(self, tx_frequency):
self.tx_frequency = tx_frequency
def get_samp_rate_tx(self):
return self.samp_rate_tx
# Updates the sample rate of both time sinks only.
def set_samp_rate_tx(self, samp_rate_tx):
self.samp_rate_tx = samp_rate_tx
self.qtgui_time_sink_x_0_0_0.set_samp_rate(self.samp_rate_tx)
self.qtgui_time_sink_x_0_0.set_samp_rate(self.samp_rate_tx)
def get_modulation_index(self):
return self.modulation_index
# Updates the frequency modulator sensitivity.
def set_modulation_index(self, modulation_index):
self.modulation_index = modulation_index
self.analog_frequency_modulator_fc_0.set_sensitivity((math.pi*self.modulation_index) / self.samples_per_symbol_tx)
def get_interp_taps(self):
return self.interp_taps
# Loads the new taps into the interpolating FIR filter.
def set_interp_taps(self, interp_taps):
self.interp_taps = interp_taps
self.interp_fir_filter_xxx_0.set_taps((self.interp_taps))
def get_atten(self):
return self.atten
# Called by the "Attenuation" widget; updates the multiply-const block.
def set_atten(self, atten):
self.atten = atten
self.blocks_multiply_const_vxx_0.set_k((self.atten, ))
# Creates the Qt application, instantiates and starts the top block, shows its
# window and runs the Qt event loop until the application quits.
#   top_block_cls: class to instantiate (defaults to the flow graph above).
#   options: accepted but not used in this function.
def main(top_block_cls=debug_afsk_transceiver_osmocom, options=None):
from distutils.version import StrictVersion
# For Qt >= 4.5.0, take the graphics system from the 'qtgui'/'style'
# preference, falling back to 'raster'.
if StrictVersion(Qt.qVersion()) >= StrictVersion("4.5.0"):
style = gr.prefs().get_string('qtgui', 'style', 'raster')
Qt.QApplication.setGraphicsSystem(style)
qapp = Qt.QApplication(sys.argv)
tb = top_block_cls()
tb.start()
tb.show()
# Stop the flow graph and wait for it to finish when Qt is about to quit.
def quitting():
tb.stop()
tb.wait()
qapp.connect(qapp, Qt.SIGNAL("aboutToQuit()"), quitting)
qapp.exec_()
# Run main() only when this file is executed as a script.
if __name__ == '__main__':
main()

View File

@ -0,0 +1,5 @@
# this module will be imported into your flowgraph
def append_dev_args(device, dev_args):
if(len(dev_args) == 0):
return 0

Binary file not shown.

View File

@ -22,4 +22,4 @@ FIND_LIBRARY(
)
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(FEC DEFAULT_MSG FEC_LIBRARIES FEC_INCLUDE_DIRS)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(FEC DEFAULT_MSG FEC_LIBRARIES FEC_INCLUDE_DIRS)

View File

@ -81,6 +81,9 @@ if(NOT satnogs_sources)
endif(NOT satnogs_sources)
# The gr-satnogs shared library, built from the sources collected above.
add_library(gnuradio-satnogs SHARED ${satnogs_sources})
# Build-order edge only: the `fec` target must be processed before this
# library is built. add_dependencies() orders builds; it does not add
# anything to the link line.
add_dependencies(gnuradio-satnogs fec)
target_link_libraries(gnuradio-satnogs
${Boost_LIBRARIES}
${GNURADIO_ALL_LIBRARIES}

323
libfec/CMakeLists.txt Normal file
View File

@ -0,0 +1,323 @@
########################################################################
# Project setup
########################################################################
cmake_minimum_required(VERSION 2.8)
project(libfec ASM C)

# Opt-in switch: produce a 32-bit library on a 64-bit host.
option(BUILD_32BIT_ON_64BIT "Build a 32-bit library on a 64-bit system" OFF)

# Select the release build type by default to get optimization flags
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release")
  message(STATUS "Build type not specified: defaulting to release.")
endif()
set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "")

# Locate the helper modules relative to this file rather than the top of the
# whole build, so they are still found if libfec is not the top-level project.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")

# Library install directory (relative to the install prefix); callers may
# override it, e.g. with -DLIB_INSTALL_DIR=lib64.
if(NOT LIB_INSTALL_DIR)
  set(LIB_INSTALL_DIR lib)
endif()
########################################################################
# Version information
########################################################################
# Version components; VERSION_INFO_EXTRA defaults to "git" unless the caller
# defined it beforehand.
set(VERSION_INFO_MAJOR 3)
set(VERSION_INFO_MINOR 0)
set(VERSION_INFO_PATCH 0)
if(NOT DEFINED VERSION_INFO_EXTRA)
set(VERSION_INFO_EXTRA "git")
endif()
# Version.cmake is looked up through CMAKE_MODULE_PATH.
# NOTE(review): presumably it derives VERSION_INFO from the components above;
# that module is not visible here - confirm.
include(Version)
# VERSION is only set when the caller did not provide one. The value carries
# literal double quotes around ${VERSION_INFO}.
if(NOT DEFINED VERSION)
#set(VERSION "\"${VERSION_INFO_MAJOR}.${VERSION_INFO_MINOR}.${VERSION_INFO_PATCH}\"")
set(VERSION "\"${VERSION_INFO}\"")
endif()
########################################################################
# Compiler specific setup
########################################################################
# Forced 32-bit build: pretend to be an i386 target with 4-byte pointers and
# pass -m32 to the compiler.
if(BUILD_32BIT_ON_64BIT)
  set(CMAKE_SYSTEM_PROCESSOR "i386")
  set(CMAKE_SIZEOF_VOID_P 4)
  # Append -m32 instead of replacing the flag strings, so flags handed in by
  # the caller (e.g. -DCMAKE_C_FLAGS=...) are not silently discarded.
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
  add_definitions(-m32)
endif()

# Map processor name + pointer size onto TARGET_ARCH, which selects the
# compiler flags below and the source list later in this file. TARGET_ARCH
# stays unset for unrecognised processors (generic sources are used then).
if((CMAKE_SYSTEM_PROCESSOR MATCHES "i386|i686|x86|AMD64") AND (CMAKE_SIZEOF_VOID_P EQUAL 4))
  set(TARGET_ARCH "x86")
elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|AMD64") AND (CMAKE_SIZEOF_VOID_P EQUAL 8))
  set(TARGET_ARCH "x64")
elseif((CMAKE_SYSTEM_PROCESSOR MATCHES "i386") AND (CMAKE_SIZEOF_VOID_P EQUAL 8) AND (APPLE))
  # Mac is weird like that.
  set(TARGET_ARCH "x64")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm")
  # MATCHES takes a regular expression, not a glob: the former "^arm*" meant
  # "ar" followed by any number of "m" and so matched any name starting "ar".
  set(TARGET_ARCH "ARM")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
  set(TARGET_ARCH "ppc64" "ppc64le")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
  set(TARGET_ARCH "ppc64" "ppc")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)")
  set(TARGET_ARCH "ppc")
endif()

# Warning and SIMD flags for GCC-compatible compilers.
if(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANGCC)
  add_definitions(-Wall)
  add_definitions(-Wno-unused)
  if(TARGET_ARCH MATCHES "x64")
    add_definitions(-fPIC)
    add_definitions(-msse2)
  elseif(TARGET_ARCH MATCHES "x86")
    add_definitions(-mmmx)
    add_definitions(-msse)
    add_definitions(-msse2)
  elseif(TARGET_ARCH MATCHES "ppc|ppc64")
    add_definitions(-fno-common)
    add_definitions(-faltivec)
  endif()
endif()
########################################################################
# Find build dependencies
########################################################################
# libm
# The REQUIRED keyword of find_library() only exists in newer CMake releases
# than the minimum declared by this project, so the "must be found" check is
# spelled out explicitly to behave the same on every supported version.
find_library(M_LIB m)
if(NOT M_LIB)
  message(FATAL_ERROR "Could not find the math library (libm)")
endif()
########################################################################
# config.h
########################################################################
#add_definitions(-DHAVE_CONFIG_H)
# Probe the toolchain for headers and C library functions. Every probe stores
# its result in HAVE_<NAME>, where <NAME> is the header or function name in
# upper case with each non-alphanumeric character replaced by "_"
# (getopt.h -> HAVE_GETOPT_H, getopt_long -> HAVE_GETOPT_LONG).
include(CheckIncludeFile)
include(CheckFunctionExists)

# Checks for includes
foreach(libfec_header getopt.h stdio.h stdlib.h memory.h string.h)
  string(TOUPPER "${libfec_header}" libfec_have_suffix)
  string(REGEX REPLACE "[^A-Z0-9]" "_" libfec_have_suffix "${libfec_have_suffix}")
  check_include_file("${libfec_header}" "HAVE_${libfec_have_suffix}")
endforeach()

# Checks for functions
foreach(libfec_function getopt_long memset memmove)
  string(TOUPPER "${libfec_function}" libfec_have_suffix)
  check_function_exists("${libfec_function}" "HAVE_${libfec_have_suffix}")
endforeach()
########################################################################
# Setup apps
########################################################################
# Assemble libfec_sources: first the architecture-specific files selected by
# TARGET_ARCH, then the files that are built on every architecture.
# NOTE(review): the x64 and x86 branches list files (dotprod_port.c,
# peakval_port.c, sumsq.c, sumsq_port.c) that the common list below appends
# again - confirm the duplicates are intentional/harmless.
if(TARGET_ARCH MATCHES "x64")
list(APPEND libfec_sources
dotprod_port.c
peakval_port.c
sumsq.c
sumsq_port.c
cpu_mode_x86_64.c
##asm
#sse2bfly27-64.s
#sse2bfly29-64.s
)
# 32-bit x86: MMX/SSE/SSE2 C implementations plus their assembly helpers.
elseif(TARGET_ARCH MATCHES "x86")
list(APPEND libfec_sources
viterbi27_mmx.c
viterbi27_sse.c
viterbi27_sse2.c
viterbi29_mmx.c
viterbi29_sse.c
viterbi29_sse2.c
viterbi39_sse2.c
viterbi39_sse.c
viterbi39_mmx.c
viterbi615_mmx.c
viterbi615_sse.c
viterbi615_sse2.c
dotprod_mmx.c
dotprod_sse2.c
#peakval_mmx.c
#peakval_sse.c
#peakval_sse2.c
sumsq.c
sumsq_port.c
sumsq_sse2.c
sumsq_mmx.c
cpu_mode_x86.c
#asm
cpu_features.s
dotprod_mmx_assist.s
dotprod_sse2_assist.s
mmxbfly27.s
mmxbfly29.s
peak_mmx_assist.s
peak_sse2_assist.s
peak_sse_assist.s
peakval_mmx_assist.s
peakval_sse2_assist.s
peakval_sse_assist.s
sse2bfly27.s
sse2bfly29.s
ssebfly27.s
ssebfly29.s
sumsq_mmx_assist.s
sumsq_sse2_assist.s
)
# PowerPC: the *_av.c variants.
elseif(TARGET_ARCH MATCHES "ppc|ppc64")
list(APPEND libfec_sources
viterbi27_av.c
viterbi29_av.c
viterbi39_av.c
viterbi615_av.c
encode_rs_av.c
dotprod_av.c
sumsq_av.c
peakval_av.c
cpu_mode_ppc.c
)
# Everything else (including TARGET_ARCH "ARM" and an unset TARGET_ARCH).
else()
list(APPEND libfec_sources
cpu_mode_generic.c
)
endif()
# Sources built on every architecture. ccsds_tab.c and ccsds_tal.c are the
# outputs of the gen_ccsds / gen_ccsds_tal custom commands defined in the
# "Setup libraries" section of this file.
list(APPEND libfec_sources
fec.c
sim.c
viterbi27.c
viterbi27_port.c
viterbi29.c
viterbi29_port.c
viterbi39.c
viterbi39_port.c
viterbi615.c
viterbi615_port.c
encode_rs_char.c
encode_rs_int.c
encode_rs_8.c
decode_rs_char.c
decode_rs_int.c
decode_rs_8.c
init_rs_char.c
init_rs_int.c
encode_rs_ccsds.c
decode_rs_ccsds.c
dotprod.c
dotprod_port.c
peakval.c
peakval_port.c
sumsq.c
sumsq_port.c
ccsds_tab.c
ccsds_tal.c
)
################################################################################
# Generate pkg-config file
################################################################################
# Turn include and library directories into -I / -L flags for the .pc file.
# NOTE(review): LIBFEC_INCLUDE_DIR and LIBFEC_LIBRARY_DIRS are not set anywhere
# in the visible part of this file, so both loops are presumably no-ops unless
# the caller defines them - confirm.
foreach(inc ${LIBFEC_INCLUDE_DIR})
list(APPEND LIBFEC_PC_CFLAGS "-I${inc}")
endforeach()
foreach(lib ${LIBFEC_LIBRARY_DIRS})
list(APPEND LIBFEC_PC_PRIV_LIBS "-L${lib}")
endforeach()
# Values for the libfec.pc.in template. The escaped \${prefix} and
# \${exec_prefix} are written literally so pkg-config expands them, not CMake.
set(LIBFEC_PC_PREFIX ${CMAKE_INSTALL_PREFIX})
set(LIBFEC_PC_EXEC_PREFIX \${prefix})
set(LIBFEC_PC_LIBDIR \${exec_prefix}/${LIB_INSTALL_DIR})
set(LIBFEC_PC_INCLUDEDIR \${prefix}/include)
# NOTE(review): VERSION is set with embedded double quotes earlier in this
# file, so the quotes end up in the .pc version field - confirm that is wanted.
set(LIBFEC_PC_VERSION ${VERSION})
set(LIBFEC_PC_LIBS "-lfec")
# Use space-delimiter in the .pc file, rather than CMake's semicolon separator
string(REPLACE ";" " " LIBFEC_PC_CFLAGS "${LIBFEC_PC_CFLAGS}")
string(REPLACE ";" " " LIBFEC_PC_LIBS "${LIBFEC_PC_LIBS}")
# Unset these to avoid hard-coded paths in a cross-environment
if(CMAKE_CROSSCOMPILING)
unset(LIBFEC_PC_CFLAGS)
unset(LIBFEC_PC_LIBS)
endif()
# @ONLY: only @VAR@ references in the template are substituted.
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/libfec.pc.in
${CMAKE_CURRENT_BINARY_DIR}/libfec.pc
@ONLY
)
# Install the generated file next to the library, under pkgconfig/.
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/libfec.pc
DESTINATION ${LIB_INSTALL_DIR}/pkgconfig/
)
########################################################################
# Setup libraries
########################################################################
# The two CCSDS table sources are produced at build time by small helper
# programs that print the table to stdout.
#
# The helpers are referenced by target name in COMMAND, so CMake substitutes
# the real location of the built executable (the previous hard-coded
# ${CMAKE_BINARY_DIR}/<name> is wrong whenever the executable is placed in a
# sub-directory, e.g. with multi-config generators). The working directory is
# set explicitly so the redirected file lands exactly where OUTPUT says.

# generate ccsds_tab.c
add_executable(gen_ccsds gen_ccsds.c init_rs_char.c)
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ccsds_tab.c
  COMMAND gen_ccsds > ccsds_tab.c
  DEPENDS gen_ccsds
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  COMMENT "Generating ccsds_tab.c"
)

# generate ccsds_tal.c
add_executable(gen_ccsds_tal gen_ccsds_tal.c)
add_custom_command(
  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ccsds_tal.c
  COMMAND gen_ccsds_tal > ccsds_tal.c
  DEPENDS gen_ccsds_tal
  WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
  COMMENT "Generating ccsds_tal.c"
)
# libfec
# Shared library built from libfec_sources. The target is called
# libfec_shared, but OUTPUT_NAME makes the produced file use the base name
# "fec".
add_library(libfec_shared SHARED ${libfec_sources})
set_target_properties(libfec_shared PROPERTIES OUTPUT_NAME fec)
# Link against the math library located by find_library(M_LIB ...) above.
target_link_libraries(libfec_shared ${M_LIB})
# Install the library into LIB_INSTALL_DIR and the public header into include/
# (both relative to the install prefix).
install(TARGETS libfec_shared
DESTINATION ${LIB_INSTALL_DIR})
install(FILES "${PROJECT_SOURCE_DIR}/fec.h"
DESTINATION include)
########################################################################
# Create uninstall target
########################################################################
# Generate cmake_uninstall.cmake from its template and expose it through an
# `uninstall` target that runs the generated script with `cmake -P`.
# NOTE(review): IMMEDIATE looks like a legacy configure_file() option -
# confirm it can be dropped.
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
"${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
IMMEDIATE @ONLY)
add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
########################################################################
# Print Summary
########################################################################
# Configuration summary, framed by a rule of '#' characters and surrounded by
# blank status lines.
set(libfec_summary_rule "##########################################################")
message(STATUS "")
message(STATUS "${libfec_summary_rule}")
message(STATUS "## Building for version: ${VERSION}")
message(STATUS "## Target Architecture: ${TARGET_ARCH}")
message(STATUS "## Using install prefix: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "${libfec_summary_rule}")
message(STATUS "")

51
libfec/INSTALL Normal file
View File

@ -0,0 +1,51 @@
INSTALLATION INSTRUCTIONS
CMake-based build:
Works on most platforms. Do
mkdir build
cd build
cmake ..
make
If that fails, try the older automake-based build:
./bootstrap
./configure
make
make test (optional)
make install (as root)
By default, "make install" puts the libfec libraries in
/usr/local/lib, the include files in /usr/local/include, and the
manual page in /usr/local/man.
You may have an old version of the GNU assembler that cannot handle
the relatively new SSE2 mnemonics. Update your version of the GNU
"binutils" package.
You may obtain the latest binutils package through your normal
distribution channels or from:
http://sources.redhat.com/binutils/
TESTING THE FEC LIBRARY
After running the ./configure script, optional tests can be built and
run as follows:
make test
"make test" tests each routine, using the SIMD versions as
appropriate, verifying correct operation and estimating Viterbi
decoding speeds. These tests should always succeed unless something is
broken.
28 Mar 2004
Phil Karn, karn@ka9q.net
3 Jan 2014
Matthias P. Braendli, matthias@mpb.li

502
libfec/LICENSE Normal file
View File

@ -0,0 +1,502 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
(This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.)
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
{description}
Copyright (C) {year} {fullname}
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
{signature of Ty Coon}, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!

125
libfec/README Normal file
View File

@ -0,0 +1,125 @@
COPYRIGHT
This package is copyright 2006 by Phil Karn, KA9Q. It may be used
under the terms of the GNU Lesser General Public License (LGPL). See
the file "lesser.txt" in this package for license details.
It has been modified by Matthias P. Braendli, HB9EGM, so that it
compiles for x86_64 and for arm.
For installation instructions, please see INSTALL
INTRODUCTION
This package provides a set of functions that implement several
popular forward error correction (FEC) algorithms and several low-level routines
useful in modems implemented with digital signal processing (DSP).
The following routines are provided:
1. Viterbi decoders for the following convolutional codes:
r=1/2 k=7 ("Voyager" code, now a widely used industry standard)
r=1/2 k=9 (Used on the IS-95 CDMA forward link)
r=1/6 k=15 ("Cassini" code, used by several NASA/JPL deep space missions)
2. Reed-Solomon encoders and decoders for any user-specified code.
3. Optimized encoder and decoder for the CCSDS-standard (255,223)
Reed-Solomon code, with and without the CCSDS-standard "dual basis"
symbol representation.
4. Compute dot product between a 16-bit buffer and a set of 16-bit
coefficients. This is the basic DSP primitive for digital filtering
and correlation.
5. Compute sum of squares of a buffer of 16-bit signed integers. This is
useful in DSP for finding the total energy in a signal.
6. Find peak value in a buffer of 16-bit signed integers, useful for
scaling a signal to prevent overflow.
SIMD SUPPORT
This package automatically makes use of various SIMD (Single
Instruction stream, Multiple Data stream) instruction sets, when
available: MMX, SSE and SSE2 on the IA-32 (Intel) architecture, and
Altivec on the PowerPC G4 and G5 used by Power Macintoshes.
"Altivec" is a Motorola trademark; Apple calls it "Velocity Engine",
and IBM calls it "VMX". Altivec is roughly comparable to SSE2 on the
IA-32.
Many of the SIMD versions run more than an order of
magnitude faster than their portable C versions. The available SIMD
instruction sets, if any, are determined at run time and the proper
version of each routine is automatically selected. If no SIMD
instructions are available, the portable C version is invoked by
default. On targets other than IA-32 and PPC, only the portable C
version is built.
The SIMD-assisted versions generally produce the same results as the C
versions, with a few minor exceptions. The Viterbi decoders in C have
a very slightly greater Eb/No performance due to their use of 32-bit
path metrics. On the other hand, the SIMD versions use the
"saturating" arithmetic available in these instructions to avoid the
integer wraparounds that can occur in C when argument ranges are not
properly constrained. This applies primarily to the "dotprod" (dot
product) function.
The MMX (MultiMedia eXtensions) instruction set was introduced on
later Pentium CPUs; it is also implemented on the Pentium II and most
AMD CPUs starting with the K6. SSE (Streaming SIMD Extensions) was
introduced in the Pentium III; AMD calls it "3D Now! Professional".
Intel introduced SSE2 on the Pentium 4, and it has been picked up by
later AMD CPUs. SSE support implies MMX support, while SSE2 support
implies both SSE and MMX support.
The latest IA-32 SIMD instruction set, SSE3 (also known as "Prescott
New Instructions") was introduced in early 2004 with the latest
("Prescott") revision of the Pentium 4. Relatively little was
introduced with SSE3, and this library currently makes no use of it.
See the various manual pages for details on how to use the library
routines.
Copyright 2006, Phil Karn, KA9Q
karn@ka9q.net
http://www.ka9q.net/
This software may be used under the terms of the GNU Lesser General
Public License (LGPL); see the file lesser.txt for details.
Revision history:
Version 1.0 released 29 May 2001
Version 2.0 released 3 Dec 2001:
Restructured to add support for shared libraries.
Version 2.0.1 released 8 Dec 2001:
Includes autoconf/configure script
Version 2.0.2 released 4 Feb 2002:
Add SIMD version override options
Test for lack of SSE2 mnemonic support in 'as'
Build only selected version
Version 2.0.3 released 6 Feb 2002:
Fix to parityb function in parity.h
feclib version 1.0 released November 2003
Merged SIMD-Viterbi, RS and DSP libraries
Changed SIMD Viterbi decoder to detect SSE2/SSE/MMX at runtime rather than build time
feclib version 2.0 (unreleased) Mar 2004
General speedups and cleanups
Switch from 4 to 8-bit input symbols on all Viterbi decoders
Support for Altivec on PowerPC
Support for k=15 r=1/6 Cassini/Mars Pathfinder/Mars Exploration Rover/STEREO code
Changed license to GNU Lesser General Public License (LGPL)
feclib version 2.1 June 5 2006
Added error checking, fixed alignment bug in SSE2 versions of Viterbi decoders causing segfaults
feclib version 2.1.1 June 6 2006
Fix test/benchmark time measurement on Linux

13
libfec/README.x86-64 Normal file
View File

@ -0,0 +1,13 @@
This library has been modified to compile natively on x86-64.
An attempt has been made to adapt the assembly code, but due to unsolved issues with
the fact that shared libraries on x86-64 have to be compiled with PIC, this approach is
not finished.
This code therefore only uses the portable C implementation, which is certainly slower than
the assembly SSE2 that could ideally be used.
It could be said that we trade performance against the possibility to compile on x86-64.
feb, 2012
Matthias P. Braendli, HB9EGM

6
libfec/bootstrap Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Regenerate the autotools build files for libfec.
# The three tools are chained with &&, so the script stops at the first one
# that fails and returns its exit status:
#   aclocal    - gathers the m4 macro definitions into aclocal.m4
#   autoheader - creates the template for the configuration header
#   autoconf   - turns configure.in into the configure script
aclocal && \
autoheader && \
autoconf

5
libfec/ccsds.h Normal file
View File

@ -0,0 +1,5 @@
/* Compile-time parameters for the CCSDS Reed-Solomon codec.
 * With NN = 255 symbols per block and NROOTS = 32 parity symbols this
 * corresponds to the (255,223) code listed in the README.
 */
/* Symbol type: one unsigned byte. */
typedef unsigned char data_t;
/* Lookup tables defined in another translation unit.
 * NOTE(review): presumably the conversion tables to and from the CCSDS
 * "dual basis" symbol representation -- confirm against their definition.
 */
extern unsigned char Taltab[],Tal1tab[];
/* Block length in symbols. */
#define NN 255
/* Number of parity symbols (255 - 223). */
#define NROOTS 32

24
libfec/char.h Normal file
View File

@ -0,0 +1,24 @@
/* Stuff specific to the 8-bit symbol version of the general purpose RS codecs
*
* Copyright 2003, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
/* Symbol type: one unsigned byte per code symbol. */
typedef unsigned char data_t;
/* Short names for the code parameters. Every macro below expands to a member
 * access through a pointer named rs (MODNN expands to a call that passes rs),
 * so they are usable only where a variable called rs is in scope.
 * NOTE(review): the structure rs points to and the modnn() function are
 * declared elsewhere; confirm the member meanings against that declaration.
 */
#define MODNN(x) modnn(rs,x)
#define MM (rs->mm)
#define NN (rs->nn)
#define ALPHA_TO (rs->alpha_to)
#define INDEX_OF (rs->index_of)
#define GENPOLY (rs->genpoly)
#define NROOTS (rs->nroots)
#define FCR (rs->fcr)
#define PRIM (rs->prim)
#define IPRIM (rs->iprim)
#define PAD (rs->pad)
/* A0 is an alias for NN, i.e. it also evaluates to rs->nn. */
#define A0 (NN)

View File

@ -0,0 +1,115 @@
# Portions of this file have been borrowed from and/or inspired by
# the Version.cmake from the rtl-sdr project.
# http://sdr.osmocom.org/trac/wiki/rtl-sdr
#
# Provides:
# ${VERSION_INFO_BASE} - Major.Minor.Patch
# ${VERSION_INFO} - Major.minor.Patch[-git_info]
#
# Requires values for:
# ${VERSION_INFO_MAJOR} - Increment on API compatibility changes.
# ${VERSION_INFO_MINOR} - Increment when adding features.
# ${VERSION_INFO_PATCH} - Increment for bug and documentation changes.
#
# Optional:
# ${VERSION_INFO_EXTRA} - Set to "git" to append git info. This is
# intended only for non-versioned development
# builds
# ${VERSION_INFO_OVERRIDE} - Set to a non-null value to override the
# VERSION_INFO_EXTRA logic. This is intended
# for automated snapshot builds from exported
# trees, to pass in the git revision info.
#
# Include guard: process this file only once per scope. The guard variable
# still carries a TOOLAME-DAB name, apparently inherited from another project.
if(NOT DEFINED __INCLUDED_TOOLAME-DAB_VERSION_CMAKE)
  set(__INCLUDED_TOOLAME-DAB_VERSION_CMAKE TRUE)
else()
  return()
endif()

################################################################################
# Gather up variables provided by parent script
################################################################################
# Each component is mandatory. message(FATAL_ERROR) aborts processing, so the
# assignment after each check is reached only when the input is defined.
if(NOT DEFINED VERSION_INFO_MAJOR)
  message(FATAL_ERROR "VERSION_INFO_MAJOR is not defined")
endif()
set(VER_MAJ ${VERSION_INFO_MAJOR})

if(NOT DEFINED VERSION_INFO_MINOR)
  message(FATAL_ERROR "VERSION_INFO_MINOR is not defined")
endif()
set(VER_MIN ${VERSION_INFO_MINOR})

if(NOT DEFINED VERSION_INFO_PATCH)
  message(FATAL_ERROR "VERSION_INFO_PATCH is not defined")
endif()
set(VER_PAT ${VERSION_INFO_PATCH})
################################################################################
# Craft version number, using git, if needed
################################################################################
# Result: GIT_INFO is "-unknown" outside a git repository, "-<rev>" for a
# clean tree and "-<rev>-dirty" when tracked files differ from HEAD. It is
# left untouched when git itself cannot be found.
find_package(Git QUIET)
if(NOT GIT_FOUND)
  message(WARNING "git missing -- unable to check libfec version.")
  unset(NOT_GIT_REPOSITORY)
  unset(GIT_REV)
  unset(GIT_DIRTY)
else()
  # "git rev-parse --" exits non-zero when the directory is not inside a
  # repository; its error output is suppressed.
  execute_process(
    COMMAND ${GIT_EXECUTABLE} rev-parse --
    ERROR_QUIET
    RESULT_VARIABLE NOT_GIT_REPOSITORY
    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
  )
  if(NOT NOT_GIT_REPOSITORY)
    # Abbreviated hash of the current commit.
    execute_process(
      COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD --
      OUTPUT_VARIABLE GIT_REV OUTPUT_STRIP_TRAILING_WHITESPACE
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    )
    # A non-zero exit status means the tree has changes relative to HEAD.
    execute_process(
      COMMAND ${GIT_EXECUTABLE} diff-index --quiet HEAD --
      RESULT_VARIABLE GIT_DIRTY
      WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
    )
    set(GIT_INFO "-${GIT_REV}")
    if(GIT_DIRTY)
      set(GIT_INFO "${GIT_INFO}-dirty")
    endif()
  else()
    set(GIT_INFO "-unknown")
  endif()
endif()
################################################################################
# Provide
################################################################################
set(VERSION_INFO_BASE "${VER_MAJ}.${VER_MIN}.${VER_PAT}")

# Start from the plain release version and append a suffix where one applies.
# Precedence: an explicit override wins, then the "git" development suffix;
# an empty VERSION_INFO_EXTRA means a versioned release with no suffix, and
# any other value is rejected.
set(VERSION_INFO "${VERSION_INFO_BASE}")
if(VERSION_INFO_OVERRIDE)
  # Forced suffix, used for automated export builds.
  set(VERSION_INFO "${VERSION_INFO}-${VERSION_INFO_OVERRIDE}")
elseif("${VERSION_INFO_EXTRA}" STREQUAL "git")
  # Intra-release build.
  set(VERSION_INFO "${VERSION_INFO}-git${GIT_INFO}")
elseif(NOT "${VERSION_INFO_EXTRA}" STREQUAL "")
  message(FATAL_ERROR
    "Unexpected definition of VERSION_INFO_EXTRA: ${VERSION_INFO_EXTRA}")
endif()

View File

@ -0,0 +1,21 @@
# Uninstall script template. Once configured, it deletes every file recorded
# in install_manifest.txt, the list CMake writes into the build directory
# when the install step runs. The DESTDIR environment variable is honoured
# as a prefix, mirroring staged installs.
if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
  message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
endif()

# The manifest holds one path per line; convert it into a CMake list.
file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
string(REGEX REPLACE "\n" ";" files "${files}")

foreach(file ${files})
  message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
  # IS_SYMLINK is checked too because EXISTS follows links and is therefore
  # false for a dangling symlink, which should still be removed.
  if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
    # execute_process replaces the deprecated exec_program command. Arguments
    # are handed to the child process directly, without a shell, so paths
    # containing spaces need no manual escaping.
    execute_process(
      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
      OUTPUT_VARIABLE rm_out
      RESULT_VARIABLE rm_retval
    )
    if(NOT "${rm_retval}" STREQUAL 0)
      message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
    endif()
  else()
    message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
  endif()
endforeach()

1516
libfec/config.guess vendored Normal file

File diff suppressed because it is too large Load Diff

1362
libfec/config.sub vendored Executable file

File diff suppressed because it is too large Load Diff

90
libfec/configure.in Normal file
View File

@ -0,0 +1,90 @@
dnl Process this file with autoconf to produce a configure script.
AC_INIT(viterbi27.c)
AC_CONFIG_HEADER(config.h)
dnl Shared library major number and full library version, exported for
dnl substitution into the generated files.
SO_NAME=3
VERSION=3.0.0
AC_SUBST(SO_NAME)
AC_SUBST(VERSION)
dnl Checks for programs.
AC_PROG_CC
dnl AC_PROG_CC sets GCC to yes for the GNU C compiler and leaves it empty
dnl otherwise. The expansion is quoted and prefixed so that the empty case is
dnl still a well-formed comparison and the error below is actually reported.
if test "x$GCC" != "xyes"
then
AC_MSG_ERROR([Need GNU C compiler])
fi
dnl Checks for libraries.
AC_CHECK_LIB(c, malloc)
dnl Checks for header files.
AC_CHECK_HEADERS(getopt.h stdio.h stdlib.h memory.h string.h)
dnl Each result is cached in a shell variable named ac_cv_header_NAME, where
dnl characters that are not valid in a variable name become underscores; the
dnl value is yes when the header is usable. Test those variables: an
dnl expression such as $HAVE_stdio.h expands the variable HAVE_stdio followed
dnl by a literal .h and therefore can never be empty.
dnl getopt.h is optional and deliberately not enforced.
if test "x$ac_cv_header_stdio_h" != "xyes"
then
AC_MSG_ERROR([Need stdio.h!])
fi
if test "x$ac_cv_header_stdlib_h" != "xyes"
then
AC_MSG_ERROR([Need stdlib.h!])
fi
if test "x$ac_cv_header_memory_h" != "xyes"
then
AC_MSG_ERROR([Need memory.h!])
fi
if test "x$ac_cv_header_string_h" != "xyes"
then
AC_MSG_ERROR([Need string.h])
fi
dnl Choose the CPU-specific object files (MLIBS) and extra compiler flags
dnl (ARCH_OPTION) from the target CPU determined by AC_CANONICAL_SYSTEM.
AC_CANONICAL_SYSTEM
case $target_cpu in
dnl x86-64: the compiler is given -msse2, but the object list holds the
dnl _port variants and none of the MMX/SSE objects listed for 32-bit x86
dnl below (README.x86-64 explains why).
x86_64)
ARCH_OPTION="-msse2"
MLIBS="dotprod_port.o \
peakval_port.o \
sumsq.o sumsq_port.o \
cpu_mode_x86_64.o"
;;
dnl 32-bit x86: compile for the exact target CPU and add the MMX, SSE and
dnl SSE2 objects together with the CPU feature detection code.
i386|i486|i586|i686)
ARCH_OPTION="-march=$target_cpu"
MLIBS="viterbi27_mmx.o mmxbfly27.o viterbi27_sse.o ssebfly27.o viterbi27_sse2.o sse2bfly27.o \
viterbi29_mmx.o mmxbfly29.o viterbi29_sse.o ssebfly29.o viterbi29_sse2.o sse2bfly29.o \
viterbi39_sse2.o viterbi39_sse.o viterbi39_mmx.o \
viterbi615_mmx.o viterbi615_sse.o viterbi615_sse2.o \
dotprod_mmx.o dotprod_mmx_assist.o \
dotprod_sse2.o dotprod_sse2_assist.o \
peakval_mmx.o peakval_mmx_assist.o \
peakval_sse.o peakval_sse_assist.o \
peakval_sse2.o peakval_sse2_assist.o \
sumsq.o sumsq_port.o \
sumsq_sse2.o sumsq_sse2_assist.o \
sumsq_mmx.o sumsq_mmx_assist.o \
cpu_features.o cpu_mode_x86.o"
;;
dnl PowerPC: add the Altivec (_av) objects.
powerpc*)
ARCH_OPTION="-fno-common -faltivec"
MLIBS="viterbi27_av.o viterbi29_av.o viterbi39_av.o viterbi615_av.o \
encode_rs_av.o \
dotprod_av.o sumsq_av.o peakval_av.o cpu_mode_ppc.o"
;;
dnl Any other CPU: only the generic CPU mode object is added and ARCH_OPTION
dnl is left unset.
*)
MLIBS="cpu_mode_generic.o"
esac
dnl Pick the shared library file name (SH_LIB) and the REBIND command for the
dnl target operating system.
dnl NOTE(review): REBIND is presumably run by the makefile after installing
dnl the library; confirm in the makefile template.
case $target_os in
dnl Darwin: .dylib suffix, no rebind command.
darwin*)
SH_LIB=libfec.dylib
REBIND=""
;;
dnl Everything else: .so suffix, ldconfig as the rebind command.
*)
SH_LIB=libfec.so
REBIND=ldconfig
;;
esac
dnl Export the values selected above for substitution into the output file.
AC_SUBST(SH_LIB)
AC_SUBST(REBIND)
AC_SUBST(MLIBS)
AC_SUBST(ARCH_OPTION)
dnl Checks for library functions.
AC_CHECK_FUNCS(getopt_long memset memmove)
dnl Generate the makefile.
AC_OUTPUT(makefile)

15
libfec/cpu_features.s Normal file
View File

@ -0,0 +1,15 @@
/* cpu_features: 32-bit x86 routine in AT&T syntax.
 * Runs CPUID with EAX = 1 and returns in EAX the value that CPUID leaves
 * in EDX. cpu_mode_x86.c calls it without arguments and tests bits 23, 25
 * and 26 of the result for MMX, SSE and SSE2.
 */
.text
.global cpu_features
.type cpu_features,@function
cpu_features:
/* Preserve the registers CPUID overwrites; EAX carries the return value
 * and is therefore not saved. */
pushl %ebx
pushl %ecx
pushl %edx
/* Select CPUID leaf 1. */
movl $1,%eax
cpuid
/* Move the EDX result into the return register. */
movl %edx,%eax
/* Restore in reverse push order. */
popl %edx
popl %ecx
popl %ebx
ret

13
libfec/cpu_mode_generic.c Normal file
View File

@ -0,0 +1,13 @@
/* Determine CPU support for SIMD on Power PC
* Copyright 2004 Phil Karn, KA9Q
* Copyright 2014 Matthias P. Braendli, HB9EGM
*/
#include <stdio.h>
#include "fec.h"
enum cpu_mode Cpu_mode;
/* Unknown CPU type: unconditionally select the portable C implementation. */
void find_cpu_mode(void)
{
    Cpu_mode = PORT;
    return;
}

40
libfec/cpu_mode_ppc.c Normal file
View File

@ -0,0 +1,40 @@
/* Determine CPU support for SIMD on Power PC
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdio.h>
#include "fec.h"
#ifdef __VEC__
#include <sys/sysctl.h>
#endif
/* Various SIMD instruction set names */
char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)",
"x86 Streaming SIMD Extensions (SSE)",
"x86 Streaming SIMD Extensions 2 (SSE2)",
"PowerPC G4/G5 Altivec/Velocity Engine"};
enum cpu_mode Cpu_mode;
/* Decide once which implementation to use on PowerPC and store the
 * result in Cpu_mode. Calls made after the mode has been determined
 * do nothing. The chosen mode is reported on stderr.
 */
void find_cpu_mode(void)
{
    if (Cpu_mode == UNKNOWN) {
#ifdef __VEC__
        /* Query the hw.vectorunit sysctl to learn whether Altivec is usable */
        int mib[2] = { CTL_HW, HW_VECTORUNIT };
        int have_vec = 0;
        size_t have_vec_len = sizeof(have_vec);
        int rc = sysctl(mib, 2, &have_vec, &have_vec_len, NULL, 0);

        Cpu_mode = (0 == rc && have_vec) ? ALTIVEC : PORT;
#else
        /* Compiled without vector support: only the portable code is available */
        Cpu_mode = PORT;
#endif
        fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]);
    }
}

33
libfec/cpu_mode_x86.c Normal file
View File

@ -0,0 +1,33 @@
/* Determine CPU support for SIMD
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdio.h>
#include "fec.h"
/* Various SIMD instruction set names */
char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)",
"x86 Streaming SIMD Extensions (SSE)",
"x86 Streaming SIMD Extensions 2 (SSE2)",
"PowerPC G4/G5 Altivec/Velocity Engine"};
enum cpu_mode Cpu_mode;
/* Decide once which SIMD instruction set to use on 32-bit x86 and store
 * the result in Cpu_mode. Calls made after the mode has been determined
 * do nothing. The chosen mode is reported on stderr.
 */
void find_cpu_mode(void)
{
    if (Cpu_mode == UNKNOWN) {
        /* Feature word reported by cpu_features() */
        int feat = cpu_features();

        /* Prefer the most capable set: bit 26 = SSE2, bit 25 = SSE,
         * bit 23 = MMX; with none of them fall back to portable C.
         */
        Cpu_mode = (feat & (1<<26)) ? SSE2
                 : (feat & (1<<25)) ? SSE
                 : (feat & (1<<23)) ? MMX
                 : PORT;

        fprintf(stderr,"SIMD CPU detect: %s\n",Cpu_modes[Cpu_mode]);
    }
}

27
libfec/cpu_mode_x86_64.c Normal file
View File

@ -0,0 +1,27 @@
/* Determine CPU support for SIMD
* Copyright 2004 Phil Karn, KA9Q
*
* Modified in 2012 by Matthias P. Braendli, HB9EGM
*/
#include <stdio.h>
#include "fec.h"
/* Various SIMD instruction set names */
char *Cpu_modes[] = {"Unknown","Portable C","x86 Multi Media Extensions (MMX)",
"x86 Streaming SIMD Extensions (SSE)",
"x86 Streaming SIMD Extensions 2 (SSE2)",
"PowerPC G4/G5 Altivec/Velocity Engine"};
enum cpu_mode Cpu_mode;
/* Set Cpu_mode for x86-64 builds. Calls made after the mode has been
 * determined do nothing. No run-time CPU probing is performed.
 */
void find_cpu_mode(void){
  if(Cpu_mode != UNKNOWN)
    return;

  /* According to the wikipedia entry x86-64, all x86-64 processors have SSE2 */
  /* The same assumption is also in other source files ! */
  Cpu_mode = SSE2;
  fprintf(stderr,"CPU: x86-64, using portable C implementation\n");
}

262
libfec/decode_rs.c Normal file
View File

@ -0,0 +1,262 @@
/* Reed-Solomon decoder
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#ifdef DEBUG
#include <stdio.h>
#endif
#include <string.h>
#define NULL ((void *)0)
#define min(a,b) ((a) < (b) ? (a) : (b))
#ifdef FIXED
#include "fixed.h"
#elif defined(BIGSYM)
#include "int.h"
#else
#include "char.h"
#endif
int DECODE_RS(
#ifdef FIXED
data_t *data, int *eras_pos, int no_eras,int pad){
#else
void *p,data_t *data, int *eras_pos, int no_eras){
struct rs *rs = (struct rs *)p;
#endif
int deg_lambda, el, deg_omega;
int i, j, r,k;
data_t u,q,tmp,num1,num2,den,discr_r;
data_t lambda[NROOTS+1], s[NROOTS]; /* Err+Eras Locator poly
* and syndrome poly */
data_t b[NROOTS+1], t[NROOTS+1], omega[NROOTS+1];
data_t root[NROOTS], reg[NROOTS+1], loc[NROOTS];
int syn_error, count;
#ifdef FIXED
/* Check pad parameter for validity */
if(pad < 0 || pad >= NN)
return -1;
#endif
/* form the syndromes; i.e., evaluate data(x) at roots of g(x) */
for(i=0;i<NROOTS;i++)
s[i] = data[0];
for(j=1;j<NN-PAD;j++){
for(i=0;i<NROOTS;i++){
if(s[i] == 0){
s[i] = data[j];
} else {
s[i] = data[j] ^ ALPHA_TO[MODNN(INDEX_OF[s[i]] + (FCR+i)*PRIM)];
}
}
}
/* Convert syndromes to index form, checking for nonzero condition */
syn_error = 0;
for(i=0;i<NROOTS;i++){
syn_error |= s[i];
s[i] = INDEX_OF[s[i]];
}
if (!syn_error) {
/* if syndrome is zero, data[] is a codeword and there are no
* errors to correct. So return data[] unmodified
*/
count = 0;
goto finish;
}
memset(&lambda[1],0,NROOTS*sizeof(lambda[0]));
lambda[0] = 1;
if (no_eras > 0) {
/* Init lambda to be the erasure locator polynomial */
lambda[1] = ALPHA_TO[MODNN(PRIM*(NN-1-eras_pos[0]))];
for (i = 1; i < no_eras; i++) {
u = MODNN(PRIM*(NN-1-eras_pos[i]));
for (j = i+1; j > 0; j--) {
tmp = INDEX_OF[lambda[j - 1]];
if(tmp != A0)
lambda[j] ^= ALPHA_TO[MODNN(u + tmp)];
}
}
#if DEBUG >= 1
/* Test code that verifies the erasure locator polynomial just constructed
Needed only for decoder debugging. */
/* find roots of the erasure location polynomial */
for(i=1;i<=no_eras;i++)
reg[i] = INDEX_OF[lambda[i]];
count = 0;
for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
q = 1;
for (j = 1; j <= no_eras; j++)
if (reg[j] != A0) {
reg[j] = MODNN(reg[j] + j);
q ^= ALPHA_TO[reg[j]];
}
if (q != 0)
continue;
/* store root and error location number indices */
root[count] = i;
loc[count] = k;
count++;
}
if (count != no_eras) {
printf("count = %d no_eras = %d\n lambda(x) is WRONG\n",count,no_eras);
count = -1;
goto finish;
}
#if DEBUG >= 2
printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n");
for (i = 0; i < count; i++)
printf("%d ", loc[i]);
printf("\n");
#endif
#endif
}
for(i=0;i<NROOTS+1;i++)
b[i] = INDEX_OF[lambda[i]];
/*
* Begin Berlekamp-Massey algorithm to determine error+erasure
* locator polynomial
*/
r = no_eras;
el = no_eras;
while (++r <= NROOTS) { /* r is the step number */
/* Compute discrepancy at the r-th step in poly-form */
discr_r = 0;
for (i = 0; i < r; i++){
if ((lambda[i] != 0) && (s[r-i-1] != A0)) {
discr_r ^= ALPHA_TO[MODNN(INDEX_OF[lambda[i]] + s[r-i-1])];
}
}
discr_r = INDEX_OF[discr_r]; /* Index form */
if (discr_r == A0) {
/* 2 lines below: B(x) <-- x*B(x) */
memmove(&b[1],b,NROOTS*sizeof(b[0]));
b[0] = A0;
} else {
/* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */
t[0] = lambda[0];
for (i = 0 ; i < NROOTS; i++) {
if(b[i] != A0)
t[i+1] = lambda[i+1] ^ ALPHA_TO[MODNN(discr_r + b[i])];
else
t[i+1] = lambda[i+1];
}
if (2 * el <= r + no_eras - 1) {
el = r + no_eras - el;
/*
* 2 lines below: B(x) <-- inv(discr_r) *
* lambda(x)
*/
for (i = 0; i <= NROOTS; i++)
b[i] = (lambda[i] == 0) ? A0 : MODNN(INDEX_OF[lambda[i]] - discr_r + NN);
} else {
/* 2 lines below: B(x) <-- x*B(x) */
memmove(&b[1],b,NROOTS*sizeof(b[0]));
b[0] = A0;
}
memcpy(lambda,t,(NROOTS+1)*sizeof(t[0]));
}
}
/* Convert lambda to index form and compute deg(lambda(x)) */
deg_lambda = 0;
for(i=0;i<NROOTS+1;i++){
lambda[i] = INDEX_OF[lambda[i]];
if(lambda[i] != A0)
deg_lambda = i;
}
/* Find roots of the error+erasure locator polynomial by Chien search */
memcpy(&reg[1],&lambda[1],NROOTS*sizeof(reg[0]));
count = 0; /* Number of roots of lambda(x) */
for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
q = 1; /* lambda[0] is always 0 */
for (j = deg_lambda; j > 0; j--){
if (reg[j] != A0) {
reg[j] = MODNN(reg[j] + j);
q ^= ALPHA_TO[reg[j]];
}
}
if (q != 0)
continue; /* Not a root */
/* store root (index-form) and error location number */
#if DEBUG>=2
printf("count %d root %d loc %d\n",count,i,k);
#endif
root[count] = i;
loc[count] = k;
/* If we've already found max possible roots,
* abort the search to save time
*/
if(++count == deg_lambda)
break;
}
if (deg_lambda != count) {
/*
* deg(lambda) unequal to number of roots => uncorrectable
* error detected
*/
count = -1;
goto finish;
}
/*
* Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
* x**NROOTS). in index form. Also find deg(omega).
*/
deg_omega = deg_lambda-1;
for (i = 0; i <= deg_omega;i++){
tmp = 0;
for(j=i;j >= 0; j--){
if ((s[i - j] != A0) && (lambda[j] != A0))
tmp ^= ALPHA_TO[MODNN(s[i - j] + lambda[j])];
}
omega[i] = INDEX_OF[tmp];
}
/*
* Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
* inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form
*/
for (j = count-1; j >=0; j--) {
num1 = 0;
for (i = deg_omega; i >= 0; i--) {
if (omega[i] != A0)
num1 ^= ALPHA_TO[MODNN(omega[i] + i * root[j])];
}
num2 = ALPHA_TO[MODNN(root[j] * (FCR - 1) + NN)];
den = 0;
/* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */
for (i = min(deg_lambda,NROOTS-1) & ~1; i >= 0; i -=2) {
if(lambda[i+1] != A0)
den ^= ALPHA_TO[MODNN(lambda[i+1] + i * root[j])];
}
#if DEBUG >= 1
if (den == 0) {
printf("\n ERROR: denominator = 0\n");
count = -1;
goto finish;
}
#endif
/* Apply error to data */
if (num1 != 0 && loc[j] >= PAD) {
data[loc[j]-PAD] ^= ALPHA_TO[MODNN(INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])];
}
}
finish:
if(eras_pos != NULL){
for(i=0;i<count;i++)
eras_pos[i] = loc[i];
}
return count;
}

298
libfec/decode_rs.h Normal file
View File

@ -0,0 +1,298 @@
/* The guts of the Reed-Solomon decoder, meant to be #included
* into a function body with the following typedefs, macros and variables supplied
* according to the code parameters:
* data_t - a typedef for the data symbol
* data_t data[] - array of NN data and parity symbols to be corrected in place
* retval - an integer lvalue into which the decoder's return code is written
* NROOTS - the number of roots in the RS code generator polynomial,
* which is the same as the number of parity symbols in a block.
Integer variable or literal.
* NN - the total number of symbols in a RS block. Integer variable or literal.
* PAD - the number of pad symbols in a block. Integer variable or literal.
* ALPHA_TO - The address of an array of NN elements to convert Galois field
* elements in index (log) form to polynomial form. Read only.
* INDEX_OF - The address of an array of NN elements to convert Galois field
* elements in polynomial form to index (log) form. Read only.
* MODNN - a function to reduce its argument modulo NN. May be inline or a macro.
* FCR - An integer literal or variable specifying the first consecutive root of the
* Reed-Solomon generator polynomial. Integer variable or literal.
* PRIM - The primitive root of the generator poly. Integer variable or literal.
* DEBUG - If set to 1 or more, do various internal consistency checking. Leave this
* undefined for production code
* The memset(), memmove(), and memcpy() functions are used. The appropriate header
* file declaring these functions (usually <string.h>) must be included by the calling
* program.
*/
#if !defined(NROOTS)
#error "NROOTS not defined"
#endif
#if !defined(NN)
#error "NN not defined"
#endif
#if !defined(PAD)
#error "PAD not defined"
#endif
#if !defined(ALPHA_TO)
#error "ALPHA_TO not defined"
#endif
#if !defined(INDEX_OF)
#error "INDEX_OF not defined"
#endif
#if !defined(MODNN)
#error "MODNN not defined"
#endif
#if !defined(FCR)
#error "FCR not defined"
#endif
#if !defined(PRIM)
#error "PRIM not defined"
#endif
#if !defined(NULL)
#define NULL ((void *)0)
#endif
#undef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#undef A0
#define A0 (NN)
/* Decoder body. Expects data, eras_pos, no_eras and retval to be in scope
 * in the including function. On exit retval holds the number of corrected
 * symbols, 0 if the block was already a codeword, or -1 if the block is
 * uncorrectable (or no_eras exceeds NROOTS). When eras_pos is not NULL the
 * locations of the corrected symbols are written back into it.
 */
{
  int deg_lambda, el, deg_omega;
  int i, j, r,k;
  data_t u,q,tmp,num1,num2,den,discr_r;
  data_t lambda[NROOTS+1], s[NROOTS]; /* Err+Eras Locator poly
                                       * and syndrome poly */
  data_t b[NROOTS+1], t[NROOTS+1], omega[NROOTS+1];
  data_t root[NROOTS], reg[NROOTS+1], loc[NROOTS];
  int syn_error, count;

  /* form the syndromes; i.e., evaluate data(x) at roots of g(x) */
  for(i=0;i<NROOTS;i++)
    s[i] = data[0];

  for(j=1;j<NN-PAD;j++){
    for(i=0;i<NROOTS;i++){
      if(s[i] == 0){
        s[i] = data[j];
      } else {
        s[i] = data[j] ^ ALPHA_TO[MODNN(INDEX_OF[s[i]] + (FCR+i)*PRIM)];
      }
    }
  }

  /* Convert syndromes to index form, checking for nonzero condition */
  syn_error = 0;
  for(i=0;i<NROOTS;i++){
    syn_error |= s[i];
    s[i] = INDEX_OF[s[i]];
  }

  if (!syn_error) {
    /* if syndrome is zero, data[] is a codeword and there are no
     * errors to correct. So return data[] unmodified
     */
    count = 0;
    goto finish;
  }
  memset(&lambda[1],0,NROOTS*sizeof(lambda[0]));
  lambda[0] = 1;

  /* More erasures than parity symbols can never be corrected, and the
   * locator construction below would write lambda[no_eras], which lies
   * past the end of lambda[NROOTS+1]. Report the block as uncorrectable.
   */
  if (no_eras > NROOTS) {
    count = -1;
    goto finish;
  }

  if (no_eras > 0) {
    /* Init lambda to be the erasure locator polynomial */
    lambda[1] = ALPHA_TO[MODNN(PRIM*(NN-1-eras_pos[0]))];
    for (i = 1; i < no_eras; i++) {
      u = MODNN(PRIM*(NN-1-eras_pos[i]));
      for (j = i+1; j > 0; j--) {
        tmp = INDEX_OF[lambda[j - 1]];
        if(tmp != A0)
          lambda[j] ^= ALPHA_TO[MODNN(u + tmp)];
      }
    }

#if DEBUG >= 1
    /* Test code that verifies the erasure locator polynomial just constructed
       Needed only for decoder debugging. */

    /* find roots of the erasure location polynomial */
    for(i=1;i<=no_eras;i++)
      reg[i] = INDEX_OF[lambda[i]];

    count = 0;
    for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
      q = 1;
      for (j = 1; j <= no_eras; j++)
        if (reg[j] != A0) {
          reg[j] = MODNN(reg[j] + j);
          q ^= ALPHA_TO[reg[j]];
        }
      if (q != 0)
        continue;
      /* store root and error location number indices */
      root[count] = i;
      loc[count] = k;
      count++;
    }
    if (count != no_eras) {
      printf("count = %d no_eras = %d\n lambda(x) is WRONG\n",count,no_eras);
      count = -1;
      goto finish;
    }
#if DEBUG >= 2
    printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n");
    for (i = 0; i < count; i++)
      printf("%d ", loc[i]);
    printf("\n");
#endif
#endif
  }
  for(i=0;i<NROOTS+1;i++)
    b[i] = INDEX_OF[lambda[i]];

  /*
   * Begin Berlekamp-Massey algorithm to determine error+erasure
   * locator polynomial
   */
  r = no_eras;
  el = no_eras;
  while (++r <= NROOTS) { /* r is the step number */
    /* Compute discrepancy at the r-th step in poly-form */
    discr_r = 0;
    for (i = 0; i < r; i++){
      if ((lambda[i] != 0) && (s[r-i-1] != A0)) {
        discr_r ^= ALPHA_TO[MODNN(INDEX_OF[lambda[i]] + s[r-i-1])];
      }
    }
    discr_r = INDEX_OF[discr_r]; /* Index form */
    if (discr_r == A0) {
      /* 2 lines below: B(x) <-- x*B(x) */
      memmove(&b[1],b,NROOTS*sizeof(b[0]));
      b[0] = A0;
    } else {
      /* 7 lines below: T(x) <-- lambda(x) - discr_r*x*b(x) */
      t[0] = lambda[0];
      for (i = 0 ; i < NROOTS; i++) {
        if(b[i] != A0)
          t[i+1] = lambda[i+1] ^ ALPHA_TO[MODNN(discr_r + b[i])];
        else
          t[i+1] = lambda[i+1];
      }
      if (2 * el <= r + no_eras - 1) {
        el = r + no_eras - el;
        /*
         * 2 lines below: B(x) <-- inv(discr_r) *
         * lambda(x)
         */
        for (i = 0; i <= NROOTS; i++)
          b[i] = (lambda[i] == 0) ? A0 : MODNN(INDEX_OF[lambda[i]] - discr_r + NN);
      } else {
        /* 2 lines below: B(x) <-- x*B(x) */
        memmove(&b[1],b,NROOTS*sizeof(b[0]));
        b[0] = A0;
      }
      memcpy(lambda,t,(NROOTS+1)*sizeof(t[0]));
    }
  }

  /* Convert lambda to index form and compute deg(lambda(x)) */
  deg_lambda = 0;
  for(i=0;i<NROOTS+1;i++){
    lambda[i] = INDEX_OF[lambda[i]];
    if(lambda[i] != A0)
      deg_lambda = i;
  }
  /* Find roots of the error+erasure locator polynomial by Chien search */
  memcpy(&reg[1],&lambda[1],NROOTS*sizeof(reg[0]));
  count = 0; /* Number of roots of lambda(x) */
  for (i = 1,k=IPRIM-1; i <= NN; i++,k = MODNN(k+IPRIM)) {
    q = 1; /* lambda[0] is always 0 */
    for (j = deg_lambda; j > 0; j--){
      if (reg[j] != A0) {
        reg[j] = MODNN(reg[j] + j);
        q ^= ALPHA_TO[reg[j]];
      }
    }
    if (q != 0)
      continue; /* Not a root */
    /* store root (index-form) and error location number */
#if DEBUG>=2
    printf("count %d root %d loc %d\n",count,i,k);
#endif
    root[count] = i;
    loc[count] = k;
    /* If we've already found max possible roots,
     * abort the search to save time
     */
    if(++count == deg_lambda)
      break;
  }
  if (deg_lambda != count) {
    /*
     * deg(lambda) unequal to number of roots => uncorrectable
     * error detected
     */
    count = -1;
    goto finish;
  }
  /*
   * Compute err+eras evaluator poly omega(x) = s(x)*lambda(x) (modulo
   * x**NROOTS). in index form. Also find deg(omega).
   */
  deg_omega = deg_lambda-1;
  for (i = 0; i <= deg_omega;i++){
    tmp = 0;
    for(j=i;j >= 0; j--){
      if ((s[i - j] != A0) && (lambda[j] != A0))
        tmp ^= ALPHA_TO[MODNN(s[i - j] + lambda[j])];
    }
    omega[i] = INDEX_OF[tmp];
  }

  /*
   * Compute error values in poly-form. num1 = omega(inv(X(l))), num2 =
   * inv(X(l))**(FCR-1) and den = lambda_pr(inv(X(l))) all in poly-form
   */
  for (j = count-1; j >=0; j--) {
    num1 = 0;
    for (i = deg_omega; i >= 0; i--) {
      if (omega[i] != A0)
        num1 ^= ALPHA_TO[MODNN(omega[i] + i * root[j])];
    }
    num2 = ALPHA_TO[MODNN(root[j] * (FCR - 1) + NN)];
    den = 0;

    /* lambda[i+1] for i even is the formal derivative lambda_pr of lambda[i] */
    for (i = MIN(deg_lambda,NROOTS-1) & ~1; i >= 0; i -=2) {
      if(lambda[i+1] != A0)
        den ^= ALPHA_TO[MODNN(lambda[i+1] + i * root[j])];
    }
#if DEBUG >= 1
    if (den == 0) {
      printf("\n ERROR: denominator = 0\n");
      count = -1;
      goto finish;
    }
#endif
    /* Apply error to data */
    if (num1 != 0 && loc[j] >= PAD) {
      data[loc[j]-PAD] ^= ALPHA_TO[MODNN(INDEX_OF[num1] + INDEX_OF[num2] + NN - INDEX_OF[den])];
    }
  }
 finish:
  /* With count <= 0 the loop below copies nothing */
  if(eras_pos != NULL){
    for(i=0;i<count;i++)
      eras_pos[i] = loc[i];
  }
  retval = count;
}

24
libfec/decode_rs_8.c Normal file
View File

@ -0,0 +1,24 @@
/* General purpose Reed-Solomon decoder for 8-bit symbols or less
* Copyright 2003 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#ifdef DEBUG
#include <stdio.h>
#endif
#include <string.h>
#include "fixed.h"
/* Decode one block in place using the code parameters from fixed.h.
 * Returns -1 immediately when pad is outside 0..222; otherwise returns
 * the value the included decoder body stores in retval.
 */
int decode_rs_8(data_t *data, int *eras_pos, int no_eras, int pad){
int retval;
/* Reject an out-of-range pad before any decoding work is done */
if(pad < 0 || pad > 222){
return -1;
}
/* The decoder body is included textually here and writes its result to retval */
#include "decode_rs.h"
return retval;
}

26
libfec/decode_rs_ccsds.c Normal file
View File

@ -0,0 +1,26 @@
/* This function wraps around the fixed 8-bit decoder, performing the
* basis transformations necessary to meet the CCSDS standard
*
* Copyright 2002, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include "ccsds.h"
#include "fec.h"
/* Decode a CCSDS (dual-basis) block in place.
 * data     - NN-pad symbols in dual-basis representation
 * eras_pos - erasure positions, passed through to decode_rs_8()
 * no_eras  - number of erasures, passed through to decode_rs_8()
 * pad      - number of pad symbols; must be in 0..222
 * Returns the result of decode_rs_8(), or -1 for an invalid pad.
 * data[] is rewritten only when decode_rs_8() reports a positive count.
 */
int decode_rs_ccsds(data_t *data,int *eras_pos,int no_eras,int pad){
  int i,r;
  data_t cdata[NN];

  /* Validate pad before touching cdata[]: decode_rs_8() applies the same
   * range check, but only after the conversion loop below would already
   * have written past the end of cdata[] for a negative pad.
   */
  if(pad < 0 || pad > 222)
    return -1;

  /* Convert data from dual basis to conventional */
  for(i=0;i<NN-pad;i++)
    cdata[i] = Tal1tab[data[i]];

  r = decode_rs_8(cdata,eras_pos,no_eras,pad);

  if(r > 0){
    /* Convert from conventional to dual basis */
    for(i=0;i<NN-pad;i++)
      data[i] = Taltab[cdata[i]];
  }
  return r;
}

22
libfec/decode_rs_char.c Normal file
View File

@ -0,0 +1,22 @@
/* General purpose Reed-Solomon decoder for 8-bit symbols or less
* Copyright 2003 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#ifdef DEBUG
#include <stdio.h>
#endif
#include <string.h>
#include "char.h"
#include "rs-common.h"
/* Decode one block in place using the decoder body from decode_rs.h.
 * p is cast to struct rs * and is visible to the included body as rs
 * (presumably a handle from the matching init routine - verify against callers).
 * Returns the value the included body stores in retval.
 */
int decode_rs_char(void *p, data_t *data, int *eras_pos, int no_eras){
int retval;
struct rs *rs = (struct rs *)p;
/* The decoder body is included textually here and writes its result to retval */
#include "decode_rs.h"
return retval;
}

22
libfec/decode_rs_int.c Normal file
View File

@ -0,0 +1,22 @@
/* General purpose Reed-Solomon decoder
* Copyright 2003 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#ifdef DEBUG
#include <stdio.h>
#endif
#include <string.h>
#include "int.h"
#include "rs-common.h"
/* Decode one block in place using the decoder body from decode_rs.h,
 * with the symbol type and macros supplied by int.h.
 * p is cast to struct rs * and is visible to the included body as rs
 * (presumably a handle from the matching init routine - verify against callers).
 * Returns the value the included body stores in retval.
 */
int decode_rs_int(void *p, data_t *data, int *eras_pos, int no_eras){
int retval;
struct rs *rs = (struct rs *)p;
/* The decoder body is included textually here and writes its result to retval */
#include "decode_rs.h"
return retval;
}

111
libfec/dotprod.c Normal file
View File

@ -0,0 +1,111 @@
/* 16-bit signed integer dot product
* Switch to appropriate versions
* Copyright 2004 Phil Karn
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "fec.h"
void *initdp_port(signed short coeffs[],int len);
long dotprod_port(void *p,signed short *b);
void freedp_port(void *p);
#ifdef __i386__
void *initdp_mmx(signed short coeffs[],int len);
void *initdp_sse2(signed short coeffs[],int len);
long dotprod_mmx(void *p,signed short *b);
long dotprod_sse2(void *p,signed short *b);
void freedp_mmx(void *p);
void freedp_sse2(void *p);
#endif
#ifdef __VEC__
void *initdp_av(signed short coeffs[],int len);
long dotprod_av(void *p,signed short *b);
void freedp_av(void *p);
#endif
/* Create and return a descriptor for use with the dot product function.
 * Calls find_cpu_mode() first, then forwards coeffs/len to the init
 * routine matching Cpu_mode. Modes without a compiled-in implementation
 * fall through to the portable routine; on x86-64 builds SSE2 mode is
 * also served by the portable routine.
 */
void *initdp(signed short coeffs[],int len){
find_cpu_mode();
switch(Cpu_mode){
case PORT:
default:
return initdp_port(coeffs,len);
#ifdef __i386__
case MMX:
case SSE:
return initdp_mmx(coeffs,len);
case SSE2:
return initdp_sse2(coeffs,len);
#endif
#ifdef __x86_64__
case SSE2:
return initdp_port(coeffs,len);
#endif
#ifdef __VEC__
case ALTIVEC:
return initdp_av(coeffs,len);
#endif
}
}
/* Free a dot product descriptor created earlier by initdp().
 * Dispatches on the current Cpu_mode to the matching free routine.
 * A NULL descriptor (which the init routines return for len == 0) is
 * accepted and ignored.
 */
void freedp(void *p){
  if(p == NULL)
    return;

  /* The free routines return void, so call them and break rather than
   * writing "return f(p);", which ISO C does not allow in a void function.
   */
  switch(Cpu_mode){
  case PORT:
  default:
    freedp_port(p);
    break;
#ifdef __i386__
  case MMX:
  case SSE:
    freedp_mmx(p);
    break;
  case SSE2:
    freedp_sse2(p);
    break;
#endif
#ifdef __x86_64__
  case SSE2:
    freedp_port(p);
    break;
#endif
#ifdef __VEC__
  case ALTIVEC:
    freedp_av(p);
    break;
#endif
  }
}
/* Compute a dot product given a descriptor and an input array
 * The length is taken from the descriptor
 *
 * Dispatches on the current Cpu_mode without calling find_cpu_mode()
 * itself. Modes without a compiled-in implementation fall through to
 * the portable routine; on x86-64 builds SSE2 mode is also served by
 * the portable routine.
 */
long dotprod(void *p,signed short a[]){
switch(Cpu_mode){
case PORT:
default:
return dotprod_port(p,a);
#ifdef __i386__
case MMX:
case SSE:
return dotprod_mmx(p,a);
case SSE2:
return dotprod_sse2(p,a);
#endif
#ifdef __x86_64__
case SSE2:
return dotprod_port(p,a);
#endif
#ifdef __VEC__
case ALTIVEC:
return dotprod_av(p,a);
#endif
}
}

15
libfec/dotprod.h Normal file
View File

@ -0,0 +1,15 @@
/* Internal definitions for dotproduct function */
struct dotprod {
int len; /* Number of coefficients */
/* On a MMX or SSE machine, these hold 4 copies of the coefficients,
* preshifted by 0,1,2,3 words to meet all possible input data
* alignments (see Intel ap559 on MMX dot products).
*
* SSE2 is similar, but with 8 words at a time
*
* On a non-MMX machine, only one copy is present
*/
signed short *coeffs[8];
};

93
libfec/dotprod_av.c Normal file
View File

@ -0,0 +1,93 @@
/* 16-bit signed integer dot product
* Altivec-assisted version
* Copyright 2004 Phil Karn
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "fec.h"
struct dotprod {
int len; /* Number of coefficients */
/* On an Altivec machine, these hold 8 copies of the coefficients,
* preshifted by 0,1,..7 words to meet all possible input data
*/
signed short *coeffs[8];
};
/* Create and return a descriptor for use with the dot product function.
 * coeffs - filter coefficients, copied into the descriptor
 * len    - number of coefficients
 * Returns NULL when len is not positive or when memory cannot be
 * allocated; otherwise a handle to pass to dotprod_av()/freedp_av().
 */
void *initdp_av(signed short coeffs[],int len){
  struct dotprod *dp;
  int i,j;

  if(len <= 0)
    return NULL;

  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
  if(dp == NULL)
    return NULL;
  dp->len = len;

  /* Make 8 copies of coefficients, one for each data alignment;
   * copy i is preceded by i zero words.
   */
  for(i=0;i<8;i++){
    dp->coeffs[i] = calloc(1+(len+i-1)/8,sizeof(vector signed short));
    if(dp->coeffs[i] == NULL){
      /* Out of memory: release the copies made so far and give up */
      while(i-- > 0)
        free(dp->coeffs[i]);
      free(dp);
      return NULL;
    }
    for(j=0;j<len;j++)
      dp->coeffs[i][j+i] = coeffs[j];
  }
  return (void *)dp;
}
/* Free a dot product descriptor created earlier by initdp_av().
 * A NULL descriptor (returned by initdp_av() for len == 0) is ignored.
 */
void freedp_av(void *p){
  struct dotprod *dp = (struct dotprod *)p;
  int i;

  if(dp == NULL)
    return;

  /* free(NULL) is a no-op, so unused slots need no separate test */
  for(i=0;i<8;i++)
    free(dp->coeffs[i]);
  free(dp);
}
/* Compute a dot product given a descriptor and an input array
 * The length is taken from the descriptor
 *
 * p - handle returned by initdp_av()
 * a - input samples; at least as many as there are coefficients
 * Returns the sum of the four 32-bit partial sums.
 */
long dotprod_av(void *p,signed short a[]){
  struct dotprod *dp = (struct dotprod *)p;
  int al;
  vector signed short *ar,*d;
  vector signed int sums0,sums1,sums2,sums3;
  union { vector signed int v; signed int w[4];} s;
  int nblocks;

  /* round ar down to beginning of 16-byte block containing 0th element of
   * input buffer. Then set d to one of 8 sets of shifted coefficients
   *
   * The address arithmetic is done in size_t rather than int so that the
   * pointer is not truncated where pointers are wider than int.
   */
  ar = (vector signed short *)((size_t)a & ~(size_t)15);
  al = (int)(((size_t)a & 15)/sizeof(signed short));
  d = (vector signed short *)dp->coeffs[al];

  nblocks = (dp->len+al-1)/8+1;

  /* Sum into four vectors each holding four 32-bit partial sums */
  sums3 = sums2 = sums1 = sums0 = (vector signed int)(0);
  while(nblocks >= 4){
    sums0 = vec_msums(ar[nblocks-1],d[nblocks-1],sums0);
    sums1 = vec_msums(ar[nblocks-2],d[nblocks-2],sums1);
    sums2 = vec_msums(ar[nblocks-3],d[nblocks-3],sums2);
    sums3 = vec_msums(ar[nblocks-4],d[nblocks-4],sums3);
    nblocks -= 4;
  }
  sums0 = vec_adds(sums0,sums1);
  sums2 = vec_adds(sums2,sums3);
  sums0 = vec_adds(sums0,sums2);
  /* Fewer than four blocks remain; handle them one at a time */
  while(nblocks-- > 0){
    sums0 = vec_msums(ar[nblocks],d[nblocks],sums0);
  }
  /* Sum 4 partial sums into final result */
  s.v = vec_sums(sums0,(vector signed int)(0));

  return s.w[3];
}

81
libfec/dotprod_mmx.c Normal file
View File

@ -0,0 +1,81 @@
/* 16-bit signed integer dot product
* MMX assisted version; also for SSE
*
* Copyright 2004 Phil Karn
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "fec.h"
struct dotprod {
int len; /* Number of coefficients */
/* On a MMX or SSE machine, these hold 4 copies of the coefficients,
* preshifted by 0,1,2,3 words to meet all possible input data
* alignments (see Intel ap559 on MMX dot products).
*/
signed short *coeffs[4];
};
long dotprod_mmx_assist(signed short *a,signed short *b,int cnt);
/* Create and return a descriptor for use with the dot product function.
 * coeffs - filter coefficients, copied into the descriptor
 * len    - number of coefficients
 * Returns NULL when len is not positive or when memory cannot be
 * allocated; otherwise a handle to pass to dotprod_mmx()/freedp_mmx().
 */
void *initdp_mmx(signed short coeffs[],int len){
  struct dotprod *dp;
  int i,j;

  if(len <= 0)
    return NULL;

  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
  if(dp == NULL)
    return NULL;
  dp->len = len;

  /* Make 4 copies of coefficients, one for each data alignment;
   * copy i is preceded by i zero words.
   */
  for(i=0;i<4;i++){
    dp->coeffs[i] = (signed short *)calloc(1+(len+i-1)/4,
                                           4*sizeof(signed short));
    if(dp->coeffs[i] == NULL){
      /* Out of memory: release the copies made so far and give up */
      while(i-- > 0)
        free(dp->coeffs[i]);
      free(dp);
      return NULL;
    }
    for(j=0;j<len;j++)
      dp->coeffs[i][j+i] = coeffs[j];
  }
  return (void *)dp;
}
/* Free a dot product descriptor created earlier by initdp_mmx().
 * A NULL descriptor (returned by initdp_mmx() for len == 0) is ignored.
 */
void freedp_mmx(void *p){
  struct dotprod *dp = (struct dotprod *)p;
  int i;

  if(dp == NULL)
    return;

  /* free(NULL) is a no-op, so unused slots need no separate test */
  for(i=0;i<4;i++)
    free(dp->coeffs[i]);
  free(dp);
}
/* MMX dot product of the input array with the coefficients held in the
 * descriptor; the number of terms comes from the descriptor.
 *
 * The input address is rounded down to an 8-byte boundary, so memory
 * just before a[] may be read. Those words line up with the zero words
 * padded in front of the selected coefficient copy and therefore do not
 * contribute to the result.
 */
long dotprod_mmx(void *p,signed short a[]){
  struct dotprod *desc = (struct dotprod *)p;
  /* Start of the 8-byte block that contains a[0] */
  signed short *aligned = (signed short *)((int)a & ~7);
  /* Words between the aligned start and a[0]: selects the pre-shifted
   * coefficient copy and equals its number of leading zero words
   */
  int shift = a - aligned;
  /* Number of 4-word blocks the assembler routine has to process */
  int nblocks = (desc->len+shift-1)/4+1;

  return dotprod_mmx_assist(aligned,desc->coeffs[shift],nblocks);
}

View File

@ -0,0 +1,83 @@
# SIMD MMX dot product
# Equivalent to the following C code:
# long dotprod(signed short *a,signed short *b,int cnt)
# {
# long sum = 0;
# cnt *= 4;
# while(cnt--)
# sum += *a++ * *b++;
# return sum;
# }
# a and b should also be 64-bit aligned, or speed will suffer greatly
# Copyright 1999, Phil Karn KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
# long dotprod_mmx_assist(signed short *a,signed short *b,int cnt)
# The three arguments are read from the stack at 8, 12 and 16(%ebp);
# cnt counts 4-word blocks. The 32-bit result is returned in %eax.
.text
.global dotprod_mmx_assist
.type dotprod_mmx_assist,@function
dotprod_mmx_assist:
# Set up a frame pointer and save the registers used below
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ecx
pushl %ebx
movl 8(%ebp),%esi # a
movl 12(%ebp),%edi # b
movl 16(%ebp),%ecx # cnt
pxor %mm0,%mm0 # clear running sum (in two 32-bit halves)
# MMX dot product loop unrolled 4 times, crunching 16 terms per loop
.align 16
.Loop1: subl $4,%ecx
jl .Loop1Done
movq (%esi),%mm1 # mm1 = a[3],a[2],a[1],a[0]
pmaddwd (%edi),%mm1 # mm1 = b[3]*a[3]+b[2]*a[2],b[1]*a[1]+b[0]*a[0]
paddd %mm1,%mm0
movq 8(%esi),%mm1
pmaddwd 8(%edi),%mm1
paddd %mm1,%mm0
movq 16(%esi),%mm1
pmaddwd 16(%edi),%mm1
paddd %mm1,%mm0
movq 24(%esi),%mm1
addl $32,%esi
pmaddwd 24(%edi),%mm1
addl $32,%edi
paddd %mm1,%mm0
jmp .Loop1
.Loop1Done:
# Undo the last subtraction: %ecx is now the number of leftover blocks (0..3)
addl $4,%ecx
# MMX dot product loop, not unrolled, crunching 4 terms per loop
# This could be redone as Duff's Device on the unrolled loop above
.Loop2: subl $1,%ecx
jl .Loop2Done
movq (%esi),%mm1
addl $8,%esi
pmaddwd (%edi),%mm1
addl $8,%edi
paddd %mm1,%mm0
jmp .Loop2
.Loop2Done:
# Add the two 32-bit halves of %mm0 to form the final sum
movd %mm0,%ebx # right-hand word to ebx
punpckhdq %mm0,%mm0 # left-hand word to right side of %mm0
movd %mm0,%eax
addl %ebx,%eax # running sum now in %eax
emms # done with MMX
# Restore saved registers and the caller's frame
popl %ebx
popl %ecx
popl %edi
popl %esi
movl %ebp,%esp
popl %ebp
ret

58
libfec/dotprod_port.c Normal file
View File

@ -0,0 +1,58 @@
/* 16-bit signed integer dot product
* Portable C version
* Copyright 2004 Phil Karn
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "fec.h"
/* Descriptor for the portable dot product: one private copy of the
 * coefficients and their count.
 */
struct dotprod {
  int len; /* Number of coefficients */
  signed short *coeffs;
};

/* Create and return a descriptor for use with the dot product function.
 * coeffs - filter coefficients, copied into the descriptor
 * len    - number of coefficients
 * Returns NULL when len is not positive or when memory cannot be
 * allocated; otherwise a handle to pass to dotprod_port()/freedp_port().
 */
void *initdp_port(signed short coeffs[],int len){
  struct dotprod *dp;
  int j;

  if(len <= 0)
    return NULL;

  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
  if(dp == NULL)
    return NULL;
  dp->len = len;

  /* Just one copy of the coefficients for the C version */
  dp->coeffs = (signed short *)calloc(len,sizeof(signed short));
  if(dp->coeffs == NULL){
    /* Out of memory: do not hand back a half-built descriptor */
    free(dp);
    return NULL;
  }
  for(j=0;j<len;j++)
    dp->coeffs[j] = coeffs[j];
  return (void *)dp;
}

/* Free a dot product descriptor created earlier by initdp_port().
 * A NULL descriptor (returned by initdp_port() for len == 0) is ignored.
 */
void freedp_port(void *p){
  struct dotprod *dp = (struct dotprod *)p;

  if(dp == NULL)
    return;

  free(dp->coeffs); /* free(NULL) is a no-op */
  free(dp);
}

/* Compute a dot product given a descriptor and an input array
 * The length is taken from the descriptor
 */
long dotprod_port(void *p,signed short a[]){
  struct dotprod *dp = (struct dotprod *)p;
  long corr;
  int i;

  corr = 0;
  for(i=0;i<dp->len;i++){
    corr += (long)a[i] * dp->coeffs[i];
  }
  return corr;
}

72
libfec/dotprod_sse2.c Normal file
View File

@ -0,0 +1,72 @@
/* 16-bit signed integer dot product
* SSE2 version
* Copyright 2004 Phil Karn
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#define _XOPEN_SOURCE 600
#include <stdlib.h>
#include <memory.h>
#include "fec.h"
struct dotprod {
int len; /* Number of coefficients */
/* On a SSE2 machine, these hold 8 copies of the coefficients,
* preshifted by 0,1,..7 words to meet all possible input data
* alignments (see Intel ap559 on MMX dot products).
*/
signed short *coeffs[8];
};
long dotprod_sse2_assist(signed short *a,signed short *b,int cnt);
/* Create and return a descriptor for use with the dot product function.
 * coeffs - filter coefficients, copied into the descriptor
 * len    - number of coefficients
 * Returns NULL when len is not positive or when memory cannot be
 * allocated; otherwise a handle to pass to dotprod_sse2()/freedp_sse2().
 */
void *initdp_sse2(signed short coeffs[],int len){
  struct dotprod *dp;
  int i,j,blksize;

  if(len <= 0)
    return NULL;

  dp = (struct dotprod *)calloc(1,sizeof(struct dotprod));
  if(dp == NULL)
    return NULL;
  dp->len = len;

  /* Make 8 copies of coefficients, one for each data alignment,
   * each aligned to 16-byte boundary
   */
  for(i=0;i<8;i++){
    void *blk = NULL;

    blksize = (1+(len+i-1)/8) * 8*sizeof(signed short);
    /* posix_memalign() returns non-zero on failure and then leaves the
     * output pointer unspecified, so its result must be checked before
     * the block is used.
     */
    if(posix_memalign(&blk,16,blksize) != 0){
      while(i-- > 0)
        free(dp->coeffs[i]);
      free(dp);
      return NULL;
    }
    dp->coeffs[i] = (signed short *)blk;
    memset(dp->coeffs[i],0,blksize);
    for(j=0;j<len;j++)
      dp->coeffs[i][j+i] = coeffs[j];
  }
  return (void *)dp;
}
/* Free a dot product descriptor created earlier by initdp_sse2().
 * A NULL descriptor (returned by initdp_sse2() for len == 0) is ignored.
 */
void freedp_sse2(void *p){
  struct dotprod *dp = (struct dotprod *)p;
  int i;

  if(dp == NULL)
    return;

  /* free(NULL) is a no-op, so unused slots need no separate test */
  for(i=0;i<8;i++)
    free(dp->coeffs[i]);
  free(dp);
}
/* SSE2 dot product of the input array with the coefficients held in the
 * descriptor; the number of terms comes from the descriptor.
 */
long dotprod_sse2(void *p,signed short a[]){
  struct dotprod *desc = (struct dotprod *)p;
  /* Start of the 16-byte block that contains a[0] */
  signed short *aligned = (signed short *)((int)a & ~15);
  /* Words between the aligned start and a[0]: selects the pre-shifted
   * coefficient copy
   */
  int shift = a - aligned;
  /* Number of 8-word blocks the assembler routine has to process */
  int nblocks = (desc->len+shift-1)/8+1;

  return dotprod_sse2_assist(aligned,desc->coeffs[shift],nblocks);
}

View File

@ -0,0 +1,85 @@
# SIMD SSE2 dot product
# Equivalent to the following C code:
# long dotprod(signed short *a,signed short *b,int cnt)
# {
# long sum = 0;
# cnt *= 8;
# while(cnt--)
# sum += *a++ * *b++;
# return sum;
# }
# a and b must be 128-bit aligned
# Copyright 2001, Phil Karn KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
# long dotprod_sse2_assist(signed short *a,signed short *b,int cnt)
# The three arguments are read from the stack at 8, 12 and 16(%ebp);
# cnt counts 8-word blocks. The 32-bit result is returned in %eax.
.text
.global dotprod_sse2_assist
.type dotprod_sse2_assist,@function
dotprod_sse2_assist:
# Set up a frame pointer and save the registers used below
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %ecx
pushl %ebx
movl 8(%ebp),%esi # a
movl 12(%ebp),%edi # b
movl 16(%ebp),%ecx # cnt
pxor %xmm0,%xmm0 # clear running sum (in four 32-bit lanes)
# SSE2 dot product loop unrolled 4 times, crunching 32 terms per loop
.align 16
.Loop1: subl $4,%ecx
jl .Loop1Done
movdqa (%esi),%xmm1
pmaddwd (%edi),%xmm1
paddd %xmm1,%xmm0
movdqa 16(%esi),%xmm1
pmaddwd 16(%edi),%xmm1
paddd %xmm1,%xmm0
movdqa 32(%esi),%xmm1
pmaddwd 32(%edi),%xmm1
paddd %xmm1,%xmm0
movdqa 48(%esi),%xmm1
addl $64,%esi
pmaddwd 48(%edi),%xmm1
addl $64,%edi
paddd %xmm1,%xmm0
jmp .Loop1
.Loop1Done:
# Undo the last subtraction: %ecx is now the number of leftover blocks (0..3)
addl $4,%ecx
# SSE2 dot product loop, not unrolled, crunching 8 terms per loop
# This could be redone as Duff's Device on the unrolled loop above
.Loop2: subl $1,%ecx
jl .Loop2Done
movdqa (%esi),%xmm1
addl $16,%esi
pmaddwd (%edi),%xmm1
addl $16,%edi
paddd %xmm1,%xmm0
jmp .Loop2
.Loop2Done:
# Fold the upper 64 bits onto the lower 64 bits, leaving two partial sums
movdqa %xmm0,%xmm1
psrldq $8,%xmm0
paddd %xmm1,%xmm0
# Add the two remaining 32-bit partial sums
movd %xmm0,%eax # right-hand word to eax
psrldq $4,%xmm0
movd %xmm0,%ebx
addl %ebx,%eax
# Restore saved registers and the caller's frame
popl %ebx
popl %ecx
popl %edi
popl %esi
movl %ebp,%esp
popl %ebp
ret

63
libfec/dsp.3 Normal file
View File

@ -0,0 +1,63 @@
.TH DSP 3
.SH NAME
initdp, freedp, dotprod, sumsq, peakval \- SIMD-assisted
digital signal processing primitives
.SH SYNOPSIS
.nf
.ft
#include "fec.h"
void *initdp(signed short *coeffs,int len);
long dotprod(void *p,signed short *a);
void freedp(void *p);
unsigned long long sumsq(signed short *in,int cnt);
int peakval(signed short *b,int cnt);
.SH DESCRIPTION
These functions provide several basic primitives useful in digital
signal processing (DSP), especially in modems. The \fBinitdp\fR,
\fBdotprod\fR and \fBfreedp\fR functions implement an integer dot
product useful in correlation and filtering operations on signed
16-bit integers. \fBsumsq\fR computes the sum
of the squares of an array of signed 16-bit integers,
useful for measuring the energy of a signal. \fBpeakval\fR returns the
absolute value of the largest magnitude element in the input array,
useful for scaling a signal's amplitude.
Each function uses IA32 or PowerPC Altivec instructions when
available; otherwise, a portable C version is used.
.SH USAGE
To create a FIR filter or correlator, call \fBinitdp\fR with the
coefficients in \fBcoeffs\fR and their number in \fBlen\fR. This
creates the appropriate data structures and returns a handle.
To compute a dot product, pass the handle from \fBinitdp\fR and the
input array to \fBdotprod\fR. No length field is needed as the number
of samples will be taken from the \fBlen\fR parameter originally given
to \fBinitdp\fR. There must be at least as many samples in the input
array as there were coefficients passed to \fBinitdp\fR.
When the filter or correlator is no longer needed, the data structures
may be freed by passing the handle to \fBfreedp\fR.
The user is responsible for scaling the inputs to \fBinitdp\fR and
\fBdotprod\fR, as the 32-bit result from \fBdotprod\fR will silently
wrap around in the event of overflow.
To compute the sum of the squares of an array of signed 16-bit
integers, use \fBsumsq\fR. This returns a 64 bit sum.
\fBpeakval\fR computes the absolute value of each 16-bit element in
the input array and returns the largest.
.SH RETURN VALUES
\fBinitdp\fR returns a handle that points to a control block, or NULL in
the event of an error (such as a memory allocation failure). \fBsumsq\fR
and \fBpeakval\fR have no error returns.
.SH AUTHOR and COPYRIGHT
Phil Karn, KA9Q (karn@ka9q.net)

99
libfec/dtest.c Normal file
View File

@ -0,0 +1,99 @@
/* Test dot-product function */
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <math.h>
#include "config.h"
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#include "fec.h"
#if HAVE_GETOPT_LONG
/* Long-option aliases for the single-letter switches handled in main().
 * Each entry is {name, has_arg, flag, val}; val is the short option
 * character that getopt_long() returns for the long name.
 */
struct option Options[] = {
  {"force-altivec",0,NULL,'a'},
  {"force-port",0,NULL,'p'},
  {"force-mmx",0,NULL,'m'},
  {"force-sse",0,NULL,'s'},
  {"force-sse2",0,NULL,'t'},
  /* has_arg must be 1 (argument required) to match "n:" in the short
   * option string; with 0, --trials left optarg NULL for atoi()
   */
  {"trials",1,NULL,'n'},
  {NULL},
};
#endif
/* Compare dotprod() against dotprod_port() on random coefficients and data.
 *
 * Options: -a/-p/-m/-s/-t force Cpu_mode to ALTIVEC/PORT/MMX/SSE/SSE2,
 * -n <count> sets the number of trials (default 1000).
 * Prints the number of mismatching trials and exits with status 0.
 */
int main(int argc,char *argv[]){
  short coeffs[512];
  short input[2048];
  int trials=1000,d;
  int errors = 0;

#if HAVE_GETOPT_LONG
  while((d = getopt_long(argc,argv,"apmstn:",Options,NULL)) != EOF){
#else
  while((d = getopt(argc,argv,"apmstn:")) != EOF){
#endif
    switch(d){
    case 'a':
      Cpu_mode = ALTIVEC;
      break;
    case 'p':
      Cpu_mode = PORT;
      break;
    case 'm':
      Cpu_mode = MMX;
      break;
    case 's':
      Cpu_mode = SSE;
      break;
    case 't':
      Cpu_mode = SSE2;
      break;
    case 'n':
      trials = atoi(optarg);
      break;
    }
  }

  while(trials--){
    long port_result;
    long simd_result;
    int ntaps;
    int i;
    int csum = 0;
    int offset;
    void *dp_simd,*dp_port;

    /* Generate set of coefficients
     * limit sum of absolute values to 32767 to avoid overflow
     */
    memset(coeffs,0,sizeof(coeffs));
    for(i=0;i<512;i++){
      double gv;

      gv = normal_rand(0.,100.);
      if(csum + fabs(gv) > 32767)
        break;
      coeffs[i] = gv;
      csum += fabs(gv);
    }
    ntaps = i;

    /* Compare results to portable C version for a bunch of random data buffers and offsets */
    dp_simd = initdp(coeffs,ntaps);
    dp_port = initdp_port(coeffs,ntaps);

    for(i=0;i<2048;i++)
      input[i] = random();

    /* offset <= 511 and ntaps <= 512, so the window stays well inside input[] */
    offset = random() & 511;

    simd_result = dotprod(dp_simd,input+offset);
    port_result = dotprod_port(dp_port,input+offset);
    if(simd_result != port_result){
      errors++;
    }

    /* Release both descriptors; they were previously leaked on every trial */
    freedp(dp_simd);
    freedp_port(dp_port);
  }
  printf("dtest: %d errors\n",errors);
  exit(0);
}

52
libfec/encode_rs.c Normal file
View File

@ -0,0 +1,52 @@
/* Reed-Solomon encoder
* Copyright 2002, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <string.h>
#ifdef FIXED
#include "fixed.h"
#elif defined(BIGSYM)
#include "int.h"
#else
#include "char.h"
#endif
/* Systematic Reed-Solomon encoder.
 * Reads NN-NROOTS-PAD symbols from data[] and leaves the NROOTS parity
 * symbols in bb[]. With FIXED defined the pad count is a parameter;
 * otherwise the codec handle is passed as the first argument.
 */
void ENCODE_RS(
#ifdef FIXED
data_t *data, data_t *bb,int pad){
#else
void *p,data_t *data, data_t *bb){
  struct rs *rs = (struct rs *)p;
#endif
  int sym, tap;
  data_t feedback;

#ifdef FIXED
  /* Only pad counts in [0,NN) are accepted */
  if(!(pad >= 0 && pad < NN))
    return;
#endif

  /* Start from an all-zero parity register */
  memset(bb,0,NROOTS*sizeof(data_t));

  for(sym=0;sym<NN-NROOTS-PAD;sym++){
    /* Symbol shifted into the top of the register; stays 0 when the
     * feedback term is zero
     */
    data_t tail = 0;

    feedback = INDEX_OF[data[sym] ^ bb[0]];
    if(feedback != A0){
#ifdef UNNORMALIZED
      /* Only needed when GENPOLY[NROOTS] is not unity, which never
       * happens for polynomials constructed by init_rs()
       */
      feedback = MODNN(NN - GENPOLY[NROOTS] + feedback);
#endif
      for(tap=1;tap<NROOTS;tap++)
        bb[tap] ^= ALPHA_TO[MODNN(feedback + GENPOLY[NROOTS-tap])];
      tail = ALPHA_TO[MODNN(feedback + GENPOLY[0])];
    }

    /* Shift the register down one symbol and append the new top term */
    memmove(&bb[0],&bb[1],sizeof(data_t)*(NROOTS-1));
    bb[NROOTS-1] = tail;
  }
}

58
libfec/encode_rs.h Normal file
View File

@ -0,0 +1,58 @@
/* The guts of the Reed-Solomon encoder, meant to be #included
* into a function body with the following typedefs, macros and variables supplied
* according to the code parameters:
* data_t - a typedef for the data symbol
* data_t data[] - array of NN-NROOTS-PAD and type data_t to be encoded
* data_t parity[] - an array of NROOTS and type data_t to be written with parity symbols
* NROOTS - the number of roots in the RS code generator polynomial,
* which is the same as the number of parity symbols in a block.
Integer variable or literal.
*
* NN - the total number of symbols in a RS block. Integer variable or literal.
* PAD - the number of pad symbols in a block. Integer variable or literal.
* ALPHA_TO - The address of an array of NN elements to convert Galois field
* elements in index (log) form to polynomial form. Read only.
* INDEX_OF - The address of an array of NN elements to convert Galois field
* elements in polynomial form to index (log) form. Read only.
* MODNN - a function to reduce its argument modulo NN. May be inline or a macro.
* GENPOLY - an array of NROOTS+1 elements containing the generator polynomial in index form
* The memset() and memmove() functions are used. The appropriate header
* file declaring these functions (usually <string.h>) must be included by the calling
* program.
* Copyright 2004, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#undef A0
#define A0 (NN) /* Special reserved value encoding zero in index form */

/* Encoder body: consumes data[0..NN-NROOTS-PAD-1] and leaves the NROOTS
 * parity symbols in parity[]
 */
{
  int sym, tap;
  data_t feedback;

  /* The parity register starts out cleared */
  memset(parity,0,NROOTS*sizeof(data_t));

  for(sym=0;sym<NN-NROOTS-PAD;sym++){
    /* Value appended after the shift; remains 0 for a zero feedback term */
    data_t tail = 0;

    feedback = INDEX_OF[data[sym] ^ parity[0]];
    if(feedback != A0){
#ifdef UNNORMALIZED
      /* Redundant whenever GENPOLY[NROOTS] is unity, as it always is
       * for the polynomials constructed by init_rs()
       */
      feedback = MODNN(NN - GENPOLY[NROOTS] + feedback);
#endif
      for(tap=1;tap<NROOTS;tap++)
        parity[tap] ^= ALPHA_TO[MODNN(feedback + GENPOLY[NROOTS-tap])];
      tail = ALPHA_TO[MODNN(feedback + GENPOLY[0])];
    }

    /* Shift down by one symbol, then store the new highest term */
    memmove(&parity[0],&parity[1],sizeof(data_t)*(NROOTS-1));
    parity[NROOTS-1] = tail;
  }
}

117
libfec/encode_rs_8.c Normal file
View File

@ -0,0 +1,117 @@
/* Reed-Solomon encoder
* Copyright 2004, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <string.h>
#include "fixed.h"
#ifdef __VEC__
#include <sys/sysctl.h>
#endif
/* Cached SIMD capability of the host. Static storage starts out as
 * UNKNOWN (0); the first call to encode_rs_8() fills it in.
 */
static enum {UNKNOWN=0,MMX,SSE,SSE2,ALTIVEC,PORT} cpu_mode;
/* Portable C encoder, defined at the bottom of this file */
static void encode_rs_8_c(data_t *data, data_t *parity,int pad);
#if __vec__
static void encode_rs_8_av(data_t *data, data_t *parity,int pad);
#endif
#if __i386__
int cpu_features(void);
#endif
/* Encode one block with the fixed (255,223) code described in fixed.h.
 * data - input symbols
 * parity - receives the parity symbols
 * pad - number of pad symbols, passed through to the selected encoder
 *
 * The first call detects the CPU type and caches it in cpu_mode; later
 * calls only dispatch on the cached value.
 * NOTE(review): the detection code tests __VEC__ while the prototype, the
 * switch case and the Altivec implementation test lowercase __vec__ --
 * confirm which macro the target compiler actually defines.
 */
void encode_rs_8(data_t *data, data_t *parity,int pad){
if(cpu_mode == UNKNOWN){
#ifdef __i386__
int f;
/* Figure out what kind of CPU we have */
f = cpu_features();
if(f & (1<<26)){ /* SSE2 is present */
cpu_mode = SSE2;
} else if(f & (1<<25)){ /* SSE is present */
cpu_mode = SSE;
} else if(f & (1<<23)){ /* MMX is present */
cpu_mode = MMX;
} else { /* No SIMD at all */
cpu_mode = PORT;
}
#elif __x86_64__
/* No run-time probe on x86-64: SSE2 is assumed */
cpu_mode = SSE2;
#elif __VEC__
/* Ask the OS if we have Altivec support */
int selectors[2] = { CTL_HW, HW_VECTORUNIT };
int hasVectorUnit = 0;
size_t length = sizeof(hasVectorUnit);
int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
if(0 == error && hasVectorUnit)
cpu_mode = ALTIVEC;
else
cpu_mode = PORT;
#else
cpu_mode = PORT;
#endif
}
/* Only ALTIVEC has a dedicated encoder. The MMX/SSE/SSE2 labels carry no
 * code of their own and fall through to the portable C version.
 */
switch(cpu_mode){
#if __vec__
case ALTIVEC:
encode_rs_8_av(data,parity,pad);
return;
#endif
#if __i386__
case MMX:
case SSE:
case SSE2:
#endif
#ifdef __x86_64__
case SSE2:
#endif
default:
encode_rs_8_c(data,parity,pad);
return;
}
}
#if __vec__ /* PowerPC G4/G5 Altivec instructions are available */
/* Permute control vectors consumed by vec_perm() in encode_rs_8_av():
 * reverse builds feedback0 from feedback1, shift_right moves the 32-byte
 * shift register along by one byte across its two halves
 */
static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
/* Lookup table for feedback multiplications
 * These are the low half of the coefficients. Since the generator polynomial is
 * palindromic, we form the other half by reversing this one
 */
/* NOTE(review): "extern static" names two storage classes in one
 * declaration and nothing in this file fills the table in -- confirm how
 * (or whether) this branch is meant to be built.
 */
extern static union { vector unsigned char v; unsigned char c[16]; } table[256];
/* Altivec encoder: same arguments as encode_rs_8(). Processes
 * NN-NROOTS-pad data symbols through a 32-byte shift register held in
 * two vectors, then copies the register out as the parity symbols.
 */
static void encode_rs_8_av(data_t *data, data_t *parity,int pad){
union { vector unsigned char v[2]; unsigned char c[32]; } shift_register;
int i;
/* Clear both halves of the shift register */
shift_register.v[0] = (vector unsigned char)(0);
shift_register.v[1] = (vector unsigned char)(0);
for(i=0;i<NN-NROOTS-pad;i++){
vector unsigned char feedback0,feedback1;
unsigned char f;
/* Feedback symbol: incoming data XOR the last byte of the register */
f = data[i] ^ shift_register.c[31];
feedback1 = table[f].v;
feedback0 = vec_perm(feedback1,feedback1,reverse);
/* Shift right one byte */
shift_register.v[1] = vec_perm(shift_register.v[0],shift_register.v[1],shift_right) ^ feedback1;
shift_register.v[0] = vec_sro(shift_register.v[0],(vector unsigned char)(8)) ^ feedback0;
/* The first register byte is overwritten with the raw feedback symbol */
shift_register.c[0] = f;
}
/* Parity is the first NROOTS register bytes in reverse order */
for(i=0;i<NROOTS;i++)
parity[NROOTS-i-1] = shift_register.c[i];
}
#endif
/* Portable C version
 * The function body is the shared encoder fragment from encode_rs.h,
 * which works on the data, parity and pad names declared here together
 * with the macros supplied by fixed.h.
 */
static void encode_rs_8_c(data_t *data, data_t *parity,int pad){
#include "encode_rs.h"
}

61
libfec/encode_rs_av.c Normal file
View File

@ -0,0 +1,61 @@
/* Fast Reed-Solomon encoder for (255,223) CCSDS code on PowerPC G4/G5 using Altivec instructions
* Copyright 2004, Phil Karn KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdio.h>
#include <string.h>
#include "fixed.h"
/* Lookup table for feedback multiplications
 * These are the low half of the coefficients. Since the generator polynomial is
 * palindromic, we form it by reversing these on the fly
 * One 16-byte row per feedback symbol value; filled in by rs_init_av()
 */
static union { vector unsigned char v; unsigned char c[16]; } table[256];
/* Permute control vectors used with vec_perm() in encode_rs_av() */
static vector unsigned char reverse = (vector unsigned char)(0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1);
static vector unsigned char shift_right = (vector unsigned char)(15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30);
/* Galois field and generator polynomial tables, defined in another file */
extern data_t CCSDS_alpha_to[];
extern data_t CCSDS_index_of[];
extern data_t CCSDS_poly[];
/* Fill table[] for encode_rs_av(): row s holds, for terms 1..16 of
 * CCSDS_poly, the product of symbol s with that term. Row 0 is all zero.
 * Call before the first use of encode_rs_av().
 */
void rs_init_av(){
  int sym,term;

  /* Multiplying by zero gives zero for every term */
  for(term=0;term<16;term++)
    table[0].c[term] = 0;

  /* The PowerPC is big-endian, so the low-order byte of each vector contains the highest order term in the polynomial */
  for(sym=1;sym<256;sym++){
    int log_sym = CCSDS_index_of[sym];

    for(term=0;term<16;term++)
      table[sym].c[15-term] = CCSDS_alpha_to[MODNN(CCSDS_poly[term+1] + log_sym)];
  }
#if 0
  for(sym=0;sym<256;sym++){
    printf("table[%3d] = %3vu\n",sym,table[sym].v);
  }
#endif
}
/* Altivec encoder for the (255,223) code.
 * data - NN-NROOTS-pad input symbols
 * parity - receives NROOTS parity symbols
 * pad - number of pad symbols
 * Relies on table[] having been filled in by rs_init_av().
 */
void encode_rs_av(unsigned char *data,unsigned char *parity,int pad){
union { vector unsigned char v[2]; unsigned char c[32]; } shift_register;
int i;
/* Clear both halves of the 32-byte shift register */
shift_register.v[0] = (vector unsigned char)(0);
shift_register.v[1] = (vector unsigned char)(0);
for(i=0;i<NN-NROOTS-pad;i++){
vector unsigned char feedback0,feedback1;
unsigned char f;
/* Feedback symbol: incoming data XOR the last byte of the register */
f = data[i] ^ shift_register.c[31];
feedback1 = table[f].v;
/* The other half of the products is the same row permuted by "reverse" */
feedback0 = vec_perm(feedback1,feedback1,reverse);
/* Shift right one byte */
shift_register.v[1] = vec_perm(shift_register.v[0],shift_register.v[1],shift_right) ^ feedback1;
shift_register.v[0] = vec_sro(shift_register.v[0],(vector unsigned char)(8)) ^ feedback0;
/* The first register byte is overwritten with the raw feedback symbol */
shift_register.c[0] = f;
}
/* Parity is the first NROOTS register bytes in reverse order */
for(i=0;i<NROOTS;i++)
parity[NROOTS-i-1] = shift_register.c[i];
}

24
libfec/encode_rs_ccsds.c Normal file
View File

@ -0,0 +1,24 @@
/* This function wraps around the fixed 8-bit encoder, performing the
* basis transformations necessary to meet the CCSDS standard
*
* Copyright 2002, Phil Karn, KA9Q
* fixed bug Aug 2007
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include "ccsds.h"
#include "fec.h"
/* CCSDS (255,223) encoder with dual-basis symbols.
 * data - NN-NROOTS-pad input symbols in dual-basis form
 * parity - receives NROOTS parity symbols in dual-basis form
 * pad - number of pad symbols; a negative value is rejected and
 * leaves parity[] untouched
 */
void encode_rs_ccsds(data_t *data,data_t *parity,int pad){
  int i;
  data_t cdata[NN-NROOTS];

  /* A negative pad would make the copy loop below run past the end of
   * cdata[]; return silently, as ENCODE_RS() in encode_rs.c does
   */
  if(pad < 0)
    return;

  /* Convert data from dual basis to conventional */
  for(i=0;i<NN-NROOTS-pad;i++)
    cdata[i] = Tal1tab[data[i]];

  encode_rs_8(cdata,parity,pad);

  /* Convert parity from conventional to dual basis */
  for(i=0;i<NROOTS;i++)
    parity[i] = Taltab[parity[i]];
}

15
libfec/encode_rs_char.c Normal file
View File

@ -0,0 +1,15 @@
/* Reed-Solomon encoder
* Copyright 2002, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <string.h>
#include "char.h"
#include "rs-common.h"
/* General-purpose encoder entry point built on char.h.
 * p - codec handle, cast to struct rs
 * data - input symbols
 * parity - receives the parity symbols
 * The body is the shared fragment from encode_rs.h.
 * NOTE(review): rs is not referenced by name here; presumably the macros
 * from char.h expand to members of it -- confirm against char.h.
 */
void encode_rs_char(void *p,data_t *data, data_t *parity){
struct rs *rs = (struct rs *)p;
#include "encode_rs.h"
}

15
libfec/encode_rs_int.c Normal file
View File

@ -0,0 +1,15 @@
/* Reed-Solomon encoder
* Copyright 2003, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <string.h>
#include "int.h"
#include "rs-common.h"
/* General-purpose encoder entry point built on int.h.
 * p - codec handle, cast to struct rs
 * data - input symbols
 * parity - receives the parity symbols
 * The body is the shared fragment from encode_rs.h.
 * NOTE(review): rs is not referenced by name here; presumably the macros
 * from int.h expand to members of it -- confirm against int.h.
 */
void encode_rs_int(void *p,data_t *data, data_t *parity){
struct rs *rs = (struct rs *)p;
#include "encode_rs.h"
}

122
libfec/exercise.c Normal file
View File

@ -0,0 +1,122 @@
/* Exercise an RS codec a specified number of times using random
* data and error patterns
*
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#define FLAG_ERASURE 1 /* Randomly flag 50% of errors as erasures */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef FIXED
#include "fixed.h"
#define EXERCISE exercise_8
#elif defined(CCSDS)
#include "fixed.h"
#include "ccsds.h"
#define EXERCISE exercise_ccsds
#elif defined(BIGSYM)
#include "int.h"
#define EXERCISE exercise_int
#else
#include "char.h"
#define EXERCISE exercise_char
#endif
#ifdef FIXED
#define PRINTPARM printf("(255,223):");
#elif defined(CCSDS)
#define PRINTPARM printf("CCSDS (255,223):");
#else
#define PRINTPARM printf("(%d,%d):",rs->nn,rs->nn-rs->nroots);
#endif
/* Exercise the RS codec passed as an argument
 * p - codec handle (only in the general-purpose builds, i.e. when
 * neither CCSDS nor FIXED is defined)
 * trials - number of passes; each pass tests every error count from 0
 * up to NROOTS/2. The loop stops when the counter reaches
 * zero, so a negative value does not terminate normally.
 * Returns the number of discrepancies detected; each one is also
 * reported on stdout.
 */
int EXERCISE(
#if !defined(CCSDS) && !defined(FIXED)
void *p,
#endif
int trials){
#if !defined(CCSDS) && !defined(FIXED)
struct rs *rs = (struct rs *)p;
#endif
/* block is the reference codeword, tblock the corrupted copy given to the decoder */
data_t block[NN],tblock[NN];
int i;
int errors;
/* errlocs[k] is nonzero when position k was corrupted */
int errlocs[NN];
/* Erasure positions handed to the decoder, which also reports locations here */
int derrlocs[NROOTS];
int derrors;
int errval,errloc;
int erasures;
int decoder_errors = 0;
while(trials-- != 0){
/* Test up to the error correction capacity of the code */
for(errors=0;errors <= NROOTS/2;errors++){
/* Load block with random data and encode */
/* NOTE(review): masking with NN assumes NN is an all-ones bit pattern -- confirm */
for(i=0;i<NN-NROOTS;i++)
block[i] = random() & NN;
#if defined(CCSDS) || defined(FIXED)
ENCODE_RS(&block[0],&block[NN-NROOTS],0);
#else
ENCODE_RS(rs,&block[0],&block[NN-NROOTS]);
#endif
/* Make temp copy, seed with errors */
memcpy(tblock,block,sizeof(tblock));
memset(errlocs,0,sizeof(errlocs));
memset(derrlocs,0,sizeof(derrlocs));
erasures=0;
for(i=0;i<errors;i++){
do {
errval = random() & NN;
} while(errval == 0); /* Error value must be nonzero */
do {
errloc = random() % NN;
} while(errlocs[errloc] != 0); /* Must not choose the same location twice */
errlocs[errloc] = 1;
#if FLAG_ERASURE
if(random() & 1) /* 50-50 chance */
derrlocs[erasures++] = errloc;
#endif
tblock[errloc] ^= errval;
}
/* Decode the errored block */
#if defined(CCSDS) || defined(FIXED)
derrors = DECODE_RS(tblock,derrlocs,erasures,0);
#else
derrors = DECODE_RS(rs,tblock,derrlocs,erasures);
#endif
/* Check 1: the reported error count matches the number injected */
if(derrors != errors){
PRINTPARM
printf(" decoder says %d errors, true number is %d\n",derrors,errors);
decoder_errors++;
}
/* Check 2: every reported location is one that was really corrupted */
for(i=0;i<derrors;i++){
if(errlocs[derrlocs[i]] == 0){
PRINTPARM
printf(" decoder indicates error in location %d without error\n",derrlocs[i]);
decoder_errors++;
}
}
/* Check 3: the corrected block equals the original codeword */
if(memcmp(tblock,block,sizeof(tblock)) != 0){
PRINTPARM
printf(" uncorrected errors! output ^ input:");
decoder_errors++;
for(i=0;i<NN;i++)
printf(" %02x",tblock[i] ^ block[i]);
printf("\n");
}
}
}
return decoder_errors;
}

66
libfec/fec.c Normal file
View File

@ -0,0 +1,66 @@
/* Utility routines for FEC support
* Copyright 2004, Phil Karn, KA9Q
*/
#include <stdio.h>
#include "fec.h"
/* Parity of every byte value: Partab[v] is 1 when v has an odd number of
 * 1 bits, 0 otherwise
 */
unsigned char Partab[256];
/* Set to 1 once Partab[] has been filled in */
int P_init;

/* Create 256-entry odd-parity lookup table
 * Needed only on non-ia32 machines
 */
void partab_init(void){
  int value;

  for(value=0;value<256;value++){
    /* XOR-fold the eight bits down onto bit 0 */
    unsigned int folded = (unsigned int)value;

    folded ^= folded >> 4;
    folded ^= folded >> 2;
    folded ^= folded >> 1;
    Partab[value] = folded & 1;
  }
  P_init = 1;
}
/* Lookup table giving count of 1 bits for integers 0-255
 * Indexed directly by the byte value; laid out as 32 rows of 8 entries
 */
int Bitcnt[] = {
0, 1, 1, 2, 1, 2, 2, 3,
1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7,
5, 6, 6, 7, 6, 7, 7, 8,
};

355
libfec/fec.h Normal file
View File

@ -0,0 +1,355 @@
/* User include file for libfec
* Copyright 2004, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#ifndef _FEC_H_
#define _FEC_H_
/* r=1/2 k=7 convolutional encoder polynomials
* The NASA-DSN convention is to use V27POLYA inverted, then V27POLYB
* The CCSDS/NASA-GSFC convention is to use V27POLYB, then V27POLYA inverted
*/
#define V27POLYA 0x6d
#define V27POLYB 0x4f
void *create_viterbi27(int len);
void set_viterbi27_polynomial(int polys[2]);
int init_viterbi27(void *vp,int starting_state);
int update_viterbi27_blk(void *vp,unsigned char sym[],int npairs);
int chainback_viterbi27(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27(void *vp);
#ifdef __VEC__
void *create_viterbi27_av(int len);
void set_viterbi27_polynomial_av(int polys[2]);
int init_viterbi27_av(void *p,int starting_state);
int chainback_viterbi27_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27_av(void *p);
int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits);
#endif
#ifdef __i386__
void *create_viterbi27_mmx(int len);
void set_viterbi27_polynomial_mmx(int polys[2]);
int init_viterbi27_mmx(void *p,int starting_state);
int chainback_viterbi27_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27_mmx(void *p);
int update_viterbi27_blk_mmx(void *p,unsigned char *syms,int nbits);
void *create_viterbi27_sse(int len);
void set_viterbi27_polynomial_sse(int polys[2]);
int init_viterbi27_sse(void *p,int starting_state);
int chainback_viterbi27_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27_sse(void *p);
int update_viterbi27_blk_sse(void *p,unsigned char *syms,int nbits);
void *create_viterbi27_sse2(int len);
void set_viterbi27_polynomial_sse2(int polys[2]);
int init_viterbi27_sse2(void *p,int starting_state);
int chainback_viterbi27_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27_sse2(void *p);
int update_viterbi27_blk_sse2(void *p,unsigned char *syms,int nbits);
#endif
void *create_viterbi27_port(int len);
void set_viterbi27_polynomial_port(int polys[2]);
int init_viterbi27_port(void *p,int starting_state);
int chainback_viterbi27_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27_port(void *p);
int update_viterbi27_blk_port(void *p,unsigned char *syms,int nbits);
/* r=1/2 k=9 convolutional encoder polynomials */
#define V29POLYA 0x1af
#define V29POLYB 0x11d
void *create_viterbi29(int len);
void set_viterbi29_polynomial(int polys[2]);
int init_viterbi29(void *vp,int starting_state);
int update_viterbi29_blk(void *vp,unsigned char syms[],int nbits);
int chainback_viterbi29(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29(void *vp);
#ifdef __VEC__
void *create_viterbi29_av(int len);
void set_viterbi29_polynomial_av(int polys[2]);
int init_viterbi29_av(void *p,int starting_state);
int chainback_viterbi29_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29_av(void *p);
int update_viterbi29_blk_av(void *p,unsigned char *syms,int nbits);
#endif
#ifdef __i386__
void *create_viterbi29_mmx(int len);
void set_viterbi29_polynomial_mmx(int polys[2]);
int init_viterbi29_mmx(void *p,int starting_state);
int chainback_viterbi29_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29_mmx(void *p);
int update_viterbi29_blk_mmx(void *p,unsigned char *syms,int nbits);
void *create_viterbi29_sse(int len);
void set_viterbi29_polynomial_sse(int polys[2]);
int init_viterbi29_sse(void *p,int starting_state);
int chainback_viterbi29_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29_sse(void *p);
int update_viterbi29_blk_sse(void *p,unsigned char *syms,int nbits);
void *create_viterbi29_sse2(int len);
void set_viterbi29_polynomial_sse2(int polys[2]);
int init_viterbi29_sse2(void *p,int starting_state);
int chainback_viterbi29_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29_sse2(void *p);
int update_viterbi29_blk_sse2(void *p,unsigned char *syms,int nbits);
#endif
void *create_viterbi29_port(int len);
void set_viterbi29_polynomial_port(int polys[2]);
int init_viterbi29_port(void *p,int starting_state);
int chainback_viterbi29_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29_port(void *p);
int update_viterbi29_blk_port(void *p,unsigned char *syms,int nbits);
/* r=1/3 k=9 convolutional encoder polynomials */
#define V39POLYA 0x1ed
#define V39POLYB 0x19b
#define V39POLYC 0x127
void *create_viterbi39(int len);
void set_viterbi39_polynomial(int polys[3]);
int init_viterbi39(void *vp,int starting_state);
int update_viterbi39_blk(void *vp,unsigned char syms[],int nbits);
int chainback_viterbi39(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39(void *vp);
#ifdef __VEC__
void *create_viterbi39_av(int len);
void set_viterbi39_polynomial_av(int polys[3]);
int init_viterbi39_av(void *p,int starting_state);
int chainback_viterbi39_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39_av(void *p);
int update_viterbi39_blk_av(void *p,unsigned char *syms,int nbits);
#endif
#ifdef __i386__
void *create_viterbi39_mmx(int len);
void set_viterbi39_polynomial_mmx(int polys[3]);
int init_viterbi39_mmx(void *p,int starting_state);
int chainback_viterbi39_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39_mmx(void *p);
int update_viterbi39_blk_mmx(void *p,unsigned char *syms,int nbits);
void *create_viterbi39_sse(int len);
void set_viterbi39_polynomial_sse(int polys[3]);
int init_viterbi39_sse(void *p,int starting_state);
int chainback_viterbi39_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39_sse(void *p);
int update_viterbi39_blk_sse(void *p,unsigned char *syms,int nbits);
void *create_viterbi39_sse2(int len);
void set_viterbi39_polynomial_sse2(int polys[3]);
int init_viterbi39_sse2(void *p,int starting_state);
int chainback_viterbi39_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39_sse2(void *p);
int update_viterbi39_blk_sse2(void *p,unsigned char *syms,int nbits);
#endif
void *create_viterbi39_port(int len);
void set_viterbi39_polynomial_port(int polys[3]);
int init_viterbi39_port(void *p,int starting_state);
int chainback_viterbi39_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39_port(void *p);
int update_viterbi39_blk_port(void *p,unsigned char *syms,int nbits);
/* r=1/6 k=15 Cassini convolutional encoder polynomials without symbol inversion
* dfree = 56
* These bits may be left-right flipped from some textbook representations;
* here I have the bits entering the shift register from the right (low) end
*
* Some other spacecraft use the same code, but with the polynomials in a different order.
* E.g., Mars Pathfinder and STEREO swap POLYC and POLYD. All use alternate symbol inversion,
* so use set_viterbi615_polynomial() as appropriate.
*/
#define V615POLYA 042631
#define V615POLYB 047245
#define V615POLYC 056507
#define V615POLYD 073363
#define V615POLYE 077267
#define V615POLYF 064537
void *create_viterbi615(int len);
void set_viterbi615_polynomial(int polys[6]);
int init_viterbi615(void *vp,int starting_state);
int update_viterbi615_blk(void *vp,unsigned char *syms,int nbits);
int chainback_viterbi615(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615(void *vp);
#ifdef __VEC__
void *create_viterbi615_av(int len);
void set_viterbi615_polynomial_av(int polys[6]);
int init_viterbi615_av(void *p,int starting_state);
int chainback_viterbi615_av(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615_av(void *p);
int update_viterbi615_blk_av(void *p,unsigned char *syms,int nbits);
#endif
#ifdef __i386__
void *create_viterbi615_mmx(int len);
void set_viterbi615_polynomial_mmx(int polys[6]);
int init_viterbi615_mmx(void *p,int starting_state);
int chainback_viterbi615_mmx(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615_mmx(void *p);
int update_viterbi615_blk_mmx(void *p,unsigned char *syms,int nbits);
void *create_viterbi615_sse(int len);
void set_viterbi615_polynomial_sse(int polys[6]);
int init_viterbi615_sse(void *p,int starting_state);
int chainback_viterbi615_sse(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615_sse(void *p);
int update_viterbi615_blk_sse(void *p,unsigned char *syms,int nbits);
void *create_viterbi615_sse2(int len);
void set_viterbi615_polynomial_sse2(int polys[6]);
int init_viterbi615_sse2(void *p,int starting_state);
int chainback_viterbi615_sse2(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615_sse2(void *p);
int update_viterbi615_blk_sse2(void *p,unsigned char *syms,int nbits);
#endif
void *create_viterbi615_port(int len);
void set_viterbi615_polynomial_port(int polys[6]);
int init_viterbi615_port(void *p,int starting_state);
int chainback_viterbi615_port(void *p,unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615_port(void *p);
int update_viterbi615_blk_port(void *p,unsigned char *syms,int nbits);
/* General purpose RS codec, 8-bit symbols */
void encode_rs_char(void *rs,unsigned char *data,unsigned char *parity);
int decode_rs_char(void *rs,unsigned char *data,int *eras_pos,
int no_eras);
void *init_rs_char(int symsize,int gfpoly,
int fcr,int prim,int nroots,
int pad);
void free_rs_char(void *rs);
/* General purpose RS codec, integer symbols */
void encode_rs_int(void *rs,int *data,int *parity);
int decode_rs_int(void *rs,int *data,int *eras_pos,int no_eras);
void *init_rs_int(int symsize,int gfpoly,int fcr,
int prim,int nroots,int pad);
void free_rs_int(void *rs);
/* CCSDS standard (255,223) RS codec with conventional (*not* dual-basis)
* symbol representation
*/
void encode_rs_8(unsigned char *data,unsigned char *parity,int pad);
int decode_rs_8(unsigned char *data,int *eras_pos,int no_eras,int pad);
/* CCSDS standard (255,223) RS codec with dual-basis symbol representation */
void encode_rs_ccsds(unsigned char *data,unsigned char *parity,int pad);
int decode_rs_ccsds(unsigned char *data,int *eras_pos,int no_eras,int pad);
/* Tables to map from conventional->dual (Taltab) and
* dual->conventional (Tal1tab) bases
*/
extern unsigned char Taltab[],Tal1tab[];
/* CPU SIMD instruction set available */
extern enum cpu_mode {UNKNOWN=0,PORT,MMX,SSE,SSE2,ALTIVEC} Cpu_mode;
void find_cpu_mode(void); /* Call this once at startup to set Cpu_mode */
/* Determine parity of argument: 1 = odd, 0 = even */
#if defined(__i386__) || defined(__x86_64__)
/* x86 version: "test" sets the CPU parity flag from the byte and
 * "setpo" stores 1 in the result register when that flag reports odd
 * parity. Both operands use the "q" constraint so that a byte-addressable
 * register is chosen.
 */
static inline int parityb(unsigned char x){
__asm__ __volatile__ ("test %1,%1;setpo %0" : "=q"(x) : "q" (x));
return x;
}
#else
/* Portable version: look the answer up in Partab[], building the table
 * on first use via partab_init()
 */
void partab_init();
static inline int parityb(unsigned char x){
extern unsigned char Partab[256];
extern int P_init;
/* P_init stays zero until partab_init() has filled in the table */
if(!P_init){
partab_init();
}
return Partab[x];
}
#endif
/* Parity of the low 32 bits of x: 1 = odd, 0 = even.
 * The word is XOR-folded so that its low byte has the same parity as the
 * whole word, and that byte is handed to parityb().
 */
static inline int parity(int x){
  int folded = x ^ (x >> 16);

  folded ^= folded >> 8;
  return parityb((unsigned char)folded);
}
/* Useful utilities for simulation */
double normal_rand(double mean, double std_dev);
unsigned char addnoise(int sym,double amp,double gain,double offset,int clip);
extern int Bitcnt[];
/* Dot product functions */
void *initdp(signed short coeffs[],int len);
void freedp(void *dp);
long dotprod(void *dp,signed short a[]);
void *initdp_port(signed short coeffs[],int len);
void freedp_port(void *dp);
long dotprod_port(void *dp,signed short a[]);
#ifdef __i386__
void *initdp_mmx(signed short coeffs[],int len);
void freedp_mmx(void *dp);
long dotprod_mmx(void *dp,signed short a[]);
void *initdp_sse(signed short coeffs[],int len);
void freedp_sse(void *dp);
long dotprod_sse(void *dp,signed short a[]);
void *initdp_sse2(signed short coeffs[],int len);
void freedp_sse2(void *dp);
long dotprod_sse2(void *dp,signed short a[]);
#endif
#ifdef __x86_64__
void *initdp_sse2(signed short coeffs[],int len);
void freedp_sse2(void *dp);
long dotprod_sse2(void *dp,signed short a[]);
#endif
#ifdef __VEC__
void *initdp_av(signed short coeffs[],int len);
void freedp_av(void *dp);
long dotprod_av(void *dp,signed short a[]);
#endif
/* Sum of squares - accepts signed shorts, produces unsigned long long */
unsigned long long sumsq(signed short *in,int cnt);
unsigned long long sumsq_port(signed short *in,int cnt);
#ifdef __i386__
unsigned long long sumsq_mmx(signed short *in,int cnt);
unsigned long long sumsq_sse(signed short *in,int cnt);
unsigned long long sumsq_sse2(signed short *in,int cnt);
#endif
#ifdef __x86_64__
unsigned long long sumsq_sse2(signed short *in,int cnt);
#endif
#ifdef __VEC__
unsigned long long sumsq_av(signed short *in,int cnt);
#endif
/* Low-level data structures and routines */
int cpu_features(void);
#endif /* _FEC_H_ */

33
libfec/fixed.h Normal file
View File

@ -0,0 +1,33 @@
/* Stuff specific to the CCSDS (255,223) RS codec
* (255,223) code over GF(256). Note: the conventional basis is still
* used; the dual-basis mappings are performed in [en|de]code_rs_ccsds.c
*
* Copyright 2003 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
typedef unsigned char data_t;
/* Reduce x modulo 255 without a division.
 * Values below 255 (including negative ones) are returned unchanged;
 * otherwise 255 is subtracted and the high and low bytes are added
 * together, which keeps the residue because 256 is congruent to 1 mod 255.
 */
static inline int mod255(int x){
  for(;;){
    int reduced;

    if(x < 255)
      return x;
    reduced = x - 255;
    x = (reduced >> 8) + (reduced & 255);
  }
}
/* Exponent reduction for this codec is always modulo 255 (= NN). */
#define MODNN(x) mod255(x)
/* Lookup tables for the fixed CCSDS field; they are only declared here and
 * must be defined in another translation unit. */
extern data_t CCSDS_alpha_to[];
extern data_t CCSDS_index_of[];
extern data_t CCSDS_poly[];
/* Codec parameters as compile-time constants.
 * MM is the symbol size in bits and NN = (1<<MM)-1.
 * NROOTS, FCR and PRIM equal the values gen_ccsds.c passes to init_rs_char()
 * (nroots=32, fcr=112, prim=11).
 * IPRIM satisfies PRIM*IPRIM == 1 (mod NN): 11*116 = 1276 = 5*255 + 1. */
#define MM 8
#define NN 255
#define ALPHA_TO CCSDS_alpha_to
#define INDEX_OF CCSDS_index_of
#define GENPOLY CCSDS_poly
#define NROOTS 32
#define FCR 112
#define PRIM 11
#define IPRIM 116
/* PAD expands to the plain identifier `pad`, so a variable of that name must
 * be in scope wherever PAD is used. */
#define PAD pad

39
libfec/gen_ccsds.c Normal file
View File

@ -0,0 +1,39 @@
/* Generate tables for CCSDS code
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "char.h"
#include "rs-common.h"
#include "fec.h"
/* Print C source defining the three CCSDS Reed-Solomon lookup tables
 * (CCSDS_alpha_to, CCSDS_index_of, CCSDS_poly) on standard output.
 *
 * The arrays are emitted as `unsigned char`: their entries range up to 255,
 * which a signed `char` cannot hold, and fixed.h declares them as
 * `extern data_t` with data_t == unsigned char, so the definition has to use
 * the same type.
 *
 * Returns 0 on success, EXIT_FAILURE if the codec cannot be initialized.
 */
int main(){
  struct rs *rs;
  int i;

  rs = init_rs_char(8,0x187,112,11,32,0); /* CCSDS standard */
  /* Checked explicitly rather than with assert(), which is compiled out
   * under NDEBUG and would leave a NULL dereference below. */
  if(rs == NULL){
    fprintf(stderr,"gen_ccsds: init_rs_char failed\n");
    return EXIT_FAILURE;
  }

  /* alpha_to[] and index_of[] each hold nn+1 = 256 entries. */
  printf("unsigned char CCSDS_alpha_to[] = {");
  for(i=0;i<256;i++){
    if((i % 16) == 0)
      printf("\n");
    printf("0x%02x,",rs->alpha_to[i]);
  }
  printf("\n};\n\nunsigned char CCSDS_index_of[] = {");
  for(i=0;i<256;i++){
    if((i % 16) == 0)
      printf("\n");
    printf("%3d,",rs->index_of[i]);
  }
  /* genpoly[] holds nroots+1 = 33 coefficients, stored in index form. */
  printf("\n};\n\nunsigned char CCSDS_poly[] = {");
  for(i=0;i<33;i++){
    if((i % 16) == 0)
      printf("\n");
    printf("%3d,",rs->genpoly[i]);
  }
  printf("\n};\n");
  return 0;
}

53
libfec/gen_ccsds_tal.c Normal file
View File

@ -0,0 +1,53 @@
/* Conversion lookup tables from conventional alpha to Berlekamp's
* dual-basis representation. Used in the CCSDS version only.
* taltab[] -- convert conventional to dual basis
* tal1tab[] -- convert dual basis to conventional
* Note: the actual RS encoder/decoder works with the conventional basis.
* So data is converted from dual to conventional basis before either
* encoding or decoding and then converted back.
*
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdio.h>
#include <stdlib.h>
/* Element type of the conversion tables. */
#define DTYPE unsigned char
/* Taltab[] is filled by main() for every input value 0..255; Tal1tab[] is
 * filled as its inverse, i.e. Tal1tab[Taltab[i]] == i. */
DTYPE Taltab[256],Tal1tab[256];
/* Eight mask constants for the basis change: main() combines tal[7-k] into
 * the result for every set bit k of the input value. */
static DTYPE tal[] = { 0x8d, 0xef, 0xec, 0x86, 0xfa, 0x99, 0xaf, 0x7b };
/* Generate conversion lookup tables between conventional alpha representation
* (@**7, @**6, ...@**0)
* and Berlekamp's dual basis representation
* (l0, l1, ...l7)
*/
int main(){
int i,j,k;
for(i=0;i<256;i++){/* For each value of input */
Taltab[i] = 0;
for(j=0;j<8;j++) /* for each column of matrix */
for(k=0;k<8;k++){ /* for each row of matrix */
if(i & (1<<k))
Taltab[i] ^= tal[7-k] & (1<<j);
}
Tal1tab[Taltab[i]] = i;
}
printf("unsigned char Taltab[] = {\n");
for(i=0;i<256;i++){
if((i % 16) == 0)
printf("\n");
printf("0x%02x,",Taltab[i]);
}
printf("\n};\n\nunsigned char Tal1tab[] = {");
for(i=0;i<256;i++){
if((i % 16) == 0)
printf("\n");
printf("0x%02x,",Tal1tab[i]);
}
printf("\n};\n");
exit(0);
}

39
libfec/init_rs.c Normal file
View File

@ -0,0 +1,39 @@
/* Initialize a RS codec
*
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "fec.h"
#if !defined(NULL)
#define NULL ((void *)0)
#endif
#include "rs-common.h"
/* Release a Reed-Solomon control block and the three tables it owns.
 *
 * p is a handle returned by the matching initializer. Passing NULL is a
 * no-op (as with free()), so the result of a failed initialization, which
 * is NULL, can be handed back without a separate check.
 */
void free_rs(void *p){
  struct rs *rs = (struct rs *)p;

  if(rs == NULL)
    return;
  free(rs->alpha_to);
  free(rs->index_of);
  free(rs->genpoly);
  free(rs);
}
/* Initialize a Reed-Solomon codec
 * symsize = symbol size, bits
 * gfpoly = Field generator polynomial coefficients
 * fcr = first root of RS code generator polynomial, index form
 * prim = primitive element to generate polynomial roots
 * nroots = RS code generator polynomial degree (number of roots)
 * pad = padding bytes at front of shortened block
 *
 * Returns the new control block, or NULL when a parameter is out of range,
 * gfpoly is not primitive, or an allocation fails.
 *
 * The whole body comes from init_rs.h, which is textually included inside
 * the function; it relies on the local `rs` and on the parameter names
 * above, so none of them may be renamed.
 */
void *init_rs_common(int symsize,int gfpoly,int fcr,int prim,
int nroots,int pad){
struct rs *rs;
#include "init_rs.h"
return rs;
}

106
libfec/init_rs.h Normal file
View File

@ -0,0 +1,106 @@
/* Common code for initializing a Reed-Solomon control block (char or int symbols)
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*
* This file is a statement block, not a normal header: it is #included in
* the middle of a function body. The including function must provide
*   - a local `struct rs *rs`, which receives the result (NULL on failure);
*   - int parameters symsize, gfpoly, fcr, prim, nroots and pad;
*   - data_t, A0 and modnn() from the headers it included beforehand.
*/
#undef NULL
#define NULL ((void *)0)
{
int i, j, sr,root,iprim;
rs = NULL;
/* Check parameter ranges */
/* NOTE(review): the (1<<symsize) expressions below are evaluated in int.
* When data_t is unsigned int the first check admits symsize values as large
* as the bit width of int, for which the shift overflows -- confirm that no
* caller passes such a symbol size. */
if(symsize < 0 || symsize > 8*sizeof(data_t)){
goto done;
}
if(fcr < 0 || fcr >= (1<<symsize))
goto done;
if(prim <= 0 || prim >= (1<<symsize))
goto done;
if(nroots < 0 || nroots >= (1<<symsize))
goto done; /* Can't have more roots than symbol values! */
if(pad < 0 || pad >= ((1<<symsize) -1 - nroots))
goto done; /* Too much padding */
/* calloc zero-fills the block, so fields not assigned below start as 0. */
rs = (struct rs *)calloc(1,sizeof(struct rs));
if(rs == NULL)
goto done;
rs->mm = symsize;
rs->nn = (1<<symsize)-1;
rs->pad = pad;
/* Both field tables hold nn+1 entries. Every failure path from here on
* frees whatever was already allocated and resets rs to NULL. */
rs->alpha_to = (data_t *)malloc(sizeof(data_t)*(rs->nn+1));
if(rs->alpha_to == NULL){
free(rs);
rs = NULL;
goto done;
}
rs->index_of = (data_t *)malloc(sizeof(data_t)*(rs->nn+1));
if(rs->index_of == NULL){
free(rs->alpha_to);
free(rs);
rs = NULL;
goto done;
}
/* Generate Galois field lookup tables */
rs->index_of[0] = A0; /* log(zero) = -inf */
rs->alpha_to[A0] = 0; /* alpha**-inf = 0 */
/* sr steps through successive powers of alpha: shift left by one and, when
* bit `symsize` becomes set, reduce with gfpoly. */
sr = 1;
for(i=0;i<rs->nn;i++){
rs->index_of[sr] = i;
rs->alpha_to[i] = sr;
sr <<= 1;
if(sr & (1<<symsize))
sr ^= gfpoly;
sr &= rs->nn;
}
/* After nn steps the sequence must be back at 1. */
if(sr != 1){
/* field generator polynomial is not primitive! */
free(rs->alpha_to);
free(rs->index_of);
free(rs);
rs = NULL;
goto done;
}
/* Form RS code generator polynomial from its roots */
rs->genpoly = (data_t *)malloc(sizeof(data_t)*(nroots+1));
if(rs->genpoly == NULL){
free(rs->alpha_to);
free(rs->index_of);
free(rs);
rs = NULL;
goto done;
}
rs->fcr = fcr;
rs->prim = prim;
rs->nroots = nroots;
/* Find prim-th root of 1, used in decoding */
/* The loop tries 1, 1+nn, 1+2*nn, ... until the value is a multiple of prim,
* so the stored quotient satisfies prim * rs->iprim == 1 (mod nn). */
for(iprim=1;(iprim % prim) != 0;iprim += rs->nn)
;
rs->iprim = iprim / prim;
rs->genpoly[0] = 1;
/* One pass per root; `root` is the exponent fcr*prim, (fcr+1)*prim, ... */
for (i = 0,root=fcr*prim; i < nroots; i++,root += prim) {
rs->genpoly[i+1] = 1;
/* Multiply rs->genpoly[] by @**(root + x) */
for (j = i; j > 0; j--){
if (rs->genpoly[j] != 0)
rs->genpoly[j] = rs->genpoly[j-1] ^ rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[j]] + root)];
else
rs->genpoly[j] = rs->genpoly[j-1];
}
/* rs->genpoly[0] can never be zero */
rs->genpoly[0] = rs->alpha_to[modnn(rs,rs->index_of[rs->genpoly[0]] + root)];
}
/* convert rs->genpoly[] to index form for quicker encoding */
for (i = 0; i <= nroots; i++)
rs->genpoly[i] = rs->index_of[rs->genpoly[i]];
/* Single exit point; rs is either NULL or a fully initialized block. */
done:;
}

35
libfec/init_rs_char.c Normal file
View File

@ -0,0 +1,35 @@
/* Initialize a RS codec
*
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "char.h"
#include "rs-common.h"
/* Release a control block created by init_rs_char(), together with the
 * three tables it owns.
 *
 * Passing NULL is a no-op (as with free()). init_rs_char() returns NULL on
 * failure, so its result can be handed back here without a separate check.
 */
void free_rs_char(void *p){
  struct rs *rs = (struct rs *)p;

  if(rs == NULL)
    return;
  free(rs->alpha_to);
  free(rs->index_of);
  free(rs->genpoly);
  free(rs);
}
/* Initialize a Reed-Solomon codec
 * symsize = symbol size, bits
 * gfpoly = Field generator polynomial coefficients
 * fcr = first root of RS code generator polynomial, index form
 * prim = primitive element to generate polynomial roots
 * nroots = RS code generator polynomial degree (number of roots)
 * pad = padding bytes at front of shortened block
 *
 * Returns the new control block (release it with free_rs_char()), or NULL
 * when a parameter is out of range, gfpoly is not primitive, or an
 * allocation fails.
 *
 * The whole body comes from init_rs.h, which is textually included inside
 * the function; it relies on the local `rs` and on the parameter names
 * above, so none of them may be renamed.
 */
void *init_rs_char(int symsize,int gfpoly,int fcr,int prim,
int nroots,int pad){
struct rs *rs;
#include "init_rs.h"
return rs;
}

35
libfec/init_rs_int.c Normal file
View File

@ -0,0 +1,35 @@
/* Initialize a RS codec
*
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "int.h"
#include "rs-common.h"
/* Release a control block created by init_rs_int(), together with the
 * three tables it owns.
 *
 * Passing NULL is a no-op (as with free()). init_rs_int() returns NULL on
 * failure, so its result can be handed back here without a separate check.
 */
void free_rs_int(void *p){
  struct rs *rs = (struct rs *)p;

  if(rs == NULL)
    return;
  free(rs->alpha_to);
  free(rs->index_of);
  free(rs->genpoly);
  free(rs);
}
/* Initialize a Reed-Solomon codec
 * symsize = symbol size, bits
 * gfpoly = Field generator polynomial coefficients
 * fcr = first root of RS code generator polynomial, index form
 * prim = primitive element to generate polynomial roots
 * nroots = RS code generator polynomial degree (number of roots)
 * pad = padding bytes at front of shortened block
 *
 * Returns the new control block (release it with free_rs_int()), or NULL
 * when a parameter is out of range, gfpoly is not primitive, or an
 * allocation fails.
 *
 * The whole body comes from init_rs.h, which is textually included inside
 * the function; it relies on the local `rs` and on the parameter names
 * above, so none of them may be renamed.
 */
void *init_rs_int(int symsize,int gfpoly,int fcr,int prim,
int nroots,int pad){
struct rs *rs;
#include "init_rs.h"
return rs;
}

251
libfec/install-sh Executable file
View File

@ -0,0 +1,251 @@
#!/bin/sh
#
# install - install a program, script, or datafile
# This comes from X11R5 (mit/util/scripts/install.sh).
#
# Copyright 1991 by the Massachusetts Institute of Technology
#
# Permission to use, copy, modify, distribute, and sell this software and its
# documentation for any purpose is hereby granted without fee, provided that
# the above copyright notice appear in all copies and that both that
# copyright notice and this permission notice appear in supporting
# documentation, and that the name of M.I.T. not be used in advertising or
# publicity pertaining to distribution of the software without specific,
# written prior permission. M.I.T. makes no representations about the
# suitability of this software for any purpose. It is provided "as is"
# without express or implied warranty.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# `make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch. It can only install one file at a time, a restriction
# shared with many OS's install programs.
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit="${DOITPROG-}"
# put in absolute paths if you don't have them in your path; or use env. vars.
mvprog="${MVPROG-mv}"
cpprog="${CPPROG-cp}"
chmodprog="${CHMODPROG-chmod}"
chownprog="${CHOWNPROG-chown}"
chgrpprog="${CHGRPPROG-chgrp}"
stripprog="${STRIPPROG-strip}"
rmprog="${RMPROG-rm}"
mkdirprog="${MKDIRPROG-mkdir}"
# Defaults for everything the option parser may override.
transformbasename=""
# This name must match the variable assigned by the -t= option and tested
# before the final rename. Clearing it here stops a `transformarg` inherited
# from the caller's environment from silently rewriting the installed name.
transformarg=""
# Default action is to move the file; -c switches it to copy.
instcmd="$mvprog"
chmodcmd="$chmodprog 0755"
chowncmd=""
chgrpcmd=""
stripcmd=""
rmcmd="$rmprog -f"
mvcmd="$mvprog"
src=""
dst=""
dir_arg=""
# Parse the command line. Recognised options:
#   -c         use the copy program instead of the move program
#   -d         set dir_arg (directory mode)
#   -m MODE    run chmod with MODE instead of the default 0755
#   -o OWNER   run chown with OWNER
#   -g GROUP   run chgrp with GROUP
#   -s         run the strip program on the installed file
#   -t=EXPR    sed expression applied to the destination basename
#   -b=SUFFIX  suffix removed before, and re-appended after, the -t rewrite
# The first non-option word becomes src; every later one overwrites dst.
# Parsing stops at the first empty argument.
while [ x"$1" != x ]; do
case $1 in
-c) instcmd="$cpprog"
shift
continue;;
-d) dir_arg=true
shift
continue;;
-m) chmodcmd="$chmodprog $2"
shift
shift
continue;;
-o) chowncmd="$chownprog $2"
shift
shift
continue;;
-g) chgrpcmd="$chgrpprog $2"
shift
shift
continue;;
-s) stripcmd="$stripprog"
shift
continue;;
-t=*) transformarg=`echo $1 | sed 's/-t=//'`
shift
continue;;
-b=*) transformbasename=`echo $1 | sed 's/-b=//'`
shift
continue;;
*) if [ x"$src" = x ]
then
src=$1
else
# this colon is to work around a 386BSD /bin/sh bug
:
dst=$1
fi
shift
continue;;
esac
done
# At least one operand is mandatory.
if [ x"$src" = x ]
then
echo "install: no input file specified"
exit 1
else
true
fi
# Directory mode (-d): the single operand names the directory to create, so
# it is moved from src to dst.
if [ x"$dir_arg" != x ]; then
dst=$src
src=""
# An existing directory needs neither creation nor chmod.
# NOTE(review): the creation command is the literal `mkdir`, not $mkdirprog,
# so a MKDIRPROG override does not apply on this path.
if [ -d $dst ]; then
instcmd=:
chmodcmd=""
else
instcmd=mkdir
fi
else
# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if [ -f $src -o -d $src ]
then
true
else
echo "install: $src does not exist"
exit 1
fi
if [ x"$dst" = x ]
then
echo "install: no destination specified"
exit 1
else
true
fi
# If destination is a directory, append the input filename; if your system
# does not like double slashes in filenames, you may need to add some logic
if [ -d $dst ]
then
dst="$dst"/`basename $src`
else
true
fi
fi
## this sed command emulates the dirname command
# (strip the last path component, then a trailing slash; empty becomes ".")
dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
# Make sure that the destination directory exists.
# this part is taken from Noah Friedman's mkinstalldirs script
# Skip lots of stat calls in the usual case.
if [ ! -d "$dstdir" ]; then
defaultIFS='
'
IFS="${IFS-${defaultIFS}}"
oIFS="${IFS}"
# Some sh's can't handle IFS=/ for some reason.
# So every "/" is rewritten to "%" (a leading one is restored afterwards) and
# the path is split on "%" into the positional parameters.
IFS='%'
set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
IFS="${oIFS}"
# Rebuild the path one component at a time, creating each missing level.
pathcomp=''
while [ $# -ne 0 ] ; do
pathcomp="${pathcomp}${1}"
shift
if [ ! -d "${pathcomp}" ] ;
then
$mkdirprog "${pathcomp}"
else
true
fi
pathcomp="${pathcomp}/"
done
fi
# Perform the installation.
# Directory mode: run $instcmd (":" or mkdir, chosen earlier) on $dst, then
# whichever of chown/chgrp/strip/chmod were requested.
# File mode: move or copy $src to a temporary name inside $dstdir, apply the
# requested attributes there, remove any existing destination and rename the
# temporary file into place.
# Every step is chained with &&, so the first failure stops the sequence and
# the final "exit 0" is reached only when all steps succeeded.
if [ x"$dir_arg" != x ]
then
$doit $instcmd $dst &&
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
else
# If we're going to rename the final executable, determine the name now.
if [ x"$transformarg" = x ]
then
dstfile=`basename $dst`
else
dstfile=`basename $dst $transformbasename |
sed $transformarg`$transformbasename
fi
# don't allow the sed command to completely eliminate the filename
if [ x"$dstfile" = x ]
then
dstfile=`basename $dst`
else
true
fi
# Make a temp file name in the proper directory.
# ($$ is the shell's process id.)
dsttmp=$dstdir/#inst.$$#
# Move or copy the file name to the temp name
# The trap installed right after removes the temp file when the script exits.
$doit $instcmd $src $dsttmp &&
trap "rm -f ${dsttmp}" 0 &&
# and set any options; do chmod last to preserve setuid bits
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $instcmd $src $dsttmp" command.
if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
# Now rename the file to the real destination.
# ($rmcmd already ends in -f, so the flag is passed twice here.)
$doit $rmcmd -f $dstdir/$dstfile &&
$doit $mvcmd $dsttmp $dstdir/$dstfile
fi &&
exit 0

22
libfec/int.h Normal file
View File

@ -0,0 +1,22 @@
/* Stuff specific to the general (integer) version of the Reed-Solomon codecs
*
* Copyright 2003, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
/* Symbol type used by the integer codec. */
typedef unsigned int data_t;
/* Every codec parameter below is read at run time from the control block
 * `rs`, so a variable named `rs` must be in scope wherever these macros are
 * expanded. */
#define MODNN(x) modnn(rs,x)
#define MM (rs->mm)
#define NN (rs->nn)
#define ALPHA_TO (rs->alpha_to)
#define INDEX_OF (rs->index_of)
#define GENPOLY (rs->genpoly)
#define NROOTS (rs->nroots)
#define FCR (rs->fcr)
#define PRIM (rs->prim)
#define IPRIM (rs->iprim)
#define PAD (rs->pad)
/* Index-form stand-in for log(0): init_rs.h stores A0 in index_of[0] and
 * sets alpha_to[A0] to 0. */
#define A0 (NN)

504
libfec/lesser.txt Normal file
View File

@ -0,0 +1,504 @@
GNU LESSER GENERAL PUBLIC LICENSE
Version 2.1, February 1999
Copyright (C) 1991, 1999 Free Software Foundation, Inc.
59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
[This is the first released version of the Lesser GPL. It also counts
as the successor of the GNU Library Public License, version 2, hence
the version number 2.1.]
Preamble
The licenses for most software are designed to take away your
freedom to share and change it. By contrast, the GNU General Public
Licenses are intended to guarantee your freedom to share and change
free software--to make sure the software is free for all its users.
This license, the Lesser General Public License, applies to some
specially designated software packages--typically libraries--of the
Free Software Foundation and other authors who decide to use it. You
can use it too, but we suggest you first think carefully about whether
this license or the ordinary General Public License is the better
strategy to use in any particular case, based on the explanations below.
When we speak of free software, we are referring to freedom of use,
not price. Our General Public Licenses are designed to make sure that
you have the freedom to distribute copies of free software (and charge
for this service if you wish); that you receive source code or can get
it if you want it; that you can change the software and use pieces of
it in new free programs; and that you are informed that you can do
these things.
To protect your rights, we need to make restrictions that forbid
distributors to deny you these rights or to ask you to surrender these
rights. These restrictions translate to certain responsibilities for
you if you distribute copies of the library or if you modify it.
For example, if you distribute copies of the library, whether gratis
or for a fee, you must give the recipients all the rights that we gave
you. You must make sure that they, too, receive or can get the source
code. If you link other code with the library, you must provide
complete object files to the recipients, so that they can relink them
with the library after making changes to the library and recompiling
it. And you must show them these terms so they know their rights.
We protect your rights with a two-step method: (1) we copyright the
library, and (2) we offer you this license, which gives you legal
permission to copy, distribute and/or modify the library.
To protect each distributor, we want to make it very clear that
there is no warranty for the free library. Also, if the library is
modified by someone else and passed on, the recipients should know
that what they have is not the original version, so that the original
author's reputation will not be affected by problems that might be
introduced by others.
Finally, software patents pose a constant threat to the existence of
any free program. We wish to make sure that a company cannot
effectively restrict the users of a free program by obtaining a
restrictive license from a patent holder. Therefore, we insist that
any patent license obtained for a version of the library must be
consistent with the full freedom of use specified in this license.
Most GNU software, including some libraries, is covered by the
ordinary GNU General Public License. This license, the GNU Lesser
General Public License, applies to certain designated libraries, and
is quite different from the ordinary General Public License. We use
this license for certain libraries in order to permit linking those
libraries into non-free programs.
When a program is linked with a library, whether statically or using
a shared library, the combination of the two is legally speaking a
combined work, a derivative of the original library. The ordinary
General Public License therefore permits such linking only if the
entire combination fits its criteria of freedom. The Lesser General
Public License permits more lax criteria for linking other code with
the library.
We call this license the "Lesser" General Public License because it
does Less to protect the user's freedom than the ordinary General
Public License. It also provides other free software developers Less
of an advantage over competing non-free programs. These disadvantages
are the reason we use the ordinary General Public License for many
libraries. However, the Lesser license provides advantages in certain
special circumstances.
For example, on rare occasions, there may be a special need to
encourage the widest possible use of a certain library, so that it becomes
a de-facto standard. To achieve this, non-free programs must be
allowed to use the library. A more frequent case is that a free
library does the same job as widely used non-free libraries. In this
case, there is little to gain by limiting the free library to free
software only, so we use the Lesser General Public License.
In other cases, permission to use a particular library in non-free
programs enables a greater number of people to use a large body of
free software. For example, permission to use the GNU C Library in
non-free programs enables many more people to use the whole GNU
operating system, as well as its variant, the GNU/Linux operating
system.
Although the Lesser General Public License is Less protective of the
users' freedom, it does ensure that the user of a program that is
linked with the Library has the freedom and the wherewithal to run
that program using a modified version of the Library.
The precise terms and conditions for copying, distribution and
modification follow. Pay close attention to the difference between a
"work based on the library" and a "work that uses the library". The
former contains code derived from the library, whereas the latter must
be combined with the library in order to run.
GNU LESSER GENERAL PUBLIC LICENSE
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
0. This License Agreement applies to any software library or other
program which contains a notice placed by the copyright holder or
other authorized party saying it may be distributed under the terms of
this Lesser General Public License (also called "this License").
Each licensee is addressed as "you".
A "library" means a collection of software functions and/or data
prepared so as to be conveniently linked with application programs
(which use some of those functions and data) to form executables.
The "Library", below, refers to any such software library or work
which has been distributed under these terms. A "work based on the
Library" means either the Library or any derivative work under
copyright law: that is to say, a work containing the Library or a
portion of it, either verbatim or with modifications and/or translated
straightforwardly into another language. (Hereinafter, translation is
included without limitation in the term "modification".)
"Source code" for a work means the preferred form of the work for
making modifications to it. For a library, complete source code means
all the source code for all modules it contains, plus any associated
interface definition files, plus the scripts used to control compilation
and installation of the library.
Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope. The act of
running a program using the Library is not restricted, and output from
such a program is covered only if its contents constitute a work based
on the Library (independent of the use of the Library in a tool for
writing it). Whether that is true depends on what the Library does
and what the program that uses the Library does.
1. You may copy and distribute verbatim copies of the Library's
complete source code as you receive it, in any medium, provided that
you conspicuously and appropriately publish on each copy an
appropriate copyright notice and disclaimer of warranty; keep intact
all the notices that refer to this License and to the absence of any
warranty; and distribute a copy of this License along with the
Library.
You may charge a fee for the physical act of transferring a copy,
and you may at your option offer warranty protection in exchange for a
fee.
2. You may modify your copy or copies of the Library or any portion
of it, thus forming a work based on the Library, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:
a) The modified work must itself be a software library.
b) You must cause the files modified to carry prominent notices
stating that you changed the files and the date of any change.
c) You must cause the whole of the work to be licensed at no
charge to all third parties under the terms of this License.
d) If a facility in the modified Library refers to a function or a
table of data to be supplied by an application program that uses
the facility, other than as an argument passed when the facility
is invoked, then you must make a good faith effort to ensure that,
in the event an application does not supply such function or
table, the facility still operates, and performs whatever part of
its purpose remains meaningful.
(For example, a function in a library to compute square roots has
a purpose that is entirely well-defined independent of the
application. Therefore, Subsection 2d requires that any
application-supplied function or table used by this function must
be optional: if the application does not supply it, the square
root function must still compute square roots.)
These requirements apply to the modified work as a whole. If
identifiable sections of that work are not derived from the Library,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works. But when you
distribute the same sections as part of a whole which is a work based
on the Library, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote
it.
Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Library.
In addition, mere aggregation of another work not based on the Library
with the Library (or with a work based on the Library) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.
3. You may opt to apply the terms of the ordinary GNU General Public
License instead of this License to a given copy of the Library. To do
this, you must alter all the notices that refer to this License, so
that they refer to the ordinary GNU General Public License, version 2,
instead of to this License. (If a newer version than version 2 of the
ordinary GNU General Public License has appeared, then you can specify
that version instead if you wish.) Do not make any other change in
these notices.
Once this change is made in a given copy, it is irreversible for
that copy, so the ordinary GNU General Public License applies to all
subsequent copies and derivative works made from that copy.
This option is useful when you wish to copy part of the code of
the Library into a program that is not a library.
4. You may copy and distribute the Library (or a portion or
derivative of it, under Section 2) in object code or executable form
under the terms of Sections 1 and 2 above provided that you accompany
it with the complete corresponding machine-readable source code, which
must be distributed under the terms of Sections 1 and 2 above on a
medium customarily used for software interchange.
If distribution of object code is made by offering access to copy
from a designated place, then offering equivalent access to copy the
source code from the same place satisfies the requirement to
distribute the source code, even though third parties are not
compelled to copy the source along with the object code.
5. A program that contains no derivative of any portion of the
Library, but is designed to work with the Library by being compiled or
linked with it, is called a "work that uses the Library". Such a
work, in isolation, is not a derivative work of the Library, and
therefore falls outside the scope of this License.
However, linking a "work that uses the Library" with the Library
creates an executable that is a derivative of the Library (because it
contains portions of the Library), rather than a "work that uses the
library". The executable is therefore covered by this License.
Section 6 states terms for distribution of such executables.
When a "work that uses the Library" uses material from a header file
that is part of the Library, the object code for the work may be a
derivative work of the Library even though the source code is not.
Whether this is true is especially significant if the work can be
linked without the Library, or if the work is itself a library. The
threshold for this to be true is not precisely defined by law.
If such an object file uses only numerical parameters, data
structure layouts and accessors, and small macros and small inline
functions (ten lines or less in length), then the use of the object
file is unrestricted, regardless of whether it is legally a derivative
work. (Executables containing this object code plus portions of the
Library will still fall under Section 6.)
Otherwise, if the work is a derivative of the Library, you may
distribute the object code for the work under the terms of Section 6.
Any executables containing that work also fall under Section 6,
whether or not they are linked directly with the Library itself.
6. As an exception to the Sections above, you may also combine or
link a "work that uses the Library" with the Library to produce a
work containing portions of the Library, and distribute that work
under terms of your choice, provided that the terms permit
modification of the work for the customer's own use and reverse
engineering for debugging such modifications.
You must give prominent notice with each copy of the work that the
Library is used in it and that the Library and its use are covered by
this License. You must supply a copy of this License. If the work
during execution displays copyright notices, you must include the
copyright notice for the Library among them, as well as a reference
directing the user to the copy of this License. Also, you must do one
of these things:
a) Accompany the work with the complete corresponding
machine-readable source code for the Library including whatever
changes were used in the work (which must be distributed under
Sections 1 and 2 above); and, if the work is an executable linked
with the Library, with the complete machine-readable "work that
uses the Library", as object code and/or source code, so that the
user can modify the Library and then relink to produce a modified
executable containing the modified Library. (It is understood
that the user who changes the contents of definitions files in the
Library will not necessarily be able to recompile the application
to use the modified definitions.)
b) Use a suitable shared library mechanism for linking with the
Library. A suitable mechanism is one that (1) uses at run time a
copy of the library already present on the user's computer system,
rather than copying library functions into the executable, and (2)
will operate properly with a modified version of the library, if
the user installs one, as long as the modified version is
interface-compatible with the version that the work was made with.
c) Accompany the work with a written offer, valid for at
least three years, to give the same user the materials
specified in Subsection 6a, above, for a charge no more
than the cost of performing this distribution.
d) If distribution of the work is made by offering access to copy
from a designated place, offer equivalent access to copy the above
specified materials from the same place.
e) Verify that the user has already received a copy of these
materials or that you have already sent this user a copy.
For an executable, the required form of the "work that uses the
Library" must include any data and utility programs needed for
reproducing the executable from it. However, as a special exception,
the materials to be distributed need not include anything that is
normally distributed (in either source or binary form) with the major
components (compiler, kernel, and so on) of the operating system on
which the executable runs, unless that component itself accompanies
the executable.
It may happen that this requirement contradicts the license
restrictions of other proprietary libraries that do not normally
accompany the operating system. Such a contradiction means you cannot
use both them and the Library together in an executable that you
distribute.
7. You may place library facilities that are a work based on the
Library side-by-side in a single library together with other library
facilities not covered by this License, and distribute such a combined
library, provided that the separate distribution of the work based on
the Library and of the other library facilities is otherwise
permitted, and provided that you do these two things:
a) Accompany the combined library with a copy of the same work
based on the Library, uncombined with any other library
facilities. This must be distributed under the terms of the
Sections above.
b) Give prominent notice with the combined library of the fact
that part of it is a work based on the Library, and explaining
where to find the accompanying uncombined form of the same work.
8. You may not copy, modify, sublicense, link with, or distribute
the Library except as expressly provided under this License. Any
attempt otherwise to copy, modify, sublicense, link with, or
distribute the Library is void, and will automatically terminate your
rights under this License. However, parties who have received copies,
or rights, from you under this License will not have their licenses
terminated so long as such parties remain in full compliance.
9. You are not required to accept this License, since you have not
signed it. However, nothing else grants you permission to modify or
distribute the Library or its derivative works. These actions are
prohibited by law if you do not accept this License. Therefore, by
modifying or distributing the Library (or any work based on the
Library), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Library or works based on it.
10. Each time you redistribute the Library (or any work based on the
Library), the recipient automatically receives a license from the
original licensor to copy, distribute, link with or modify the Library
subject to these terms and conditions. You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties with
this License.
11. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Library at all. For example, if a patent
license would not permit royalty-free redistribution of the Library by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Library.
If any portion of this section is held invalid or unenforceable under any
particular circumstance, the balance of the section is intended to apply,
and the section as a whole is intended to apply in other circumstances.
It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system which is
implemented by public license practices. Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.
This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.
12. If the distribution and/or use of the Library is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Library under this License may add
an explicit geographical distribution limitation excluding those countries,
so that distribution is permitted only in or among countries not thus
excluded. In such case, this License incorporates the limitation as if
written in the body of this License.
13. The Free Software Foundation may publish revised and/or new
versions of the Lesser General Public License from time to time.
Such new versions will be similar in spirit to the present version,
but may differ in detail to address new problems or concerns.
Each version is given a distinguishing version number. If the Library
specifies a version number of this License which applies to it and
"any later version", you have the option of following the terms and
conditions either of that version or of any later version published by
the Free Software Foundation. If the Library does not specify a
license version number, you may choose any version ever published by
the Free Software Foundation.
14. If you wish to incorporate parts of the Library into other free
programs whose distribution conditions are incompatible with these,
write to the author to ask for permission. For software which is
copyrighted by the Free Software Foundation, write to the Free
Software Foundation; we sometimes make exceptions for this. Our
decision will be guided by the two goals of preserving the free status
of all derivatives of our free software and of promoting the sharing
and reuse of software generally.
NO WARRANTY
15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
DAMAGES.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Libraries
If you develop a new library, and you want it to be of the greatest
possible use to the public, we recommend making it free software that
everyone can redistribute and change. You can do so by permitting
redistribution under these terms (or, alternatively, under the terms of the
ordinary General Public License).
To apply these terms, attach the following notices to the library. It is
safest to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least the
"copyright" line and a pointer to where the full notice is found.
<one line to give the library's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Also add information on how to contact you by electronic and paper mail.
You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the library, if
necessary. Here is a sample; alter the names:
Yoyodyne, Inc., hereby disclaims all copyright interest in the
library `Frob' (a library for tweaking knobs) written by James Random Hacker.
<signature of Ty Coon>, 1 April 1990
Ty Coon, President of Vice
That's all there is to it!

13
libfec/libfec.pc.in Normal file
View File

@ -0,0 +1,13 @@
# pkg-config metadata template for libfec.
# Every @LIBFEC_PC_*@ token is a placeholder; presumably the build system
# substitutes them when it generates libfec.pc (TODO confirm which step does this).
prefix=@LIBFEC_PC_PREFIX@
exec_prefix=@LIBFEC_PC_EXEC_PREFIX@
libdir=@LIBFEC_PC_LIBDIR@
includedir=@LIBFEC_PC_INCLUDEDIR@
Name: FEC library
Description: A fork of KA9Q's FEC library
Version: @LIBFEC_PC_VERSION@
URL: http://opendigitalradio.org
# Compile flags handed to consumers: the header directory plus configured extras.
Cflags: -I${includedir}/ @LIBFEC_PC_CFLAGS@
# Link flags handed to consumers; Libs.private holds the additional entries
# pkg-config reports only for static linking (--static).
Libs: -L${libdir}/ @LIBFEC_PC_LIBS@
Libs.private: @LIBFEC_PC_PRIV_LIBS@

249
libfec/makefile.in Normal file
View File

@ -0,0 +1,249 @@
# Makefile prototype for configure
# Copyright 2004 Phil Karn, KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
# @configure_input@
# Paths and compiler; each @...@ token is a configure substitution.
srcdir = @srcdir@
prefix = @prefix@
exec_prefix=@exec_prefix@
VPATH = @srcdir@
CC=@CC@
# Object files that make up the library: they are the prerequisites of the
# libfec.a, libfec.so and libfec.dylib rules. @MLIBS@ contributes further
# configure-selected entries (presumably the machine-specific objects).
LIBS=@MLIBS@ fec.o sim.o viterbi27.o viterbi27_port.o viterbi29.o viterbi29_port.o \
viterbi39.o viterbi39_port.o \
viterbi615.o viterbi615_port.o encode_rs_char.o encode_rs_int.o encode_rs_8.o \
decode_rs_char.o decode_rs_int.o decode_rs_8.o \
init_rs_char.o init_rs_int.o ccsds_tab.o \
encode_rs_ccsds.o decode_rs_ccsds.o ccsds_tal.o \
dotprod.o dotprod_port.o \
peakval.o peakval_port.o \
sumsq.o sumsq_port.o
# Flags used for C compiles; -fPIC is always on because the same objects are
# linked into the shared library as well as archived into libfec.a.
CFLAGS=@CFLAGS@ -I. -fPIC -Wall @ARCH_OPTION@
# File name of the shared library to build (rules exist for libfec.so and libfec.dylib).
SHARED_LIB=@SH_LIB@
# These targets name actions, not files. Declaring them phony guarantees that
# a stray file called "all", "test" or "install" in the build directory can
# never make them look up to date.
.PHONY: all test install

# Default target: the static archive plus the configured shared library.
all: libfec.a $(SHARED_LIB)

# Build every test program, run each once in its correctness mode, then run
# the Viterbi decoders again without arguments as speed tests.
test: vtest27 vtest29 vtest39 vtest615 rstest dtest sumsq_test peaktest
	@echo "Correctness tests:"
	./vtest27 -e 3.0 -n 1000 -v
	./vtest29 -e 2.5 -n 1000 -v
	./vtest39 -e 2.5 -n 1000 -v
	./vtest615 -e 1.0 -n 100 -v
	./rstest
	./dtest
	./sumsq_test
	./peaktest
	@echo "Speed tests:"
	./vtest27
	./vtest29
	./vtest39
	./vtest615

# Install libraries, the public header and the man pages below $(DESTDIR).
# @REBIND@ is a configure substitution for the post-install step.
install: all
	mkdir -p $(DESTDIR)@libdir@
	install -m 644 -p $(SHARED_LIB) libfec.a $(DESTDIR)@libdir@
#	(cd $(DESTDIR)@libdir@;ln -f -s $(SHARED_LIB) libfec.so)
	@REBIND@
	mkdir -p $(DESTDIR)@includedir@
	install -m 644 -p fec.h $(DESTDIR)@includedir@
	mkdir -m 0755 -p $(DESTDIR)@mandir@/man3
	install -m 644 -p simd-viterbi.3 rs.3 dsp.3 $(DESTDIR)@mandir@/man3
# Test programs: each driver object is linked against the static library.
# The recipes use $(CC), the compiler selected by configure, instead of a
# hard-coded "gcc", so the tests are built by the same toolchain as the library.
peaktest: peaktest.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^

sumsq_test: sumsq_test.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^

dtest: dtest.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^ -lm

vtest27: vtest27.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^ -lm

vtest29: vtest29.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^ -lm

vtest39: vtest39.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^ -lm

vtest615: vtest615.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^ -lm

rstest: rstest.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^

rs_speedtest: rs_speedtest.o libfec.a
	$(CC) $(CFLAGS) -g -o $@ $^

# for some reason, the test programs without args segfault on the PPC with -O2 optimization. Dunno why - compiler bug?
vtest27.o: vtest27.c fec.h
	$(CC) $(CFLAGS) -g -c $<

vtest29.o: vtest29.c fec.h
	$(CC) $(CFLAGS) -g -c $<

vtest39.o: vtest39.c fec.h
	$(CC) $(CFLAGS) -g -c $<

vtest615.o: vtest615.c fec.h
	$(CC) $(CFLAGS) -g -c $<
# Static archive of every library object.
libfec.a: $(LIBS)
	ar rv $@ $^
	ranlib libfec.a

# for Darwin
libfec.dylib: $(LIBS)
	$(CC) -dynamiclib -install_name $@ -o $@ $^

# for Linux et al
# Linked with $(CC), matching the Darwin rule, rather than a hard-coded gcc.
libfec.so: $(LIBS)
	$(CC) -fPIC -shared -Xlinker -soname=$@ -o $@ -Wl,-whole-archive $^ -Wl,-no-whole-archive -lc -lm
# Prerequisite-only rules: they record which headers each object depends on.
# None of them has a recipe here, so the compile command comes either from an
# explicit rule for the same target elsewhere in this makefile or from make's
# built-in .c -> .o rule.
dotprod.o: dotprod.c fec.h
dotprod_port.o: dotprod_port.c fec.h
viterbi27.o: viterbi27.c fec.h
viterbi27_port.o: viterbi27_port.c fec.h
viterbi29.o: viterbi29.c fec.h
viterbi39.o: viterbi39.c fec.h
viterbi39_port.o: viterbi39_port.c fec.h
viterbi39_sse2.o: viterbi39_sse2.c fec.h
viterbi39_sse.o: viterbi39_sse.c fec.h
viterbi39_mmx.o: viterbi39_mmx.c fec.h
# Reed-Solomon codecs: each variant depends on its symbol-type header.
encode_rs_char.o: encode_rs_char.c char.h rs-common.h
encode_rs_int.o: encode_rs_int.c int.h rs-common.h
encode_rs_8.o: encode_rs_8.c fixed.h
encode_rs_av.o: encode_rs_av.c fixed.h
decode_rs_char.o: decode_rs_char.c char.h rs-common.h
decode_rs_int.o: decode_rs_int.c int.h rs-common.h
decode_rs_8.o: decode_rs_8.c fixed.h
init_rs_char.o: init_rs_char.c char.h rs-common.h
init_rs_int.o: init_rs_int.c int.h rs-common.h
# ccsds_tab.c is itself generated (by the gen_ccsds rule).
ccsds_tab.o: ccsds_tab.c
# ccsds_tab.c is produced by running the gen_ccsds helper program.
ccsds_tab.c: gen_ccsds
	./gen_ccsds > ccsds_tab.c

# gen_ccsds links against init_rs_char.o, which make compiles with $(CC);
# use $(CC) here as well so both objects come from the same compiler.
gen_ccsds: gen_ccsds.o init_rs_char.o
	$(CC) $(CFLAGS) -o $@ $^

gen_ccsds.o: gen_ccsds.c
	$(CC) $(CFLAGS) -c -o $@ $<

ccsds_tal.o: ccsds_tal.c

# ccsds_tal.c is produced by running gen_ccsds_tal (no explicit rule builds
# that program here, so make's built-in rules are relied on).
ccsds_tal.c: gen_ccsds_tal
	./gen_ccsds_tal > ccsds_tal.c

# exercise.c is compiled four times, once per codec selected by a -D switch.
exercise_char.o: exercise.c
	$(CC) $(CFLAGS) -c -o $@ $<

exercise_int.o: exercise.c
	$(CC) -DBIGSYM=1 $(CFLAGS) -c -o $@ $<

exercise_8.o: exercise.c
	$(CC) -DFIXED=1 $(CFLAGS) -c -o $@ $<

exercise_ccsds.o: exercise.c
	$(CC) -DCCSDS=1 $(CFLAGS) -c -o $@ $<
# Per-decoder object rules. The MMX/SSE/SSE2 variants need an explicit recipe
# because each must be compiled with the matching -m<isa> switch; the recipes
# use $(CC) (the configure-selected compiler) instead of a hard-coded gcc.
# Targets without a recipe only record their header dependency.
viterbi27.o: viterbi27.c fec.h
viterbi27_port.o: viterbi27_port.c fec.h
viterbi27_av.o: viterbi27_av.c fec.h
viterbi27_mmx.o: viterbi27_mmx.c fec.h
	$(CC) $(CFLAGS) -mmmx -c -o $@ $<

viterbi27_sse.o: viterbi27_sse.c fec.h
	$(CC) $(CFLAGS) -msse -c -o $@ $<

viterbi27_sse2.o: viterbi27_sse2.c fec.h
	$(CC) $(CFLAGS) -msse2 -c -o $@ $<

viterbi29.o: viterbi29.c fec.h
viterbi29_port.o: viterbi29_port.c fec.h
viterbi29_av.o: viterbi29_av.c fec.h
viterbi29_mmx.o: viterbi29_mmx.c fec.h
	$(CC) $(CFLAGS) -mmmx -c -o $@ $<

viterbi29_sse.o: viterbi29_sse.c fec.h
	$(CC) $(CFLAGS) -msse -c -o $@ $<

viterbi29_sse2.o: viterbi29_sse2.c fec.h
	$(CC) $(CFLAGS) -msse2 -c -o $@ $<

viterbi39.o: viterbi39.c fec.h
viterbi39_port.o: viterbi39_port.c fec.h
viterbi39_av.o: viterbi39_av.c fec.h
viterbi39_mmx.o: viterbi39_mmx.c fec.h
	$(CC) $(CFLAGS) -mmmx -c -o $@ $<

viterbi39_sse.o: viterbi39_sse.c fec.h
	$(CC) $(CFLAGS) -msse -c -o $@ $<

viterbi39_sse2.o: viterbi39_sse2.c fec.h
	$(CC) $(CFLAGS) -msse2 -c -o $@ $<

viterbi615.o: viterbi615.c fec.h
viterbi615_port.o: viterbi615_port.c fec.h
viterbi615_av.o: viterbi615_av.c fec.h
viterbi615_mmx.o: viterbi615_mmx.c fec.h
	$(CC) $(CFLAGS) -mmmx -c -o $@ $<

viterbi615_sse.o: viterbi615_sse.c fec.h
	$(CC) $(CFLAGS) -msse -c -o $@ $<

viterbi615_sse2.o: viterbi615_sse2.c fec.h
	$(CC) $(CFLAGS) -msse2 -c -o $@ $<

cpu_mode_x86.o: cpu_mode_x86.c fec.h
cpu_mode_x86_64.o: cpu_mode_x86_64.c fec.h
cpu_mode_ppc.o: cpu_mode_ppc.c fec.h

#%.o: %.s
#	$(AS) $< -o $@
# Cleanup targets are actions, not files; mark them phony so they always run.
.PHONY: clean distclean

# Remove everything the build produces, including the generated CCSDS tables.
clean:
	rm -f *.o $(SHARED_LIB) *.a rs_speedtest peaktest sumsq_test dtest vtest27 vtest29 vtest39 vtest615 rstest ccsds_tab.c ccsds_tal.c gen_ccsds gen_ccsds_tal core
	rm -rf autom4te.cache

# Additionally remove what configure generated, this makefile included.
distclean: clean
	rm -f config.log config.cache config.status config.h makefile

148
libfec/mmxbfly27.s Normal file
View File

@ -0,0 +1,148 @@
/* Intel SIMD MMX implementation of Viterbi ACS butterflies
for 64-state (k=7) convolutional code
Copyright 2004 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits) ;
*/
# MMX (64-bit SIMD) version
# requires Pentium-MMX, Pentium-II or better

# These are offsets into struct v27, defined in viterbi27_mmx.c
        .set DP,128
        .set OLDMETRICS,132
        .set NEWMETRICS,136

        .text
        .global update_viterbi27_blk_mmx,Mettab27_1,Mettab27_2
        .type update_viterbi27_blk_mmx,@function
        .align 16

# int update_viterbi27_blk_mmx(struct v27 *vp,unsigned char *syms,int nbits)
#
# Stack arguments: vp at 8(%ebp), syms at 12(%ebp), nbits at 16(%ebp).
# Each loop iteration consumes two symbol bytes from syms, runs 32 butterflies
# (4 macro invocations x 8), writes 64 bytes of decisions at vp->dp and swaps
# the old/new metric pointers. On exit the three pointers are stored back
# into *vp.
# Returns 0 in %eax on success, -1 when vp is NULL.
update_viterbi27_blk_mmx:
        pushl %ebp
        movl %esp,%ebp
        pushl %esi
        pushl %edi
        pushl %edx
        pushl %ebx

        movl 8(%ebp),%edx               # edx = vp
        testl %edx,%edx
        jnz 0f
        # NULL vp: return -1. The '$' makes this an immediate; a bare "-1"
        # would be an absolute memory operand and fault instead of returning.
        movl $-1,%eax
        jmp err
0:      movl OLDMETRICS(%edx),%esi      # esi -> old metrics
        movl NEWMETRICS(%edx),%edi      # edi -> new metrics
        movl DP(%edx),%edx              # edx -> decisions

1:      movl 16(%ebp),%eax              # eax = nbits
        decl %eax
        jl 2f                           # passed zero, we're done
        movl %eax,16(%ebp)              # the stack slot doubles as the loop counter

        movl 12(%ebp),%ebx              # ebx = syms
        movw (%ebx),%ax                 # ax = second symbol : first symbol
        addl $2,%ebx
        movl %ebx,12(%ebp)              # advance syms by the two bytes just read

        movb %ah,%bl
        andl $255,%eax                  # eax = first symbol
        andl $255,%ebx                  # ebx = second symbol

        # shift into first array index dimension slot
        shll $5,%eax
        shll $5,%ebx

        # each invocation of this macro will do 8 butterflies in parallel
        .MACRO butterfly GROUP
        # Compute branch metrics
        movq (Mettab27_1+8*\GROUP)(%eax),%mm3
        movq fifteens,%mm0
        paddb (Mettab27_2+8*\GROUP)(%ebx),%mm3
        paddb ones,%mm3                 # emulate pavgb - this may not be necessary
        psrlq $1,%mm3
        pand %mm0,%mm3

        movq (8*\GROUP)(%esi),%mm6      # Incoming path metric, high bit = 0
        movq ((8*\GROUP)+32)(%esi),%mm2 # Incoming path metric, high bit = 1
        movq %mm6,%mm1
        movq %mm2,%mm7

        paddb %mm3,%mm6
        paddb %mm3,%mm2
        pxor %mm0,%mm3                  # invert branch metric
        paddb %mm3,%mm7                 # path metric for inverted symbols
        paddb %mm3,%mm1

        # live registers 1 2 6 7
        # Compare mm6 and mm7; mm1 and mm2
        pxor %mm3,%mm3
        movq %mm6,%mm4
        movq %mm1,%mm5
        psubb %mm7,%mm4                 # mm4 = mm6 - mm7
        psubb %mm2,%mm5                 # mm5 = mm1 - mm2
        pcmpgtb %mm3,%mm4               # mm4 = first set of decisions (ff = 1 better)
        pcmpgtb %mm3,%mm5               # mm5 = second set of decisions

        # live registers 1 2 4 5 6 7
        # select survivors
        movq %mm4,%mm0
        pand %mm4,%mm7
        movq %mm5,%mm3
        pand %mm5,%mm2
        pandn %mm6,%mm0
        pandn %mm1,%mm3
        por %mm0,%mm7                   # mm7 = first set of survivors
        por %mm3,%mm2                   # mm2 = second set of survivors

        # live registers 2 4 5 7
        # interleave & store decisions in mm4, mm5
        # interleave & store new branch metrics in mm2, mm7
        movq %mm4,%mm3
        movq %mm7,%mm0
        punpckhbw %mm5,%mm4
        punpcklbw %mm5,%mm3
        punpcklbw %mm2,%mm7             # interleave second 8 new metrics
        punpckhbw %mm2,%mm0             # interleave first 8 new metrics
        movq %mm4,(16*\GROUP+8)(%edx)
        movq %mm3,(16*\GROUP)(%edx)
        movq %mm7,(16*\GROUP)(%edi)
        movq %mm0,(16*\GROUP+8)(%edi)
        .endm

        # invoke macro 4 times for a total of 32 butterflies
        butterfly GROUP=0
        butterfly GROUP=1
        butterfly GROUP=2
        butterfly GROUP=3

        addl $64,%edx                   # bump decision pointer

        # swap metrics
        movl %esi,%eax
        movl %edi,%esi
        movl %eax,%edi
        jmp 1b

2:      emms
        movl 8(%ebp),%ebx               # ebx = vp
        # stash metric pointers
        movl %esi,OLDMETRICS(%ebx)
        movl %edi,NEWMETRICS(%ebx)
        movl %edx,DP(%ebx)              # stash incremented value of vp->dp
        xorl %eax,%eax                  # success: return 0
err:    popl %ebx
        popl %edx
        popl %edi
        popl %esi
        popl %ebp
        ret

        .data
        .align 8
# Byte-wise mask 0x0f, used to clip the averaged branch metric and, via pxor,
# to invert it.
fifteens:
        .byte 15,15,15,15,15,15,15,15

        .align 8
# Byte-wise +1 added before the right shift (the rounding step of the
# emulated pavgb).
ones:   .byte 1,1,1,1,1,1,1,1

161
libfec/mmxbfly29.s Normal file
View File

@ -0,0 +1,161 @@
/* Intel SIMD MMX implementation of Viterbi ACS butterflies
for 256-state (k=9) convolutional code
Copyright 2004 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
void update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits);
*/
# These are offsets into struct v29, defined in viterbi29.h
        .set DP,512
        .set OLDMETRICS,516
        .set NEWMETRICS,520

        .text
        .global update_viterbi29_blk_mmx,Mettab29_1,Mettab29_2
        .type update_viterbi29_blk_mmx,@function
        .align 16

# MMX (64-bit SIMD) version
# requires Pentium-MMX, Pentium-II or better
#
# update_viterbi29_blk_mmx(struct v29 *vp,unsigned char *syms,int nbits)
#
# Stack arguments: vp at 8(%ebp), syms at 12(%ebp), nbits at 16(%ebp).
# Each loop iteration consumes two symbol bytes from syms, runs 128
# butterflies (16 macro invocations x 8), writes 256 bytes of decisions at
# vp->dp and swaps the old/new metric pointers. On exit the three pointers
# are stored back into *vp.
# %eax is set to 0 on the normal path and to -1 when vp is NULL.
update_viterbi29_blk_mmx:
        pushl %ebp
        movl %esp,%ebp
        pushl %esi
        pushl %edi
        pushl %edx
        pushl %ebx

        movl 8(%ebp),%edx               # edx = vp
        testl %edx,%edx
        jnz 0f
        # NULL vp: return -1. The '$' makes this an immediate; a bare "-1"
        # would be an absolute memory operand and fault instead of returning.
        movl $-1,%eax
        jmp err
0:      movl OLDMETRICS(%edx),%esi      # esi -> old metrics
        movl NEWMETRICS(%edx),%edi      # edi -> new metrics
        movl DP(%edx),%edx              # edx -> decisions

1:      movl 16(%ebp),%eax              # eax = nbits
        decl %eax
        jl 2f                           # passed zero, we're done
        movl %eax,16(%ebp)              # the stack slot doubles as the loop counter

        movl 12(%ebp),%ebx              # ebx = syms
        movw (%ebx),%ax                 # ax = second symbol : first symbol
        addl $2,%ebx
        movl %ebx,12(%ebp)              # advance syms by the two bytes just read

        movb %ah,%bl
        andl $255,%eax                  # eax = first symbol
        andl $255,%ebx                  # ebx = second symbol

        # shift into first array index dimension slot
        shll $7,%eax
        shll $7,%ebx

        # each invocation of this macro will do 8 butterflies in parallel
        .MACRO butterfly GROUP
        # Compute branch metrics
        movq (Mettab29_1+8*\GROUP)(%eax),%mm3
        movq fifteens,%mm0
        paddb (Mettab29_2+8*\GROUP)(%ebx),%mm3
        paddb ones,%mm3                 # emulate pavgb - this may not be necessary
        psrlq $1,%mm3
        pand %mm0,%mm3

        movq (8*\GROUP)(%esi),%mm6      # Incoming path metric, high bit = 0
        movq ((8*\GROUP)+128)(%esi),%mm2 # Incoming path metric, high bit = 1
        movq %mm6,%mm1
        movq %mm2,%mm7

        paddb %mm3,%mm6
        paddb %mm3,%mm2
        pxor %mm0,%mm3                  # invert branch metric
        paddb %mm3,%mm7                 # path metric for inverted symbols
        paddb %mm3,%mm1

        # live registers 1 2 6 7
        # Compare mm6 and mm7; mm1 and mm2
        pxor %mm3,%mm3
        movq %mm6,%mm4
        movq %mm1,%mm5
        psubb %mm7,%mm4                 # mm4 = mm6 - mm7
        psubb %mm2,%mm5                 # mm5 = mm1 - mm2
        pcmpgtb %mm3,%mm4               # mm4 = first set of decisions (ff = 1 better)
        pcmpgtb %mm3,%mm5               # mm5 = second set of decisions

        # live registers 1 2 4 5 6 7
        # select survivors
        movq %mm4,%mm0
        pand %mm4,%mm7
        movq %mm5,%mm3
        pand %mm5,%mm2
        pandn %mm6,%mm0
        pandn %mm1,%mm3
        por %mm0,%mm7                   # mm7 = first set of survivors
        por %mm3,%mm2                   # mm2 = second set of survivors

        # live registers 2 4 5 7
        # interleave & store decisions in mm4, mm5
        # interleave & store new branch metrics in mm2, mm7
        movq %mm4,%mm3
        movq %mm7,%mm0
        punpckhbw %mm5,%mm4
        punpcklbw %mm5,%mm3
        punpcklbw %mm2,%mm7             # interleave second 8 new metrics
        punpckhbw %mm2,%mm0             # interleave first 8 new metrics
        movq %mm4,(16*\GROUP+8)(%edx)
        movq %mm3,(16*\GROUP)(%edx)
        movq %mm7,(16*\GROUP)(%edi)
        movq %mm0,(16*\GROUP+8)(%edi)
        .endm

        # invoke macro 16 times for a total of 128 butterflies
        butterfly GROUP=0
        butterfly GROUP=1
        butterfly GROUP=2
        butterfly GROUP=3
        butterfly GROUP=4
        butterfly GROUP=5
        butterfly GROUP=6
        butterfly GROUP=7
        butterfly GROUP=8
        butterfly GROUP=9
        butterfly GROUP=10
        butterfly GROUP=11
        butterfly GROUP=12
        butterfly GROUP=13
        butterfly GROUP=14
        butterfly GROUP=15

        addl $256,%edx                  # bump decision pointer

        # swap metrics
        movl %esi,%eax
        movl %edi,%esi
        movl %eax,%edi
        jmp 1b

2:      emms
        movl 8(%ebp),%ebx               # ebx = vp
        # stash metric pointers
        movl %esi,OLDMETRICS(%ebx)
        movl %edi,NEWMETRICS(%ebx)
        movl %edx,DP(%ebx)              # stash incremented value of vp->dp
        xorl %eax,%eax                  # normal exit: eax = 0
err:    popl %ebx
        popl %edx
        popl %edi
        popl %esi
        popl %ebp
        ret

        .data
        .align 8
# Byte-wise mask 0x0f, used to clip the averaged branch metric and, via pxor,
# to invert it.
fifteens:
        .byte 15,15,15,15,15,15,15,15

        .align 8
# Byte-wise +1 added before the right shift (the rounding step of the
# emulated pavgb).
ones:   .byte 1,1,1,1,1,1,1,1

70
libfec/peak_mmx_assist.s Normal file
View File

@ -0,0 +1,70 @@
# MMX assist routines for peakval
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
.text
# Find peak value in signed 16-bit input samples
# int peakval_mmx(signed short *in,int cnt);
.global peakval_mmx
.type peakval_mmx,@function
.align 16
# Stack arguments: in at 8(%ebp), cnt at 12(%ebp). The result is left in %eax.
# Samples are consumed four at a time; as soon as fewer than four remain the
# loop exits, so a trailing remainder is not examined here.
# %edx is overwritten and not restored.
# NOTE(review): peakval_mmx.c also defines a C function named peakval_mmx --
# confirm that only one of the two definitions is linked.
peakval_mmx:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
pushl %ebx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %mm7,%mm7 # clear peak
1: subl $4,%ecx
jl 2f
movq (%esi),%mm0
movq %mm0,%mm1
psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
pxor %mm1,%mm0 # complement negatives
psubw %mm1,%mm0 # add 1 to negatives
movq %mm7,%mm6 # copy previous peak
pcmpgtw %mm0,%mm6 # ff == old peak greater
pand %mm6,%mm7 # select old peaks that are greater
pandn %mm0,%mm6 # select new values that are greater
por %mm6,%mm7 # mm7 = per-lane peak so far
addl $8,%esi
jmp 1b
# Horizontal reduction: pull each of the four 16-bit lanes out of mm7 in turn
# and keep the largest in eax.
2: movd %mm7,%eax
psrlq $16,%mm7
andl $0xffff,%eax
movd %mm7,%edx
psrlq $16,%mm7
andl $0xffff,%edx
cmpl %edx,%eax
jnl 3f
movl %edx,%eax
3:
movd %mm7,%edx
psrlq $16,%mm7
andl $0xffff,%edx
cmpl %edx,%eax
jnl 4f
movl %edx,%eax
4:
movd %mm7,%edx
andl $0xffff,%edx
cmpl %edx,%eax
jnl 5f
movl %edx,%eax
5:
emms
popl %ebx
popl %ecx
popl %esi
popl %ebp
ret

51
libfec/peak_sse2_assist.s Normal file
View File

@ -0,0 +1,51 @@
# SSE2 assist routines for peakval
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)
.text
# Find peak absolute value in signed 16-bit input samples
# int peakval_sse2(signed short *in,int cnt);
.global peakval_sse2
.type peakval_sse2,@function
.align 16
# Stack arguments: in at 8(%ebp), cnt at 12(%ebp). The result is left in %eax.
# Samples are consumed eight at a time; as soon as fewer than eight remain the
# loop exits, so a trailing remainder is not examined here.
# NOTE(review): peakval_sse2.c also defines a C function named peakval_sse2 --
# confirm that only one of the two definitions is linked.
peakval_sse2:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %xmm7,%xmm7 # clear peak
1: subl $8,%ecx
jl 2f
movaps (%esi),%xmm0 # aligned load -- presumably callers pass a 16-byte aligned pointer; verify
movaps %xmm0,%xmm1
psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive
pxor %xmm1,%xmm0 # complement negatives
psubw %xmm1,%xmm0 # add 1 to negatives
pmaxsw %xmm0,%xmm7 # store peak
addl $16,%esi
jmp 1b
# Horizontal reduction: fold the upper half onto the lower half three times
# (8 bytes, 32 bits, 16 bits) keeping the per-lane maximum each time.
2: movaps %xmm7,%xmm0
psrldq $8,%xmm0
pmaxsw %xmm0,%xmm7
movaps %xmm7,%xmm0
psrlq $32,%xmm0
pmaxsw %xmm0,%xmm7
movaps %xmm7,%xmm0
psrlq $16,%xmm0
pmaxsw %xmm0,%xmm7 # max value in low word of %xmm7
movd %xmm7,%eax
andl $0xffff,%eax
popl %ecx
popl %esi
popl %ebp
ret

49
libfec/peak_sse_assist.s Normal file
View File

@ -0,0 +1,49 @@
# SSE assist routines for peakval
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
.text
# Find peak absolute value in signed 16-bit input samples
# int peakval_sse(signed short *in,int cnt);
.global peakval_sse
.type peakval_sse,@function
.align 16
# Stack arguments: in at 8(%ebp), cnt at 12(%ebp). The result is left in %eax.
# Samples are consumed four at a time; as soon as fewer than four remain the
# loop exits, so a trailing remainder is not examined here.
# NOTE(review): peakval_sse.c also defines a C function named peakval_sse --
# confirm that only one of the two definitions is linked.
peakval_sse:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %mm7,%mm7 # clear peak
1: subl $4,%ecx
jl 2f
movq (%esi),%mm0
movq %mm0,%mm1
psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
pxor %mm1,%mm0 # complement negatives
psubw %mm1,%mm0 # add 1 to negatives
pmaxsw %mm0,%mm7 # store peak
addl $8,%esi
jmp 1b
# Horizontal reduction: fold the upper half onto the lower half twice
# (32 bits, then 16 bits) keeping the per-lane maximum each time.
2: movq %mm7,%mm0
psrlq $32,%mm0
pmaxsw %mm0,%mm7
movq %mm7,%mm0
psrlq $16,%mm0
pmaxsw %mm0,%mm7 # max value in low word of %mm7
movd %mm7,%eax
andl $0xffff,%eax
emms
popl %ecx
popl %esi
popl %ebp
ret

38
libfec/peaktest.c Normal file
View File

@ -0,0 +1,38 @@
/* Verify correctness of the peak routine
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* These values should trigger leading/trailing array fragment handling */
#define NSAMP 200002
#define OFFSET 1
int peakval(signed short *,int);
int peakval_port(signed short *,int);
/* Fill a buffer with random samples, plant one known peak, and check that the
 * CPU-dispatched peakval() agrees with the portable reference peakval_port().
 * Both are called on &samples[OFFSET] with NSAMP-OFFSET samples.
 *
 * Exit status: 0 when the two results agree, 1 on a mismatch, so that
 * "make test" (which runs ./peaktest) actually fails when the check fails.
 */
int main(){
  int i,s;
  int result,rresult;
  signed short samples[NSAMP];

  srandom(time(NULL));

  for(i=0;i<NSAMP;i++){
    /* The 0x0fff mask keeps every sample in 0..4095, so the 0x8000 retry
     * condition below can never be true with this mask.
     */
    do {
      s = random() & 0x0fff;
    } while(s == 0x8000);
    samples[i] = s;
  }
  /* Known peak, larger than anything the masked random fill can produce */
  samples[5] = 25000;

  rresult = peakval_port(&samples[OFFSET],NSAMP-OFFSET);
  result = peakval(&samples[OFFSET],NSAMP-OFFSET);
  if(result == rresult){
    printf("OK\n");
    exit(0);
  }
  printf("peak mismatch: %d != %d\n",result,rresult);
  exit(1);
}

50
libfec/peakval.c Normal file
View File

@ -0,0 +1,50 @@
/* Switch to appropriate version of peakval routine
* Copyright 2004, Phil Karn, KA9Q
*/
#include <stdlib.h>
#include "fec.h"
int peakval_port(signed short *b,int cnt);
#ifdef __i386__
int peakval_mmx(signed short *b,int cnt);
int peakval_sse(signed short *b,int cnt);
int peakval_sse2(signed short *b,int cnt);
#endif
#ifdef __x86_64__
int peakval_sse2(signed short *b,int cnt);
#endif
#ifdef __VEC__
int peakval_av(signed short *b,int cnt);
#endif
/* Dispatch to the peak-value routine matching the detected CPU mode.
 * find_cpu_mode() is called first; Cpu_mode then selects the implementation.
 * Any mode without a dedicated case on this architecture, PORT included,
 * ends up in the portable C routine.
 */
int peakval(signed short *b,int cnt){
  find_cpu_mode();

  switch(Cpu_mode){
#ifdef __i386__
  case MMX:
    return peakval_mmx(b,cnt);
  case SSE:
    return peakval_sse(b,cnt);
  case SSE2:
    return peakval_sse2(b,cnt);
#endif

#ifdef __x86_64__
  case SSE2:
    /* The SSE2 routine is not used on x86-64; the portable one runs instead */
    //return peakval_sse2(b,cnt);
    return peakval_port(b,cnt);
#endif

#ifdef __VEC__
  case ALTIVEC:
    return peakval_av(b,cnt);
#endif

  case PORT:
  default:
    break;
  }
  return peakval_port(b,cnt);
}

61
libfec/peakval_av.c Normal file
View File

@ -0,0 +1,61 @@
/* Return the largest absolute value of a vector of signed shorts
* This is the Altivec SIMD version.
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include "fec.h"
/* Scan cnt signed 16-bit samples starting at in, eight per AltiVec vector,
 * tracking per-lane running minima and maxima, then fold those into the
 * single value that is returned.
 * NOTE(review): the return type here is signed short, whereas peakval.c
 * declares peakval_av as returning int -- confirm which one is intended.
 */
signed short peakval_av(signed short *in,int cnt){
vector signed short x;
int pad;
/* One 16-byte object viewed four ways: c[15] carries the shift count handed
 * to vec_sro(), and s[7] is used to read the final result lane.
 */
union { vector signed char cv; vector signed short hv; signed short s[8]; signed char c[16];} s;
vector signed short smallest,largest;
smallest = (vector signed short)(0);
largest = (vector signed short)(0);
/* pad = low four address bits of in; non-zero means in is not 16-byte aligned */
if((pad = (int)in & 15)!=0){
/* Load unaligned leading word */
x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in));
if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */
s.c[15] = (8-cnt)<<4;
x = vec_sro(x,s.cv);
}
smallest = vec_min(smallest,x);
largest = vec_max(largest,x);
/* Step past the leading fragment: pad is in bytes, hence pad/2 samples
 * (presumably relying on 2-byte shorts).
 */
in += 8-pad/2;
cnt -= 8-pad/2;
}
/* Everything is now aligned, rip through most of the block */
while(cnt >= 8){
x = vec_ld(0,in);
smallest = vec_min(smallest,x);
largest = vec_max(largest,x);
in += 8;
cnt -= 8;
}
/* Handle trailing fragment, if any */
if(cnt > 0){
x = vec_ld(0,in);
s.c[15] = (8-cnt)<<4;
x = vec_sro(x,s.cv);
smallest = vec_min(smallest,x);
largest = vec_max(largest,x);
}
/* Combine and extract result */
/* Fold the magnitude of the minima into the maxima, then reduce across lanes
 * by repeatedly shifting the vector right and taking the lane-wise maximum.
 */
largest = vec_max(largest,vec_abs(smallest));
s.c[15] = 64; /* Shift right four 16-bit words */
largest = vec_max(largest,vec_sro(largest,s.cv));
s.c[15] = 32; /* Shift right two 16-bit words */
largest = vec_max(largest,vec_sro(largest,s.cv));
s.c[15] = 16; /* Shift right one 16-bit word */
largest = vec_max(largest,vec_sro(largest,s.cv));
s.hv = largest;
return s.s[7];
}

34
libfec/peakval_mmx.c Normal file
View File

@ -0,0 +1,34 @@
/* Wrapper for the MMX version of peakval
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdlib.h>
int peakval_mmx_assist(signed short *,int);
/* Largest absolute value among cnt samples at b, MMX flavour.
 * Three phases: a scalar lead-in until b sits on an 8-byte boundary, the
 * assembler helper for the bulk, and a scalar pass over the last cnt%4
 * samples the helper leaves untouched.
 */
int peakval_mmx(signed short *b,int cnt){
  int best = 0;
  int mag;

  /* Scalar lead-in up to the next 8-byte boundary (or until input runs out) */
  for(; cnt != 0 && ((int)b & 7) != 0; b++, cnt--){
    mag = abs(*b);
    if(mag > best)
      best = mag;
  }

  /* Bulk of the buffer, four samples per step, in assembler */
  mag = peakval_mmx_assist(b,cnt);
  if(mag > best)
    best = mag;

  /* Skip what the helper covered and finish the remainder in C */
  b += cnt & ~3;
  for(cnt &= 3; cnt != 0; b++, cnt--){
    mag = abs(*b);
    if(mag > best)
      best = mag;
  }
  return best;
}

View File

@ -0,0 +1,70 @@
# MMX assist routines for peakval
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
.text
# Find peak value in signed 16-bit input samples
# int peakval_mmx_assist(signed short *in,int cnt);
.global peakval_mmx_assist
.type peakval_mmx_assist,@function
.align 16
# Stack arguments: in at 8(%ebp), cnt at 12(%ebp). The result is left in %eax.
# Samples are consumed four at a time; as soon as fewer than four remain the
# loop exits, so the caller has to deal with any trailing remainder.
# %edx is overwritten and not restored.
peakval_mmx_assist:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
pushl %ebx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %mm7,%mm7 # clear peak
1: subl $4,%ecx
jl 2f
movq (%esi),%mm0
movq %mm0,%mm1
psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
pxor %mm1,%mm0 # complement negatives
psubw %mm1,%mm0 # add 1 to negatives
movq %mm7,%mm6 # copy previous peak
pcmpgtw %mm0,%mm6 # ff == old peak greater
pand %mm6,%mm7 # select old peaks that are greater
pandn %mm0,%mm6 # select new values that are greater
por %mm6,%mm7 # mm7 = per-lane peak so far
addl $8,%esi
jmp 1b
# Horizontal reduction: pull each of the four 16-bit lanes out of mm7 in turn
# and keep the largest in eax.
2: movd %mm7,%eax
psrlq $16,%mm7
andl $0xffff,%eax
movd %mm7,%edx
psrlq $16,%mm7
andl $0xffff,%edx
cmpl %edx,%eax
jnl 3f
movl %edx,%eax
3:
movd %mm7,%edx
psrlq $16,%mm7
andl $0xffff,%edx
cmpl %edx,%eax
jnl 4f
movl %edx,%eax
4:
movd %mm7,%edx
andl $0xffff,%edx
cmpl %edx,%eax
jnl 5f
movl %edx,%eax
5:
emms
popl %ebx
popl %ecx
popl %esi
popl %ebp
ret

16
libfec/peakval_port.c Normal file
View File

@ -0,0 +1,16 @@
/* Portable C version of peakval
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdlib.h>
#include "fec.h"
/* Portable reference implementation: return the largest absolute value among
 * the first len samples of b. Yields 0 when len is zero or negative.
 */
int peakval_port(signed short *b,int len){
  int best = 0;
  int k = 0;

  while(k < len){
    int mag = abs(b[k++]);

    best = (mag > best) ? mag : best;
  }
  return best;
}

35
libfec/peakval_sse.c Normal file
View File

@ -0,0 +1,35 @@
/* IA-32 SSE version of peakval
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdlib.h>
#include "fec.h"
int peakval_sse_assist(signed short *,int);
/* Largest absolute value among cnt samples at b, SSE flavour.
 * Three phases: a scalar lead-in until b sits on an 8-byte boundary, the
 * assembler helper for the bulk, and a scalar pass over the last cnt%4
 * samples the helper leaves untouched.
 */
int peakval_sse(signed short *b,int cnt){
  int best = 0;
  int mag;

  /* Scalar lead-in up to the next 8-byte boundary (or until input runs out) */
  for(; cnt != 0 && ((int)b & 7) != 0; b++, cnt--){
    mag = abs(*b);
    if(mag > best)
      best = mag;
  }

  /* Bulk of the buffer, four samples per step, in assembler */
  mag = peakval_sse_assist(b,cnt);
  if(mag > best)
    best = mag;

  /* Skip what the helper covered and finish the remainder in C */
  b += cnt & ~3;
  for(cnt &= 3; cnt != 0; b++, cnt--){
    mag = abs(*b);
    if(mag > best)
      best = mag;
  }
  return best;
}

34
libfec/peakval_sse2.c Normal file
View File

@ -0,0 +1,34 @@
/* IA-32 SSE2 version of peakval
* Copyright 2004 Phil Karn, KA9Q
*/
#include <stdlib.h>
#include "fec.h"
int peakval_sse2_assist(signed short *,int);
/* Largest absolute value among cnt samples at b, SSE2 flavour.
 * Three phases: a scalar lead-in until b sits on a 16-byte boundary, the
 * assembler helper for the bulk, and a scalar pass over the last cnt%8
 * samples the helper leaves untouched.
 */
int peakval_sse2(signed short *b,int cnt){
  int best = 0;
  int mag;

  /* Scalar lead-in up to the next 16-byte boundary (or until input runs out) */
  for(; cnt != 0 && ((int)b & 15) != 0; b++, cnt--){
    mag = abs(*b);
    if(mag > best)
      best = mag;
  }

  /* Bulk of the buffer, eight samples per step, in assembler */
  mag = peakval_sse2_assist(b,cnt);
  if(mag > best)
    best = mag;

  /* Skip what the helper covered and finish the remainder in C */
  b += cnt & ~7;
  for(cnt &= 7; cnt != 0; b++, cnt--){
    mag = abs(*b);
    if(mag > best)
      best = mag;
  }
  return best;
}

View File

@ -0,0 +1,51 @@
# SSE2 assist routines for peakval
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
.text
# Find peak absolute value in signed 16-bit input samples
# int peakval_sse2_assist(signed short *in,int cnt);
.global peakval_sse2_assist
.type peakval_sse2_assist,@function
.align 16
# Stack arguments: in at 8(%ebp), cnt at 12(%ebp). The result is left in %eax.
# Samples are consumed eight at a time; as soon as fewer than eight remain the
# loop exits, so the caller has to deal with any trailing remainder.
# The C wrapper peakval_sse2() advances its pointer to a 16-byte boundary
# before calling this routine.
peakval_sse2_assist:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %xmm7,%xmm7 # clear peak
1: subl $8,%ecx
jl 2f
movaps (%esi),%xmm0 # aligned 16-byte load of eight samples
movaps %xmm0,%xmm1
psraw $15,%xmm1 # xmm1 = 1's if negative, 0's if positive
pxor %xmm1,%xmm0 # complement negatives
psubw %xmm1,%xmm0 # add 1 to negatives
pmaxsw %xmm0,%xmm7 # store peak
addl $16,%esi
jmp 1b
# Horizontal reduction: fold the upper half onto the lower half three times
# (8 bytes, 32 bits, 16 bits) keeping the per-lane maximum each time.
2: movaps %xmm7,%xmm0
psrldq $8,%xmm0
pmaxsw %xmm0,%xmm7
movaps %xmm7,%xmm0
psrlq $32,%xmm0
pmaxsw %xmm0,%xmm7
movaps %xmm7,%xmm0
psrlq $16,%xmm0
pmaxsw %xmm0,%xmm7 # max value in low word of %xmm7
movd %xmm7,%eax
andl $0xffff,%eax
popl %ecx
popl %esi
popl %ebp
ret

View File

@ -0,0 +1,49 @@
# SSE assist routines for peakval
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Lesser General Public License (LGPL)
.text
# Find peak absolute value in signed 16-bit input samples
# int peakval_sse_assist(signed short *in,int cnt);
.global peakval_sse_assist
.type peakval_sse_assist,@function
.align 16
# Stack arguments: in at 8(%ebp), cnt at 12(%ebp). The result is left in %eax.
# Samples are consumed four at a time; as soon as fewer than four remain the
# loop exits, so the caller has to deal with any trailing remainder.
peakval_sse_assist:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %mm7,%mm7 # clear peak
1: subl $4,%ecx
jl 2f
movq (%esi),%mm0
movq %mm0,%mm1
psraw $15,%mm1 # mm1 = 1's if negative, 0's if positive
pxor %mm1,%mm0 # complement negatives
psubw %mm1,%mm0 # add 1 to negatives
pmaxsw %mm0,%mm7 # store peak
addl $8,%esi
jmp 1b
# Horizontal reduction: fold the upper half onto the lower half twice
# (32 bits, then 16 bits) keeping the per-lane maximum each time.
2: movq %mm7,%mm0
psrlq $32,%mm0
pmaxsw %mm0,%mm7
movq %mm7,%mm0
psrlq $16,%mm0
pmaxsw %mm0,%mm7 # max value in low word of %mm7
movd %mm7,%eax
andl $0xffff,%eax
emms
popl %ecx
popl %esi
popl %ebp
ret

26
libfec/rs-common.h Normal file
View File

@ -0,0 +1,26 @@
/* Stuff common to all the general-purpose Reed-Solomon codecs
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
/* Reed-Solomon codec control block */
/* data_t is not defined in this header; the including source file is
 * expected to define it before the #include.
 * NOTE(review): going by the field names, alpha_to[] looks like the
 * index -> field element (antilog) table and index_of[] the
 * field element -> index (log) table, i.e. the opposite of the two
 * labels below - confirm against the init code before relying on them.
 */
struct rs {
int mm; /* Bits per symbol */
int nn; /* Symbols per block (= (1<<mm)-1) */
data_t *alpha_to; /* log lookup table */
data_t *index_of; /* Antilog lookup table */
data_t *genpoly; /* Generator polynomial */
int nroots; /* Number of generator roots = number of parity symbols */
int fcr; /* First consecutive root, index form */
int prim; /* Primitive element, index form */
int iprim; /* prim-th root of 1, index form */
int pad; /* Padding bytes in shortened block */
};
/* Reduce x into the range [0, nn) where nn = (1<<mm)-1.
 * Each pass removes one nn and then folds the bits above the low mm bits
 * back onto the low mm bits, so no division is needed.
 */
static inline int modnn(struct rs *rs,int x){
  const int symbols = rs->nn;  /* also serves as the low-mm-bits mask */
  const int bits = rs->mm;

  while (x >= symbols) {
    x -= symbols;
    x = (x >> bits) + (x & symbols);
  }
  return x;
}

198
libfec/rs.3 Normal file
View File

@ -0,0 +1,198 @@
.TH REED-SOLOMON 3
.SH NAME
init_rs_int, encode_rs_int, decode_rs_int, free_rs_int,
init_rs_char, encode_rs_char, decode_rs_char, free_rs_char,
encode_rs_8, decode_rs_8, encode_rs_ccsds, decode_rs_ccsds
\- Reed-Solomon encoding/decoding
.SH SYNOPSIS
.nf
.ft B
#include "fec.h"
void *init_rs_int(int symsize,int gfpoly,int fcr,int prim,
int nroots,int pad);
void encode_rs_int(void *rs,int *data,int *parity);
int decode_rs_int(void *rs,int *data,int *eras_pos,int no_eras);
void free_rs_int(void *rs);
void *init_rs_char(int symsize,int gfpoly,int fcr,int prim,
int nroots,int pad);
void encode_rs_char(void *rs,unsigned char *data,
unsigned char *parity);
int decode_rs_char(void *rs,unsigned char *data,int *eras_pos,
int no_eras);
void free_rs_char(void *rs);
void encode_rs_8(unsigned char *data,unsigned char *parity,
int pad);
int decode_rs_8(unsigned char *data,int *eras_pos,int no_eras,
int pad);
void encode_rs_ccsds(unsigned char *data,unsigned char *parity,
int pad);
int decode_rs_ccsds(unsigned char *data,int *eras_pos,int no_eras,
int pad);
unsigned char Taltab[256];
unsigned char Tal1tab[256];
.fi
.SH DESCRIPTION
These functions implement Reed-Solomon error control encoding and
decoding. For optimal performance in a variety of applications, three
sets of functions are supplied. To access these functions, add "-lfec"
to your linker command line.
The functions with names ending in \fB_int\fR handle data in integer arrays,
permitting arbitrarily large codewords limited only by machine
resources.
The functions with names ending in \fB_char\fR take unsigned char arrays and can
handle codes with symbols of 8 bits or less (i.e., with codewords of
255 symbols or less).
\fBencode_rs_8\fR and \fBdecode_rs_8\fR implement a specific
(255,223) code with 8-bit symbols specified by the CCSDS:
a field generator of 1 + X + X^2 + X^7 + X^8 and a code
generator with first consecutive root = 112 and a primitive element of
11. These functions use the conventional
polynomial form, \fInot\fR the dual-basis specified in
the CCSDS standard, to represent symbols. This code may be
shortened by giving a non-zero \fBpad\fR value to produce a
(255-\fBpad\fR,223-\fBpad\fR) code. The padding will consist of the
specified number of zeroes at the front of the full codeword.
For full CCSDS compatibility, \fBencode_rs_ccsds\fR and
\fBdecode_rs_ccsds\fR are provided. These functions use two lookup
tables, \fBTaltab\fR to convert from conventional to dual-basis, and
\fBTal1tab\fR to perform the inverse mapping from dual-basis to
conventional form, before and after calls to \fBencode_rs_8\fR
and \fBdecode_rs_8\fR.
The \fB_8\fR and \fB_ccsds\fR functions do not require initialization.
To use the general purpose RS encoder or decoder (i.e.,
the \fB_char\fR or \fB_int\fR versions), the user must first
call \fBinit_rs_int\fR or \fBinit_rs_char\fR as appropriate. The
arguments are as follows:
\fBsymsize\fR gives the symbol size in bits, up to 8 for \fBinit_rs_char\fR
or 32 for \fBinit_rs_int\fR on a machine with 32-bit ints (though such a
huge code would exhaust memory limits on a 32-bit machine). The resulting
Reed-Solomon code word will have 2^\fBsymsize\fR - 1 symbols,
each containing \fBsymsize\fR bits. The codeword may be shortened with the
\fBpad\fR parameter described below.
\fBgfpoly\fR gives the extended Galois field generator polynomial coefficients,
with the 0th coefficient in the low order bit. The polynomial
\fImust\fR be primitive; if not, the call will fail and NULL will be
returned.
\fBfcr\fR gives, in index form, the first consecutive root of the
Reed Solomon code generator polynomial.
\fBprim\fR gives, in index form, the primitive element in the Galois field
used to generate the Reed Solomon code generator polynomial.
\fBnroots\fR gives the number of roots in the Reed Solomon code
generator polynomial. This equals the number of parity symbols
per code block.
\fBpad\fR gives the number of leading symbols in the codeword
that are implicitly padded to zero in a shortened code block.
The resulting Reed-Solomon code has parameters (N,K), where
N = 2^\fBsymsize\fR - \fBpad\fR - 1 and K = N-\fBnroots\fR.
The \fBencode_rs_char\fR and \fBencode_rs_int\fR functions accept
the pointer returned by \fBinit_rs_char\fR or
\fBinit_rs_int\fR, respectively, to
encode a block of data using the specified code.
The input data array is expected to
contain K symbols (of \fBsymsize\fR bits each, right justified
in each char or int) and \fBnroots\fR parity symbols will be placed
into the \fBparity\fR array, right justified.
The \fBdecode_\fR functions correct
the errors in a Reed-Solomon codeword of N symbols up to the capability of the code.
An optional list of "erased" symbol indices may be given in the \fBeras_pos\fR
array to assist the decoder; this parameter may be NULL if no erasures
are given. The number of erased symbols must be given in the \fBno_eras\fR
parameter.
To maximize performance, the encode and decode functions perform no
"sanity checking" of their inputs. Decoder failure may result if
\fBeras_pos\fR contains duplicate entries, and both encoder and
decoder will fail if an input symbol exceeds its allowable range.
(Symbol range overflow cannot occur with the \fB_8\fR or
\fB_ccsds\fR functions,
or with the \fB_char\fR functions when 8-bit symbols are specified.)
The decoder corrects the symbols "in place", returning the number
of symbols in error. If the codeword is uncorrectable, -1 is returned
and the data block is unchanged. If \fBeras_pos\fR is non-null, it is
used to return a list of corrected symbol positions, in no particular
order. This means that the
array passed through this parameter \fImust\fR have at least \fBnroots\fR
elements to prevent a possible buffer overflow.
The \fBfree_rs_int\fR and \fBfree_rs_char\fR functions free the internal
space allocated by the \fBinit_rs_int\fR and \fBinit_rs_char\fR functions,
respectively.
The functions \fBencode_rs_8\fR and \fBdecode_rs_8\fR do not have
corresponding \fBinit\fR and \fBfree\fR, nor do they take the
\fBrs\fR argument accepted by the other functions as their parameters
are statically compiled. These functions implement a code
equivalent to calling
\fBinit_rs_char\fR(8,0x187,112,11,32,pad);
and using the resulting pointer with \fBencode_rs_char\fR and
\fBdecode_rs_char\fR.
.SH RETURN VALUES
\fBinit_rs_int\fR and \fBinit_rs_char\fR return a pointer to an internal
control structure that must be passed to the corresponding encode, decode
and free functions. These functions return NULL on error.
The \fBdecode_\fR functions return a count of corrected
symbols, or -1 if the block was uncorrectable.
.SH AUTHOR
Phil Karn, KA9Q (karn@ka9q.net), based heavily on earlier work by Robert
Morelos-Zaragoza (robert@spectra.eng.hawaii.edu) and Hari Thirumoorthy
(harit@spectra.eng.hawaii.edu). Extra improvements suggested by Detmar
Welz (dwelz@web.de).
.SH COPYRIGHT
Copyright 2004, Phil Karn, KA9Q. May be used under the terms of the
GNU Lesser General Public License (LGPL).
.SH SEE ALSO
CCSDS 101.0-B-6: Telemetry Channel Coding.
http://www.ccsds.org/documents/101x0b6.pdf
.SH NOTE
CCSDS chose the "dual basis" symbol representation because it
simplified the implementation of a Reed-Solomon encoder in dedicated
hardware. However, this approach holds no advantages for a software
implementation on a general purpose computer, so use of the dual basis
is recommended only if compatibility with the CCSDS standard is needed,
e.g., to decode data from an existing spacecraft using the CCSDS
standard. If you just want a fast (255,223) RS codec without needing
to interoperate with a CCSDS standard code, use \fBencode_rs_8\fR
and \fBdecode_rs_8\fR.

54
libfec/rs_speedtest.c Normal file
View File

@ -0,0 +1,54 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "fec.h"
/* User CPU time, in seconds, consumed between two getrusage() snapshots */
static double user_cpu_seconds(const struct rusage *start,const struct rusage *finish){
  return finish->ru_utime.tv_sec - start->ru_utime.tv_sec
    + 1e-6*(finish->ru_utime.tv_usec - start->ru_utime.tv_usec);
}

/* Time `trials` decodes of one error-free (255,223) block, first with the
 * general-purpose char codec and then with the fixed CCSDS codec, and print
 * the user CPU time and resulting throughput for each.
 * Exits with status 1 if the general-purpose codec cannot be initialized.
 */
int main(){
  unsigned char block[255];
  int i;
  void *rs;
  struct rusage start,finish;
  double extime;
  int trials = 10000;

  for(i=0;i<223;i++)
    block[i] = 0x01;

  rs = init_rs_char(8,0x187,112,11,32,0);
  if(rs == NULL){
    /* init_rs_char returns NULL on error; the encode/decode calls below
     * would dereference it */
    fprintf(stderr,"init_rs_char failed!\n");
    exit(1);
  }
  encode_rs_char(rs,block,&block[223]);

  getrusage(RUSAGE_SELF,&start);
  for(i=0;i<trials;i++){
#if 0
    block[0] ^= 0xff; /* Introduce an error */
    block[2] ^= 0xff; /* Introduce an error */
#endif
    decode_rs_char(rs,block,NULL,0);
  }
  getrusage(RUSAGE_SELF,&finish);
  extime = user_cpu_seconds(&start,&finish);
  printf("Execution time for %d Reed-Solomon blocks using general decoder: %.2f sec\n",trials,extime);
  printf("decoder speed: %g bits/s\n",trials*223*8/extime);
  free_rs_char(rs); /* the general-purpose codec is not used past this point */

  encode_rs_8(block,&block[223],0);
  getrusage(RUSAGE_SELF,&start);
  for(i=0;i<trials;i++){
#if 0
    block[0] ^= 0xff; /* Introduce an error */
    block[2] ^= 0xff; /* Introduce an error */
#endif
    decode_rs_8(block,NULL,0,0);
  }
  getrusage(RUSAGE_SELF,&finish);
  extime = user_cpu_seconds(&start,&finish);
  printf("Execution time for %d Reed-Solomon blocks using CCSDS decoder: %.2f sec\n",trials,extime);
  printf("decoder speed: %g bits/s\n",trials*223*8/extime);
  exit(0);
}

296
libfec/rstest.c Normal file
View File

@ -0,0 +1,296 @@
/* Test the Reed-Solomon codecs
* for various block sizes and with random data and random error patterns
*
* Copyright 2002 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <time.h>
#include "fec.h"
/* Parameters of one Reed-Solomon code to exercise.  The first five fields
 * are handed to init_rs_char()/init_rs_int() by the exercise functions.
 */
struct etab {
  int symsize;  /* bits per symbol; 0 marks the end of the table */
  int genpoly;  /* field generator polynomial */
  int fcs;      /* first consecutive root (the init functions' fcr argument) */
  int prim;     /* primitive element */
  int nroots;   /* number of generator roots */
  int ntrials;  /* NOTE(review): not read by any code in this file's visible part */
};

/* Codes under test, terminated by an all-zero entry */
struct etab Tab[] = {
  {  2,     0x7,   1,  1,  1, 10 },
  {  3,     0xb,   1,  1,  2, 10 },
  {  4,    0x13,   1,  1,  4, 10 },
  {  5,    0x25,   1,  1,  6, 10 },
  {  6,    0x43,   1,  1,  8, 10 },
  {  7,    0x89,   1,  1, 10, 10 },
  {  8,   0x11d,   1,  1, 32, 10 },
  {  8,   0x187, 112, 11, 32, 10 }, /* Duplicates CCSDS codec */
  {  9,   0x211,   1,  1, 32, 10 },
  { 10,   0x409,   1,  1, 32, 10 },
  { 11,   0x805,   1,  1, 32, 10 },
  { 12,  0x1053,   1,  1, 32,  5 },
  { 13,  0x201b,   1,  1, 32,  2 },
  { 14,  0x4443,   1,  1, 32,  1 },
  { 15,  0x8003,   1,  1, 32,  1 },
  { 16, 0x1100b,   1,  1, 32,  1 },
  {  0,       0,   0,  0,  0,  0 },
};
int exercise_char(struct etab *e);
int exercise_int(struct etab *e);
int exercise_8(void);

/* Run the fixed CCSDS codec test, then every code listed in Tab[]
 * (char codec for symbols of up to 8 bits, int codec otherwise).
 * The exit status is EXIT_FAILURE if any exercise function returned a
 * nonzero value, so scripts can detect a failing run; previously the
 * results were discarded and the program always exited with 0.
 */
int main(){
  int i;
  int failed = 0;

  srandom(time(NULL));

  printf("Testing fixed CCSDS encoder...\n");
  if(exercise_8() != 0)
    failed = 1;

  for(i=0;Tab[i].symsize != 0;i++){
    int nn,kk,status;

    nn = (1<<Tab[i].symsize) - 1;
    kk = nn - Tab[i].nroots;
    printf("Testing (%d,%d) code...\n",nn,kk);
    if(Tab[i].symsize <= 8)
      status = exercise_char(&Tab[i]);
    else
      status = exercise_int(&Tab[i]);
    if(status != 0)
      failed = 1;
  }
  exit(failed ? EXIT_FAILURE : EXIT_SUCCESS);
}
/* Exercise encode_rs_8()/decode_rs_8() on the (255,223) code.
 * For every error count from 0 up to (nn-kk)/2, a random block is encoded,
 * corrupted in that many distinct random positions and decoded again.
 * Returns the number of discrepancies found (wrong error count, wrongly
 * reported location, or data left uncorrected); 0 means everything passed.
 */
int exercise_8(void){
  int nn = 255;
  unsigned char block[nn],tblock[nn];
  int errlocs[nn],derrlocs[nn];
  int i;
  int errors;
  int derrors,kk;
  int errval,errloc;
  int erasures;
  int decoder_errors = 0;

  /* Compute code parameters */
  kk = 223;

  /* Test up to the error correction capacity of the code */
  for(errors=0;errors<=(nn-kk)/2;errors++){
    /* Load block with random data and encode.
     * (The copy into tblock that used to sit here read the parity bytes
     * before they were written and was overwritten below, so it is gone.)
     */
    for(i=0;i<kk;i++)
      block[i] = random() & nn;
    encode_rs_8(block,&block[kk],0);

    /* Make temp copy, seed with errors */
    memcpy(tblock,block,sizeof(block));
    memset(errlocs,0,sizeof(errlocs));
    memset(derrlocs,0,sizeof(derrlocs));
    erasures=0;
    for(i=0;i<errors;i++){
      do {
        errval = random() & nn;
      } while(errval == 0); /* Error value must be nonzero */
      do {
        errloc = random() % nn;
      } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
      errlocs[errloc] = 1;
#if FLAG_ERASURE
      if(random() & 1) /* 50-50 chance */
        derrlocs[erasures++] = errloc;
#endif
      tblock[errloc] ^= errval;
    }

    /* Decode the errored block */
    derrors = decode_rs_8(tblock,derrlocs,erasures,0);
    if(derrors != errors){
      printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors);
      decoder_errors++;
    }
    /* Every location the decoder reports must be one we corrupted */
    for(i=0;i<derrors;i++){
      if(errlocs[derrlocs[i]] == 0){
        printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]);
        decoder_errors++;
      }
    }
    if(memcmp(tblock,block,sizeof(tblock)) != 0){
      printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk);
      decoder_errors++;
      for(i=0;i<nn;i++)
        printf(" %02x",tblock[i] ^ block[i]);
      printf("\n");
    }
  }
  return decoder_errors;
}
/* Exercise the general-purpose char codec with the code described by *e.
 * For every error count from 0 up to nroots/2, a random block is encoded,
 * corrupted in that many distinct random positions and decoded again.
 * Returns -1 if the symbol size exceeds 8 bits or the codec cannot be
 * initialized; otherwise the number of discrepancies found (0 = pass).
 * The count used to be computed and then dropped in favour of a constant 0.
 */
int exercise_char(struct etab *e){
  /* Reject unsupported symbol sizes before sizing the work arrays from them */
  if(e->symsize > 8)
    return -1;

  int nn = (1<<e->symsize) - 1;
  unsigned char block[nn],tblock[nn];
  int errlocs[nn],derrlocs[nn];
  int i;
  int errors;
  int derrors,kk;
  int errval,errloc;
  int erasures;
  int decoder_errors = 0;
  void *rs;

  /* Compute code parameters */
  kk = nn - e->nroots;

  rs = init_rs_char(e->symsize,e->genpoly,e->fcs,e->prim,e->nroots,0);
  if(rs == NULL){
    printf("init_rs_char failed!\n");
    return -1;
  }
  /* Test up to the error correction capacity of the code */
  for(errors=0;errors <= e->nroots/2;errors++){
    /* Load block with random data and encode.
     * (The copy into tblock that used to sit here read the parity bytes
     * before they were written and was overwritten below, so it is gone.)
     */
    for(i=0;i<kk;i++)
      block[i] = random() & nn;
    encode_rs_char(rs,block,&block[kk]);

    /* Make temp copy, seed with errors */
    memcpy(tblock,block,sizeof(block));
    memset(errlocs,0,sizeof(errlocs));
    memset(derrlocs,0,sizeof(derrlocs));
    erasures=0;
    for(i=0;i<errors;i++){
      do {
        errval = random() & nn;
      } while(errval == 0); /* Error value must be nonzero */
      do {
        errloc = random() % nn;
      } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
      errlocs[errloc] = 1;
#if FLAG_ERASURE
      if(random() & 1) /* 50-50 chance */
        derrlocs[erasures++] = errloc;
#endif
      tblock[errloc] ^= errval;
    }

    /* Decode the errored block */
    derrors = decode_rs_char(rs,tblock,derrlocs,erasures);
    if(derrors != errors){
      printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors);
      decoder_errors++;
    }
    /* Every location the decoder reports must be one we corrupted */
    for(i=0;i<derrors;i++){
      if(errlocs[derrlocs[i]] == 0){
        printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]);
        decoder_errors++;
      }
    }
    if(memcmp(tblock,block,sizeof(tblock)) != 0){
      printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk);
      decoder_errors++;
      for(i=0;i<nn;i++)
        printf(" %02x",tblock[i] ^ block[i]);
      printf("\n");
    }
  }
  free_rs_char(rs);
  return decoder_errors;
}
/* Exercise the general-purpose int codec with the code described by *e.
 * For every error count from 0 up to nroots/2, a random block is encoded,
 * corrupted in that many distinct random positions and decoded again.
 * Returns -1 if the codec cannot be initialized; otherwise the number of
 * discrepancies found (0 = pass).  The count used to be computed and then
 * dropped in favour of a constant 0.
 * Note: the four work arrays are variable-length arrays of nn ints each,
 * so stack use grows with 2^symsize.
 */
int exercise_int(struct etab *e){
  int nn = (1<<e->symsize) - 1;
  int block[nn],tblock[nn];
  int errlocs[nn],derrlocs[nn];
  int i;
  int errors;
  int derrors,kk;
  int errval,errloc;
  int erasures;
  int decoder_errors = 0;
  void *rs;

  /* Compute code parameters */
  kk = nn - e->nroots;

  rs = init_rs_int(e->symsize,e->genpoly,e->fcs,e->prim,e->nroots,0);
  if(rs == NULL){
    printf("init_rs_int failed!\n");
    return -1;
  }
  /* Test up to the error correction capacity of the code */
  for(errors=0;errors <= e->nroots/2;errors++){
    /* Load block with random data and encode.
     * (The copy into tblock that used to sit here read the parity symbols
     * before they were written and was overwritten below, so it is gone.)
     */
    for(i=0;i<kk;i++)
      block[i] = random() & nn;
    encode_rs_int(rs,block,&block[kk]);

    /* Make temp copy, seed with errors */
    memcpy(tblock,block,sizeof(block));
    memset(errlocs,0,sizeof(errlocs));
    memset(derrlocs,0,sizeof(derrlocs));
    erasures=0;
    for(i=0;i<errors;i++){
      do {
        errval = random() & nn;
      } while(errval == 0); /* Error value must be nonzero */
      do {
        errloc = random() % nn;
      } while(errlocs[errloc] != 0); /* Must not choose the same location twice */
      errlocs[errloc] = 1;
#if FLAG_ERASURE
      if(random() & 1) /* 50-50 chance */
        derrlocs[erasures++] = errloc;
#endif
      tblock[errloc] ^= errval;
    }

    /* Decode the errored block */
    derrors = decode_rs_int(rs,tblock,derrlocs,erasures);
    if(derrors != errors){
      printf("(%d,%d) decoder says %d errors, true number is %d\n",nn,kk,derrors,errors);
      decoder_errors++;
    }
    /* Every location the decoder reports must be one we corrupted */
    for(i=0;i<derrors;i++){
      if(errlocs[derrlocs[i]] == 0){
        printf("(%d,%d) decoder indicates error in location %d without error\n",nn,kk,derrlocs[i]);
        decoder_errors++;
      }
    }
    if(memcmp(tblock,block,sizeof(tblock)) != 0){
      printf("(%d,%d) decoder uncorrected errors! output ^ input:",nn,kk);
      decoder_errors++;
      for(i=0;i<nn;i++)
        printf(" %02x",tblock[i] ^ block[i]);
      printf("\n");
    }
  }
  free_rs_int(rs);
  return decoder_errors;
}

43
libfec/sim.c Normal file
View File

@ -0,0 +1,43 @@
#include <math.h>
#include <stdlib.h>
#include "fec.h"
#define MAX_RANDOM 0x7fffffff
/* Generate gaussian random double with specified mean and std_dev */
double normal_rand(double mean, double std_dev)
{
double fac,rsq,v1,v2;
static double gset;
static int iset;
if(iset){
/* Already got one */
iset = 0;
return mean + std_dev*gset;
}
/* Generate two evenly distributed numbers between -1 and +1
* that are inside the unit circle
*/
do {
v1 = 2.0 * (double)random() / MAX_RANDOM - 1;
v2 = 2.0 * (double)random() / MAX_RANDOM - 1;
rsq = v1*v1 + v2*v2;
} while(rsq >= 1.0 || rsq == 0.0);
fac = sqrt(-2.0*log(rsq)/rsq);
gset = v1*fac;
iset++;
return mean + std_dev*v2*fac;
}
/* Produce one noisy channel sample for the binary symbol `sym`.
 * A unit-variance Gaussian value centred on +amp (sym nonzero) or -amp
 * (sym zero) is scaled by `gain`, shifted by `offset`, truncated to int and
 * limited to the range [0, clip] before being returned as an unsigned char.
 */
unsigned char addnoise(int sym,double amp,double gain,double offset,int clip){
  double level = sym ? amp : -amp;
  int quantized = offset + gain*normal_rand(level,1.0);

  /* Limit to the 0..clip range */
  if(quantized < 0)
    return 0;
  if(quantized > clip)
    return clip;
  return quantized;
}

247
libfec/simd-viterbi.3 Normal file
View File

@ -0,0 +1,247 @@
.TH SIMD-VITERBI 3
.SH NAME
create_viterbi27, set_viterbi27_polynomial, init_viterbi27, update_viterbi27_blk,
chainback_viterbi27, delete_viterbi27,
create_viterbi29, set_viterbi29_polynomial, init_viterbi29, update_viterbi29_blk,
chainback_viterbi29, delete_viterbi29,
create_viterbi39, set_viterbi39_polynomial, init_viterbi39, update_viterbi39_blk,
chainback_viterbi39, delete_viterbi39,
create_viterbi615, set_viterbi615_polynomial, init_viterbi615, update_viterbi615_blk,
chainback_viterbi615, delete_viterbi615 \- IA32 SIMD-assisted Viterbi decoders
.SH SYNOPSIS
.nf
.ft B
#include "fec.h"
void *create_viterbi27(int blocklen);
void set_viterbi27_polynomial(int polys[2]);
int init_viterbi27(void *vp,int starting_state);
int update_viterbi27_blk(void *vp,unsigned char syms[],int nbits);
int chainback_viterbi27(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi27(void *vp);
.fi
.sp
.nf
.ft B
void *create_viterbi29(int blocklen);
void set_viterbi29_polynomial(int polys[2]);
int init_viterbi29(void *vp,int starting_state);
int update_viterbi29_blk(void *vp,unsigned char syms[],int nbits);
int chainback_viterbi29(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi29(void *vp);
.fi
.sp
.nf
.ft B
void *create_viterbi39(int blocklen);
void set_viterbi39_polynomial(int polys[3]);
int init_viterbi39(void *vp,int starting_state);
int update_viterbi39_blk(void *vp,unsigned char syms[],int nbits);
int chainback_viterbi39(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi39(void *vp);
.fi
.sp
.nf
.ft B
void *create_viterbi615(int blocklen);
void set_viterbi615_polynomial(int polys[6]);
int init_viterbi615(void *vp,int starting_state);
int update_viterbi615_blk(void *vp,unsigned char syms[],int nbits);
int chainback_viterbi615(void *vp, unsigned char *data,unsigned int nbits,unsigned int endstate);
void delete_viterbi615(void *vp);
.fi
.SH DESCRIPTION
These functions implement high performance Viterbi decoders for four
convolutional codes: a rate 1/2 constraint length 7 (k=7) code
("viterbi27"), a rate 1/2 k=9 code ("viterbi29"),
a rate 1/3 k=9 code ("viterbi39") and a rate 1/6 k=15 code ("viterbi615").
The decoders use the Intel IA32 or PowerPC SIMD instruction sets, if available, to improve
decoding speed.
On the IA32 there are three different SIMD instruction sets. The first
and most common is MMX, introduced on later Intel Pentiums and then on
the Intel Pentium II and most Intel clones (AMD K6, Transmeta Crusoe,
etc). SSE was introduced on the Pentium III and later implemented in
the AMD Athlon 4 (AMD calls it "3D Now! Professional"). Most
recently, SSE2 was introduced in the Intel Pentium 4, and has been
adopted by more recent AMD CPUs. The presence of SSE2 implies the
existence of SSE, which in turn implies MMX.
Altivec is the PowerPC SIMD instruction set. It is roughly comparable
to SSE2. Altivec was introduced to the general public in the Apple
Macintosh G4; it is also present in the G5. Altivec is actually a
Motorola trademark; Apple calls it "Velocity Engine" and IBM calls it
"VMX". All refer to the same thing.
When built for the IA32 or PPC architectures, the functions
automatically use the most powerful SIMD instruction set available. If
no SIMD instructions are available, or if the library is built for a
non-IA32, non-PPC machine, a portable C version is executed
instead.
.SH USAGE
Four versions of each function are provided, one for each code.
In the following discussion, change "viterbi" to "viterbi27", "viterbi29", "viterbi39"
or "viterbi615" as desired.
Before Viterbi decoding can begin, an instance must first be created with
\fBcreate_viterbi()\fR. This function creates and returns a pointer to
an internal control structure
containing the path metrics and the branch
decisions. \fBcreate_viterbi()\fR takes one argument that gives the
length of the data block in bits. You \fImust not\fR attempt to
decode a block longer than the length given to \fBcreate_viterbi()\fR.
Before decoding a new frame,
\fBinit_viterbi()\fR must be called to reset the decoder state.
It accepts the instance pointer returned by
\fBcreate_viterbi()\fR and the initial starting state of the
convolutional encoder (usually 0). If the initial starting state is unknown or
incorrect, the decoder will still function but the decoded data may be
incorrect at the start of the block.
Blocks of received symbols are processed with calls to
\fBupdate_viterbi_blk()\fR. The \fBnbits\fR parameter specifies the
number of \fIdata bits\fR (not channel symbols) represented by the
\fBsyms\fR buffer. (For rate 1/2 codes, the number of symbols in
\fBsyms\fR is twice \fInbits\fR, and so on.)
Each symbol is expected to range
from 0 through 255, with 0 corresponding to a "strong 0" and 255
corresponding to a "strong 1". The caller is responsible for
determining the proper pairing of input symbols (commonly known as
decoder symbol phasing).
At the end of the block, the data is recovered with a call to
\fBchainback_viterbi()\fR. The arguments are the pointer to the
decoder instance, a pointer to a user-supplied buffer into which the
decoded data is to be written, the number of data bits (not bytes)
that are to be decoded, and the terminal state of the convolutional
encoder at the end of the frame (usually 0). If the terminal state is
incorrect or unknown, the decoded data bits at the end of the frame
may be unreliable. The decoded data is written in big-endian order,
i.e., the first bit in the frame is written into the high order bit of
the first byte in the buffer. If the frame is not an integral number
of bytes long, the low order bits of the last byte in the frame will
be unused.
Note that the decoders assume the use of a tail, i.e., the encoding
and transmission of a sufficient number of padding bits beyond the end
of the user data to force the convolutional encoder into the known
terminal state given to \fBchainback_viterbi()\fR. The tail is
always one bit less than the constraint length of the code, so the k=7
code uses 6 tail bits (12 tail symbols), the rate 1/2 k=9 code uses 8 tail bits
(16 tail symbols), the rate 1/3 k=9 code uses 8 tail bits (24 tail symbols)
and the k=15 code uses 14 tail bits (84 tail symbols).
The tail bits are not included in the length arguments to
\fBcreate_viterbi()\fR and \fBchainback_viterbi()\fR. For example, if
the block contains 1000 user bits, then this would be the length
parameter given to \fBcreate_viterbi27()\fR and
\fBchainback_viterbi27()\fR, and \fBupdate_viterbi27_blk()\fR would be called
with a total of 2012 symbols - the last 12 encoded symbols
representing the tail bits.
After the call to \fBchainback_viterbi()\fR, the decoder may be reset
with a call to \fBinit_viterbi()\fR and another block can be decoded.
Alternatively, \fBdelete_viterbi()\fR can be called to free all resources
used by the Viterbi decoder.
The \fBset_viterbi_polynomial()\fR functions allow the use of code generator
polynomials other than the defaults. Although only one set of polynomials is generally
used with each code, there are different conventions as to their order and
symbol polarity, and these functions simplify their use.
The default polynomials for the viterbi27 routines
are those of the NASA-JPL convention \fIwithout\fR symbol inversion.
The NASA-JPL convention normally inverts the first symbol.
The CCSDS/NASA-GSFC convention swaps the two symbols and inverts the second.
.sp
To set the NASA-JPL convention with symbol inversion:
.sp
.nf
.ft B
int polys[2] = { -V27POLYA,V27POLYB };
set_viterbi27_polynomial(polys);
.ft R
.fi
.sp
and to set the CCSDS convention with symbol inversion:
.sp
.nf
.ft B
int polys[2] = { V27POLYB,-V27POLYA };
set_viterbi27_polynomial(polys);
.ft R
.fi
.sp
The default polynomials for the viterbi615 routines
are those used by the Cassini spacecraft \fIwithout\fR
symbol inversion. Mars Pathfinder (MPF) and STEREO
swap the third and fourth polynomials.
Both conventions invert the
first, third and fifth symbols. Refer to fec.h for the polynomial constant definitions.
.sp
To set the Cassini convention with symbol inversion, do the following:
.nf
.ft B
int polys[6] = { -V615POLYA,V615POLYB,-V615POLYC,V615POLYD,-V615POLYE,V615POLYF };
set_viterbi615_polynomial(polys);
.ft R
.fi
.sp
and to set the MPF/STEREO convention with symbol inversion:
.sp
.nf
.ft B
int polys[6] = { -V615POLYA,V615POLYB,-V615POLYD,V615POLYC,-V615POLYE,V615POLYF };
set_viterbi615_polynomial(polys);
.ft R
.fi
For performance reasons, calling this function changes the code
generator polynomials for \fIall\fR instances of corresponding Viterbi decoder,
including those already created.
.SH ERROR PERFORMANCE
These decoders have all been extensively tested and found to provide
performance consistent with that expected for soft-decision Viterbi
decoding with 8-bit symbols.
Due to internal differences, the implementations
vary slightly in error performance. In
general, the portable C versions exhibit the best error performance
because they use full-sized branch metrics, and the MMX versions
exhibit the worst because they use 8-bit branch metrics with modulo
comparisons. The SSE, SSE2 and Altivec implementations of the r=1/2 k=7 and
r=1/2 k=9 codes use unsigned
8-bit branch metrics, and are almost as good as the C versions. The
r=1/3 k=9 and r=1/6 k=15 codes are implemented with 16-bit path metrics in all SIMD
versions.
.SH DIRECT ACCESS TO SPECIFIC FUNCTION VERSIONS
Calling the functions listed above automatically calls the appropriate
version of the function depending on the CPU type and available SIMD
instructions. A particular version can also be called directly by
appending the appropriate suffix to the function name. The available
suffixes are "_mmx", "_sse", "_sse2", "_av" and "_port", for the MMX,
SSE, SSE2, Altivec and portable versions, respectively. For example,
the SSE2 version of the update_viterbi27_blk() function can be invoked
as update_viterbi27_blk_sse2().
Naturally, the _av functions are only available on the PowerPC and the
_mmx, _sse and _sse2 versions are only available on IA-32. Calling
a SIMD-enabled function on a CPU that doesn't support the appropriate
set of instructions will result in an illegal instruction exception.
.SH RETURN VALUES
\fBcreate_viterbi\fR returns a pointer to the structure containing
the decoder state.
The other functions return -1 on error, 0 otherwise.
.SH AUTHOR & COPYRIGHT
Phil Karn, KA9Q (karn@ka9q.net)
.SH LICENSE
This software may be used under the terms of the GNU Lesser General Public License (LGPL).

42
libfec/sqtest.c Normal file
View File

@ -0,0 +1,42 @@
/* Verify correctness of the sum-of-square routines */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/* These values should trigger leading/trailing array fragment handling */
#define NSAMP 200002
#define OFFSET 1
long long sumsq_wq(signed short *in,int cnt);
long long sumsq_wq_ref(signed short *in,int cnt);
/* Fill a buffer with random 16-bit samples and check that the optimized
 * sumsq_wq() agrees with the plain C reference sumsq_wq_ref() on a slice
 * that starts OFFSET samples into the buffer.
 * Prints "OK" and exits with 0 on agreement; prints both sums and exits
 * with 1 on a mismatch.
 */
int main(){
  int i;
  long long result,rresult;
  signed short samples[NSAMP];

  srandom(time(NULL));
  for(i=0;i<NSAMP;i++)
    samples[i] = random() & 0xffff;

  /* The reference value must come from the reference routine; it used to
   * be taken from sumsq_wq() itself, so the comparison could never fail */
  rresult = sumsq_wq_ref(&samples[OFFSET],NSAMP-OFFSET);
  result = sumsq_wq(&samples[OFFSET],NSAMP-OFFSET);
  if(result == rresult){
    printf("OK\n");
    exit(0);
  }
  printf("sum mismatch: %lld != %lld\n",result,rresult);
  exit(1);
}
/* Reference implementation: return the sum of the squares of the first
 * `cnt` samples at `in`, accumulated in a long long.  A cnt of zero or
 * less yields 0.
 */
long long sumsq_wq_ref(signed short *in,int cnt){
  long long total = 0;
  const signed short *p = in;

  while(cnt-- > 0){
    long sample = *p++;  /* widen before squaring */
    total += sample * sample;
  }
  return total;
}

210
libfec/sse2bfly27-64.s Normal file
View File

@ -0,0 +1,210 @@
/* Intel SIMD (SSE2) implementations of Viterbi ACS butterflies
for 64-state (k=7) convolutional code
Copyright 2003 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
Modifications for x86_64, 2012 Matthias P. Braendli, HB9EGM:
- changed registers to x86-64 equivalents
- changed instructions accordingly
- %rip indirect addressing needed for position independent code,
which is required because x86-64 needs dynamic libs to be PIC
void update_viterbi27_blk_sse2(struct v27 *vp,unsigned char syms[],int nbits) ;
*/
# SSE2 (128-bit integer SIMD) version
# All X86-64 CPUs include SSE2
# These are offsets into struct v27, defined in viterbi27_av.c
.set DP,128
.set OLDMETRICS,132
.set NEWMETRICS,136
.text
.global update_viterbi27_blk_sse2,Branchtab27_sse2
.type update_viterbi27_blk_sse2,@function
.align 16
# update_viterbi27_blk_sse2(vp, syms, nbits): function entry (x86-64 port).
# Builds an %rbp frame, saves %rsi/%rdi/%rdx/%rbx (restored at "err") and
# tests vp for NULL, returning -1 in %rax in that case.
# NOTE(review): with the frame built below, 0(%rbp) is the saved %rbp and
# 8(%rbp) is the return address, so the load labelled "vp" still follows the
# i386 stack layout. Confirm how callers are expected to pass vp/syms/nbits
# before relying on this file.
update_viterbi27_blk_sse2:
pushq %rbp
movq %rsp,%rbp
/* convention different between i386 and x86_64: rsi and rdi belong to called function, not caller */
/* Let's say we don't care (yet) */
pushq %rsi
pushq %rdi
pushq %rdx
pushq %rbx
movq 8(%rbp),%rdx # rdx = vp (see NOTE above)
testq %rdx,%rdx
jnz 0f
# NULL vp: return -1. The '$' makes this an immediate; a bare -1 is an
# absolute memory operand in AT&T syntax and would fault when executed.
movq $-1,%rax
jmp err
# Load the working pointers out of *vp (still in %rdx), then fall into the
# per-bit loop at local label 1.
# NOTE(review): DP/OLDMETRICS/NEWMETRICS are .set to 128/132/136 (4 bytes
# apart) yet are read with 8-byte movq loads here, so the three loads overlap.
# The offsets look inherited from the 32-bit layout - verify against struct v27.
0: movq OLDMETRICS(%rdx),%rsi # rsi -> old metrics
movq NEWMETRICS(%rdx),%rdi # rdi -> new metrics
movq DP(%rdx),%rdx # rdx -> decisions
# Loop head: nbits and syms live in frame slots and are updated in place.
# NOTE(review): the 8-byte accesses at 12(%rbp) and 16(%rbp) overlap each
# other; these slot offsets also match the i386 argument layout - TODO confirm.
1: movq 16(%rbp),%rax # rax = nbits
decq %rax
jl 2f # passed zero, we're done
movq %rax,16(%rbp) # store decremented nbits
xorq %rax,%rax # clear rax ahead of the byte loads into %al
movq 12(%rbp),%rbx # rbx = syms
movb (%rbx),%al
movd %rax,%xmm6 # xmm6[0] = first symbol
movb 1(%rbx),%al
movd %rax,%xmm5 # xmm5[0] = second symbol
addq $2,%rbx # two symbols consumed per pass
movq %rbx,12(%rbp) # store advanced syms pointer
# Broadcast each symbol byte across its whole register
punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0]
punpcklbw %xmm5,%xmm5
pshuflw $0,%xmm6,%xmm6 # copy low word to the other 3 words of the low quadword
pshuflw $0,%xmm5,%xmm5
punpcklqdq %xmm6,%xmm6 # propagate to all 16
punpcklqdq %xmm5,%xmm5
# xmm6 now contains first symbol in each byte, xmm5 the second
movdqa thirtyones(%rip),%xmm7 # xmm7 = 31 in every byte
# each invocation of this macro does 16 butterflies in parallel
# Inputs: xmm6/xmm5 = first/second symbol in every byte, xmm7 = 31 in every
# byte, %rsi -> old metrics, %rdi -> new metrics, %rdx -> decision words.
# GROUP selects which 16-byte slice of the tables and metrics is processed.
# Clobbers xmm0-xmm4, %rax and %rbx.
.MACRO butterfly GROUP
# compute branch metrics
movdqa (Branchtab27_sse2+(16*\GROUP))(%rip),%xmm4
movdqa (Branchtab27_sse2+32+(16*\GROUP))(%rip),%xmm3
pxor %xmm6,%xmm4
pxor %xmm5,%xmm3
# compute 5-bit branch metric in xmm4 by adding the individual symbol metrics
# This is okay for this
# code because the worst-case metric spread (at high Eb/No) is only 120,
# well within the range of our unsigned 8-bit path metrics, and even within
# the range of signed 8-bit path metrics
pavgb %xmm3,%xmm4
psrlw $3,%xmm4
pand %xmm7,%xmm4 # keep 5 bits per byte after the word-wide shift
# Address the old metrics through the full 64-bit %rsi: an (%esi) operand
# would use only the low 32 bits of the pointer.
movdqa (16*\GROUP)(%rsi),%xmm0 # Incoming path metric, high bit = 0
movdqa ((16*\GROUP)+32)(%rsi),%xmm3 # Incoming path metric, high bit = 1
movdqa %xmm0,%xmm2
movdqa %xmm3,%xmm1
paddusb %xmm4,%xmm0 # note use of saturating arithmetic
paddusb %xmm4,%xmm3 # this shouldn't be necessary, but why not?
# negate branch metrics (XOR with 31 gives 31 - metric for 5-bit values)
pxor %xmm7,%xmm4
paddusb %xmm4,%xmm1
paddusb %xmm4,%xmm2
# Find survivors, leave in xmm0,2
pminub %xmm1,%xmm0
pminub %xmm3,%xmm2
# get decisions, leave in xmm1,3
pcmpeqb %xmm0,%xmm1
pcmpeqb %xmm2,%xmm3
# interleave and store new path metrics from xmm0,2
movdqa %xmm0,%xmm4
punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics
punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics
movdqa %xmm0,(32*\GROUP+16)(%rdi)
movdqa %xmm4,(32*\GROUP)(%rdi)
# interleave decisions & store
movdqa %xmm1,%xmm4
punpckhbw %xmm3,%xmm1
punpcklbw %xmm3,%xmm4
# work around bug in gas due to Intel doc error
.byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx
shlq $16,%rbx
.byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax
orq %rax,%rbx
# Each group owns one 4-byte decision word (slots are 4*GROUP apart), so
# store exactly 32 bits; an 8-byte store would spill into the next slot and
# past the end of the decision buffer on the last group.
movl %ebx,(4*\GROUP)(%rdx)
.endm
# invoke macro 2 times for a total of 32 butterflies
butterfly GROUP=0
butterfly GROUP=1
addq $8,%rdx # bump decision pointer
# See if we have to normalize. This requires an explanation. We don't want
# our path metrics to exceed 255 on the *next* iteration. Since the
# largest branch metric is 30, that means we don't want any to exceed 225
# on *this* iteration. Rather than look at them all, we just pick an arbitrary one
# (the first) and see if it exceeds 225-120=105, where 120 is the experimentally-
# determined worst-case metric spread for this code and branch metrics in the range 0-30.
# This is extremely conservative, and empirical testing at a variety of Eb/Nos might
# show that a higher threshold could be used without affecting BER performance
movq (%rdi),%rax # extract first output metric
andq $255,%rax # keep only the first metric byte
cmp $105,%rax
jle done # No, no need to normalize
# Normalize by finding smallest metric and subtracting it
# from all metrics. We can't just pick an arbitrary small constant because
# the minimum metric might be zero!
# xmm0-xmm3 keep the four 16-byte metric vectors; xmm4 accumulates the
# byte-wise minimum across them.
movdqa (%rdi),%xmm0
movdqa %xmm0,%xmm4
movdqa 16(%rdi),%xmm1
pminub %xmm1,%xmm4
movdqa 32(%rdi),%xmm2
pminub %xmm2,%xmm4
movdqa 48(%rdi),%xmm3
pminub %xmm3,%xmm4
# crunch down to single lowest metric
# (fold the upper half onto the lower half, halving the width each step)
movdqa %xmm4,%xmm5
psrldq $8,%xmm5 # the count to psrldq is bytes, not bits!
pminub %xmm5,%xmm4
movdqa %xmm4,%xmm5
psrlq $32,%xmm5
pminub %xmm5,%xmm4
movdqa %xmm4,%xmm5
psrlq $16,%xmm5
pminub %xmm5,%xmm4
movdqa %xmm4,%xmm5
psrlq $8,%xmm5
pminub %xmm5,%xmm4 # now in lowest byte of %xmm4
punpcklbw %xmm4,%xmm4 # lowest 2 bytes
pshuflw $0,%xmm4,%xmm4 # lowest 8 bytes
punpcklqdq %xmm4,%xmm4 # all 16 bytes
# xmm4 now contains lowest metric in all 16 bytes
# subtract it from every output metric
psubusb %xmm4,%xmm0
psubusb %xmm4,%xmm1
psubusb %xmm4,%xmm2
psubusb %xmm4,%xmm3
# write the normalized metrics back
movdqa %xmm0,(%rdi)
movdqa %xmm1,16(%rdi)
movdqa %xmm2,32(%rdi)
movdqa %xmm3,48(%rdi)
# End of one decoded bit: exchange the old/new metric pointers and loop.
done:
# swap metrics
movq %rsi,%rax
movq %rdi,%rsi
movq %rax,%rdi
jmp 1b
# Loop exit: write the working pointers back into *vp and return 0.
# NOTE(review): 8(%rbp) is reloaded as vp here, same frame slot as at entry;
# on x86-64 that slot holds the return address - TODO confirm.
2: movq 8(%rbp),%rbx # rbx = vp
# stash metric pointers
movq %rsi,OLDMETRICS(%rbx)
movq %rdi,NEWMETRICS(%rbx)
movq %rdx,DP(%rbx) # stash incremented value of vp->dp
xorq %rax,%rax # return value 0
# Common exit: restore the registers pushed at entry, in reverse order.
err: popq %rbx
popq %rdx
popq %rdi
popq %rsi
popq %rbp
ret
# Constant vector with 31 in each of its 16 bytes, 16-byte aligned for movdqa.
# Loaded into xmm7, where it serves as the 5-bit mask and the XOR constant
# for the branch metrics.
.data
.align 16
thirtyones:
.byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31

202
libfec/sse2bfly27.s Normal file
View File

@ -0,0 +1,202 @@
/* Intel SIMD (SSE2) implementations of Viterbi ACS butterflies
for 64-state (k=7) convolutional code
Copyright 2003 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
void update_viterbi27_blk_sse2(struct v27 *vp,unsigned char syms[],int nbits) ;
*/
# SSE2 (128-bit integer SIMD) version
# Requires Pentium 4 or better
# These are offsets into struct v27, defined in viterbi27.h
.set DP,128
.set OLDMETRICS,132
.set NEWMETRICS,136
.text
.global update_viterbi27_blk_sse2,Branchtab27_sse2
.type update_viterbi27_blk_sse2,@function
.align 16
# update_viterbi27_blk_sse2(vp, syms, nbits): function entry (i386).
# Builds an %ebp frame, saves %esi/%edi/%edx/%ebx (restored at "err") and
# fetches vp from the first stack argument. A NULL vp returns -1 in %eax;
# otherwise control continues at local label 0.
update_viterbi27_blk_sse2:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %edx
pushl %ebx
movl 8(%ebp),%edx # edx = vp
testl %edx,%edx
jnz 0f
# NULL vp: return -1. The '$' makes this an immediate; a bare -1 is an
# absolute memory operand in AT&T syntax and would fault when executed.
movl $-1,%eax
jmp err
# Load the working pointers out of *vp (still in %edx), then fall into the
# per-bit loop at local label 1.
0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
movl NEWMETRICS(%edx),%edi # edi -> new metrics
movl DP(%edx),%edx # edx -> decisions
# Loop head: nbits (16(%ebp)) and syms (12(%ebp)) stay in their stack
# argument slots and are updated in place on every pass.
1: movl 16(%ebp),%eax # eax = nbits
decl %eax
jl 2f # passed zero, we're done
movl %eax,16(%ebp) # store decremented nbits
xorl %eax,%eax # clear eax ahead of the byte loads into %al
movl 12(%ebp),%ebx # ebx = syms
movb (%ebx),%al
movd %eax,%xmm6 # xmm6[0] = first symbol
movb 1(%ebx),%al
movd %eax,%xmm5 # xmm5[0] = second symbol
addl $2,%ebx # two symbols consumed per pass
movl %ebx,12(%ebp) # store advanced syms pointer
# Broadcast each symbol byte across its whole register
punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0]
punpcklbw %xmm5,%xmm5
pshuflw $0,%xmm6,%xmm6 # copy low word to the other 3 words of the low quadword
pshuflw $0,%xmm5,%xmm5
punpcklqdq %xmm6,%xmm6 # propagate to all 16
punpcklqdq %xmm5,%xmm5
# xmm6 now contains first symbol in each byte, xmm5 the second
movdqa thirtyones,%xmm7 # xmm7 = 31 in every byte
# each invocation of this macro does 16 butterflies in parallel
# Inputs: xmm6/xmm5 = first/second symbol in every byte, xmm7 = 31 in every
# byte, %esi -> old metrics, %edi -> new metrics, %edx -> decision words.
# GROUP selects which 16-byte slice of the tables and metrics is processed.
# Clobbers xmm0-xmm4, %eax and %ebx.
.MACRO butterfly GROUP
# compute branch metrics
movdqa Branchtab27_sse2+(16*\GROUP),%xmm4
movdqa Branchtab27_sse2+32+(16*\GROUP),%xmm3
pxor %xmm6,%xmm4
pxor %xmm5,%xmm3
# compute 5-bit branch metric in xmm4 by adding the individual symbol metrics
# This is okay for this
# code because the worst-case metric spread (at high Eb/No) is only 120,
# well within the range of our unsigned 8-bit path metrics, and even within
# the range of signed 8-bit path metrics
pavgb %xmm3,%xmm4
psrlw $3,%xmm4
pand %xmm7,%xmm4 # keep 5 bits per byte after the word-wide shift
movdqa (16*\GROUP)(%esi),%xmm0 # Incoming path metric, high bit = 0
movdqa ((16*\GROUP)+32)(%esi),%xmm3 # Incoming path metric, high bit = 1
movdqa %xmm0,%xmm2
movdqa %xmm3,%xmm1
paddusb %xmm4,%xmm0 # note use of saturating arithmetic
paddusb %xmm4,%xmm3 # this shouldn't be necessary, but why not?
# negate branch metrics (XOR with 31 gives 31 - metric for 5-bit values)
pxor %xmm7,%xmm4
paddusb %xmm4,%xmm1
paddusb %xmm4,%xmm2
# Find survivors, leave in xmm0,2
pminub %xmm1,%xmm0
pminub %xmm3,%xmm2
# get decisions, leave in xmm1,3
pcmpeqb %xmm0,%xmm1
pcmpeqb %xmm2,%xmm3
# interleave and store new path metrics from xmm0,2
movdqa %xmm0,%xmm4
punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics
punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics
movdqa %xmm0,(32*\GROUP+16)(%edi)
movdqa %xmm4,(32*\GROUP)(%edi)
# interleave decisions & store
movdqa %xmm1,%xmm4
punpckhbw %xmm3,%xmm1
punpcklbw %xmm3,%xmm4
# work around bug in gas due to Intel doc error
.byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx
shll $16,%ebx # upper 16 decision bits
.byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax
orl %eax,%ebx # lower 16 decision bits
movl %ebx,(4*\GROUP)(%edx) # one 32-bit decision word per group
.endm
# invoke macro 2 times for a total of 32 butterflies
butterfly GROUP=0
butterfly GROUP=1
addl $8,%edx # bump decision pointer
# See if we have to normalize. This requires an explanation. We don't want
# our path metrics to exceed 255 on the *next* iteration. Since the
# largest branch metric is 30, that means we don't want any to exceed 225
# on *this* iteration. Rather than look at them all, we just pick an arbitrary one
# (the first) and see if it exceeds 225-120=105, where 120 is the experimentally-
# determined worst-case metric spread for this code and branch metrics in the range 0-30.
# This is extremely conservative, and empirical testing at a variety of Eb/Nos might
# show that a higher threshold could be used without affecting BER performance
movl (%edi),%eax # extract first output metric
andl $255,%eax # keep only the first metric byte
cmp $105,%eax
jle done # No, no need to normalize
# Normalize by finding smallest metric and subtracting it
# from all metrics. We can't just pick an arbitrary small constant because
# the minimum metric might be zero!
# xmm0-xmm3 keep the four 16-byte metric vectors; xmm4 accumulates the
# byte-wise minimum across them.
movdqa (%edi),%xmm0
movdqa %xmm0,%xmm4
movdqa 16(%edi),%xmm1
pminub %xmm1,%xmm4
movdqa 32(%edi),%xmm2
pminub %xmm2,%xmm4
movdqa 48(%edi),%xmm3
pminub %xmm3,%xmm4
# crunch down to single lowest metric
# (fold the upper half onto the lower half, halving the width each step)
movdqa %xmm4,%xmm5
psrldq $8,%xmm5 # the count to psrldq is bytes, not bits!
pminub %xmm5,%xmm4
movdqa %xmm4,%xmm5
psrlq $32,%xmm5
pminub %xmm5,%xmm4
movdqa %xmm4,%xmm5
psrlq $16,%xmm5
pminub %xmm5,%xmm4
movdqa %xmm4,%xmm5
psrlq $8,%xmm5
pminub %xmm5,%xmm4 # now in lowest byte of %xmm4
punpcklbw %xmm4,%xmm4 # lowest 2 bytes
pshuflw $0,%xmm4,%xmm4 # lowest 8 bytes
punpcklqdq %xmm4,%xmm4 # all 16 bytes
# xmm4 now contains lowest metric in all 16 bytes
# subtract it from every output metric
psubusb %xmm4,%xmm0
psubusb %xmm4,%xmm1
psubusb %xmm4,%xmm2
psubusb %xmm4,%xmm3
# write the normalized metrics back
movdqa %xmm0,(%edi)
movdqa %xmm1,16(%edi)
movdqa %xmm2,32(%edi)
movdqa %xmm3,48(%edi)
# End of one decoded bit: exchange the old/new metric pointers and loop.
done:
# swap metrics
movl %esi,%eax
movl %edi,%esi
movl %eax,%edi
jmp 1b
# Loop exit: write the working pointers back into *vp and return 0.
2: movl 8(%ebp),%ebx # ebx = vp
# stash metric pointers
movl %esi,OLDMETRICS(%ebx)
movl %edi,NEWMETRICS(%ebx)
movl %edx,DP(%ebx) # stash incremented value of vp->dp
xorl %eax,%eax # return value 0
# Common exit: restore the registers pushed at entry, in reverse order.
err: popl %ebx
popl %edx
popl %edi
popl %esi
popl %ebp
ret
# Constant vector with 31 in each of its 16 bytes, 16-byte aligned for movdqa.
# Loaded into xmm7, where it serves as the 5-bit mask and the XOR constant
# for the branch metrics.
.data
.align 16
thirtyones:
.byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31

254
libfec/sse2bfly29-64.s Normal file
View File

@ -0,0 +1,254 @@
/* Intel SIMD SSE2 implementation of Viterbi ACS butterflies
for 256-state (k=9) convolutional code
Copyright 2004 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
Modifications for x86_64, 2012 Matthias P. Braendli, HB9EGM
- changed registers to x86-64 equivalents
- changed instructions accordingly
- %rip indirect addressing needed for position independent code,
which is required because x86-64 needs dynamic libs to be PIC.
That still doesn't work
void update_viterbi29_blk_sse2(struct v29 *vp,unsigned char *syms,int nbits) ;
*/
# SSE2 (128-bit integer SIMD) version
# All X86-64 CPUs include SSE2
# These are offsets into struct v29, defined in viterbi29_av.c
.set DP,512
.set OLDMETRICS,516
.set NEWMETRICS,520
.text
.global update_viterbi29_blk_sse2,Branchtab29_sse2
.type update_viterbi29_blk_sse2,@function
.align 16
# update_viterbi29_blk_sse2(vp, syms, nbits): function entry (x86-64 port).
# Builds an %rbp frame, saves %rsi/%rdi/%rdx/%rbx (restored at "err") and
# tests vp for NULL, returning -1 in %rax in that case.
# NOTE(review): with the frame built below, 0(%rbp) is the saved %rbp and
# 8(%rbp) is the return address, so the load labelled "vp" still follows the
# i386 stack layout. Confirm how callers are expected to pass vp/syms/nbits
# before relying on this file.
update_viterbi29_blk_sse2:
pushq %rbp
movq %rsp,%rbp
/* convention different between i386 and x86_64: rsi and rdi belong to called function, not caller */
/* Let's say we don't care (yet) */
pushq %rsi
pushq %rdi
pushq %rdx
pushq %rbx
movq 8(%rbp),%rdx # rdx = vp (see NOTE above)
testq %rdx,%rdx
jnz 0f
# NULL vp: return -1. The '$' makes this an immediate; a bare -1 is an
# absolute memory operand in AT&T syntax and would fault when executed.
movq $-1,%rax
jmp err
# Load the working pointers out of *vp (still in %rdx), then fall into the
# per-bit loop at local label 1.
# NOTE(review): DP/OLDMETRICS/NEWMETRICS are .set to 512/516/520 (4 bytes
# apart) yet are read with 8-byte movq loads here, so the three loads overlap.
# The offsets look inherited from the 32-bit layout - verify against struct v29.
0: movq OLDMETRICS(%rdx),%rsi # rsi -> old metrics
movq NEWMETRICS(%rdx),%rdi # rdi -> new metrics
movq DP(%rdx),%rdx # rdx -> decisions
# Loop head: nbits and syms live in frame slots and are updated in place.
# NOTE(review): the 8-byte accesses at 12(%rbp) and 16(%rbp) overlap each
# other; these slot offsets also match the i386 argument layout - TODO confirm.
1: movq 16(%rbp),%rax # rax = nbits
decq %rax
jl 2f # passed zero, we're done
movq %rax,16(%rbp) # store decremented nbits
xorq %rax,%rax # clear rax ahead of the byte loads into %al
movq 12(%rbp),%rbx # rbx = syms
movb (%rbx),%al
movd %rax,%xmm6 # xmm6[0] = first symbol
movb 1(%rbx),%al
movd %rax,%xmm5 # xmm5[0] = second symbol
addq $2,%rbx # two symbols consumed per pass
movq %rbx,12(%rbp) # store advanced syms pointer
# Broadcast each symbol byte across its whole register
punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0]
punpcklbw %xmm5,%xmm5
movdqa thirtyones(%rip),%xmm7 # xmm7 = 31 in every byte
pshuflw $0,%xmm6,%xmm6 # copy low word to the other 3 words of the low quadword
pshuflw $0,%xmm5,%xmm5
punpcklqdq %xmm6,%xmm6 # propagate to all 16
punpcklqdq %xmm5,%xmm5
# xmm6 now contains first symbol in each byte, xmm5 the second
movdqa thirtyones(%rip),%xmm7 # same load as a few lines up; xmm7 is unchanged in between
# each invocation of this macro does 16 butterflies in parallel
# Inputs: xmm6/xmm5 = first/second symbol in every byte, xmm7 = 31 in every
# byte, %rsi -> old metrics, %rdi -> new metrics, %rdx -> decision words.
# GROUP selects which 16-byte slice of the tables and metrics is processed.
# Clobbers xmm0-xmm4, %rax and %rbx.
.MACRO butterfly GROUP
# compute branch metrics
movdqa Branchtab29_sse2+(16*\GROUP)(%rip),%xmm4
movdqa Branchtab29_sse2+128+(16*\GROUP)(%rip),%xmm3
pxor %xmm6,%xmm4
pxor %xmm5,%xmm3
pavgb %xmm3,%xmm4
psrlw $3,%xmm4
pand %xmm7,%xmm4 # xmm4 contains branch metrics
# Address the old metrics through the full 64-bit %rsi: an (%esi) operand
# would use only the low 32 bits of the pointer.
movdqa (16*\GROUP)(%rsi),%xmm0 # Incoming path metric, high bit = 0
movdqa ((16*\GROUP)+128)(%rsi),%xmm3 # Incoming path metric, high bit = 1
movdqa %xmm0,%xmm2
movdqa %xmm3,%xmm1
paddusb %xmm4,%xmm0
paddusb %xmm4,%xmm3
# invert branch metrics (XOR with 31 gives 31 - metric for 5-bit values)
pxor %xmm7,%xmm4
paddusb %xmm4,%xmm1
paddusb %xmm4,%xmm2
# Find survivors, leave in xmm0,2
pminub %xmm1,%xmm0
pminub %xmm3,%xmm2
# get decisions, leave in xmm1,3
pcmpeqb %xmm0,%xmm1
pcmpeqb %xmm2,%xmm3
# interleave and store new path metrics from xmm0,2
movdqa %xmm0,%xmm4
punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics
punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics
movdqa %xmm0,(32*\GROUP+16)(%rdi)
movdqa %xmm4,(32*\GROUP)(%rdi)
# interleave decisions & store
movdqa %xmm1,%xmm4
punpckhbw %xmm3,%xmm1
punpcklbw %xmm3,%xmm4
# work around bug in gas due to Intel doc error
.byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx
shlq $16,%rbx
.byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax
orq %rax,%rbx
# Each group owns one 4-byte decision word (slots are 4*GROUP apart), so
# store exactly 32 bits; an 8-byte store would spill into the next slot and
# past the end of the decision buffer on the last group.
movl %ebx,(4*\GROUP)(%rdx)
.endm
# invoke macro 8 times for a total of 128 butterflies
butterfly GROUP=0
butterfly GROUP=1
butterfly GROUP=2
butterfly GROUP=3
butterfly GROUP=4
butterfly GROUP=5
butterfly GROUP=6
butterfly GROUP=7
addq $32,%rdx # bump decision pointer
# see if we have to normalize
movq (%rdi),%rax # extract first output metric
andq $255,%rax # keep only the first metric byte
cmp $50,%rax # is it greater than 50?
movq $0,%rax # mov leaves the flags from cmp intact for the jle below
jle done # No, no need to normalize
# Normalize by finding smallest metric and subtracting it
# from all metrics
# xmm0 accumulates the byte-wise minimum over all sixteen 16-byte vectors
movdqa (%rdi),%xmm0
pminub 16(%rdi),%xmm0
pminub 32(%rdi),%xmm0
pminub 48(%rdi),%xmm0
pminub 64(%rdi),%xmm0
pminub 80(%rdi),%xmm0
pminub 96(%rdi),%xmm0
pminub 112(%rdi),%xmm0
pminub 128(%rdi),%xmm0
pminub 144(%rdi),%xmm0
pminub 160(%rdi),%xmm0
pminub 176(%rdi),%xmm0
pminub 192(%rdi),%xmm0
pminub 208(%rdi),%xmm0
pminub 224(%rdi),%xmm0
pminub 240(%rdi),%xmm0
# crunch down to single lowest metric
# (fold the upper half onto the lower half, halving the width each step)
movdqa %xmm0,%xmm1
psrldq $8,%xmm0 # the count to psrldq is bytes, not bits!
pminub %xmm1,%xmm0
movdqa %xmm0,%xmm1
psrlq $32,%xmm0
pminub %xmm1,%xmm0
movdqa %xmm0,%xmm1
psrlq $16,%xmm0
pminub %xmm1,%xmm0
movdqa %xmm0,%xmm1
psrlq $8,%xmm0
pminub %xmm1,%xmm0 # minimum is now in the lowest byte of xmm0
punpcklbw %xmm0,%xmm0 # lowest 2 bytes
pshuflw $0,%xmm0,%xmm0 # lowest 8 bytes
punpcklqdq %xmm0,%xmm0 # all 16 bytes
# xmm0 now contains lowest metric in all 16 bytes
# subtract it from every output metric
# (load, saturating subtract, store - one 16-byte vector at a time via xmm1)
movdqa (%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,(%rdi)
movdqa 16(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,16(%rdi)
movdqa 32(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,32(%rdi)
movdqa 48(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,48(%rdi)
movdqa 64(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,64(%rdi)
movdqa 80(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,80(%rdi)
movdqa 96(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,96(%rdi)
movdqa 112(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,112(%rdi)
movdqa 128(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,128(%rdi)
movdqa 144(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,144(%rdi)
movdqa 160(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,160(%rdi)
movdqa 176(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,176(%rdi)
movdqa 192(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,192(%rdi)
movdqa 208(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,208(%rdi)
movdqa 224(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,224(%rdi)
movdqa 240(%rdi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,240(%rdi)
# End of one decoded bit: exchange the old/new metric pointers and loop.
done:
# swap metrics
movq %rsi,%rax
movq %rdi,%rsi
movq %rax,%rdi
jmp 1b
# Loop exit: write the working pointers back into *vp and return 0.
# NOTE(review): 8(%rbp) is reloaded as vp here, same frame slot as at entry;
# on x86-64 that slot holds the return address - TODO confirm.
2: movq 8(%rbp),%rbx # rbx = vp
# stash metric pointers
movq %rsi,OLDMETRICS(%rbx)
movq %rdi,NEWMETRICS(%rbx)
movq %rdx,DP(%rbx) # stash incremented value of vp->dp
xorq %rax,%rax # return value 0
# Common exit: restore the registers pushed at entry, in reverse order.
err: popq %rbx
popq %rdx
popq %rdi
popq %rsi
popq %rbp
ret
# Constant vector with 31 in each of its 16 bytes, 16-byte aligned for movdqa.
# Loaded into xmm7, where it serves as the 5-bit mask and the XOR constant
# for the branch metrics.
.data
.align 16
thirtyones:
.byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31

245
libfec/sse2bfly29.s Normal file
View File

@ -0,0 +1,245 @@
/* Intel SIMD SSE2 implementation of Viterbi ACS butterflies
for 256-state (k=9) convolutional code
Copyright 2004 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
void update_viterbi29_blk_sse2(struct v29 *vp,unsigned char *syms,int nbits) ;
*/
# SSE2 (128-bit integer SIMD) version
# Requires Pentium 4 or better
# These are offsets into struct v29, defined in viterbi29.h
.set DP,512
.set OLDMETRICS,516
.set NEWMETRICS,520
.text
.global update_viterbi29_blk_sse2,Branchtab29_sse2
.type update_viterbi29_blk_sse2,@function
.align 16
# update_viterbi29_blk_sse2(vp, syms, nbits): function entry (i386).
# Builds an %ebp frame, saves %esi/%edi/%edx/%ebx (restored at "err") and
# fetches vp from the first stack argument. A NULL vp returns -1 in %eax;
# otherwise control continues at local label 0.
update_viterbi29_blk_sse2:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %edx
pushl %ebx
movl 8(%ebp),%edx # edx = vp
testl %edx,%edx
jnz 0f
# NULL vp: return -1. The '$' makes this an immediate; a bare -1 is an
# absolute memory operand in AT&T syntax and would fault when executed.
movl $-1,%eax
jmp err
# Load the working pointers out of *vp (still in %edx), then fall into the
# per-bit loop at local label 1.
0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
movl NEWMETRICS(%edx),%edi # edi -> new metrics
movl DP(%edx),%edx # edx -> decisions
# Loop head: nbits (16(%ebp)) and syms (12(%ebp)) stay in their stack
# argument slots and are updated in place on every pass.
1: movl 16(%ebp),%eax # eax = nbits
decl %eax
jl 2f # passed zero, we're done
movl %eax,16(%ebp) # store decremented nbits
xorl %eax,%eax # clear eax ahead of the byte loads into %al
movl 12(%ebp),%ebx # ebx = syms
movb (%ebx),%al
movd %eax,%xmm6 # xmm6[0] = first symbol
movb 1(%ebx),%al
movd %eax,%xmm5 # xmm5[0] = second symbol
addl $2,%ebx # two symbols consumed per pass
movl %ebx,12(%ebp) # store advanced syms pointer
# Broadcast each symbol byte across its whole register
punpcklbw %xmm6,%xmm6 # xmm6[1] = xmm6[0]
punpcklbw %xmm5,%xmm5
movdqa thirtyones,%xmm7 # xmm7 = 31 in every byte
pshuflw $0,%xmm6,%xmm6 # copy low word to the other 3 words of the low quadword
pshuflw $0,%xmm5,%xmm5
punpcklqdq %xmm6,%xmm6 # propagate to all 16
punpcklqdq %xmm5,%xmm5
# xmm6 now contains first symbol in each byte, xmm5 the second
movdqa thirtyones,%xmm7 # same load as a few lines up; xmm7 is unchanged in between
# each invocation of this macro does 16 butterflies in parallel
# Inputs: xmm6/xmm5 = first/second symbol in every byte, xmm7 = 31 in every
# byte, %esi -> old metrics, %edi -> new metrics, %edx -> decision words.
# GROUP selects which 16-byte slice of the tables and metrics is processed.
# Clobbers xmm0-xmm4, %eax and %ebx.
.MACRO butterfly GROUP
# compute branch metrics
movdqa Branchtab29_sse2+(16*\GROUP),%xmm4
movdqa Branchtab29_sse2+128+(16*\GROUP),%xmm3
pxor %xmm6,%xmm4
pxor %xmm5,%xmm3
pavgb %xmm3,%xmm4
psrlw $3,%xmm4
pand %xmm7,%xmm4 # xmm4 contains branch metrics
movdqa (16*\GROUP)(%esi),%xmm0 # Incoming path metric, high bit = 0
movdqa ((16*\GROUP)+128)(%esi),%xmm3 # Incoming path metric, high bit = 1
movdqa %xmm0,%xmm2
movdqa %xmm3,%xmm1
paddusb %xmm4,%xmm0
paddusb %xmm4,%xmm3
# invert branch metrics (XOR with 31 gives 31 - metric for 5-bit values)
pxor %xmm7,%xmm4
paddusb %xmm4,%xmm1
paddusb %xmm4,%xmm2
# Find survivors, leave in xmm0,2
pminub %xmm1,%xmm0
pminub %xmm3,%xmm2
# get decisions, leave in xmm1,3
pcmpeqb %xmm0,%xmm1
pcmpeqb %xmm2,%xmm3
# interleave and store new path metrics from xmm0,2
movdqa %xmm0,%xmm4
punpckhbw %xmm2,%xmm0 # interleave second 16 new metrics
punpcklbw %xmm2,%xmm4 # interleave first 16 new metrics
movdqa %xmm0,(32*\GROUP+16)(%edi)
movdqa %xmm4,(32*\GROUP)(%edi)
# interleave decisions & store
movdqa %xmm1,%xmm4
punpckhbw %xmm3,%xmm1
punpcklbw %xmm3,%xmm4
# work around bug in gas due to Intel doc error
.byte 0x66,0x0f,0xd7,0xd9 # pmovmskb %xmm1,%ebx
shll $16,%ebx # upper 16 decision bits
.byte 0x66,0x0f,0xd7,0xc4 # pmovmskb %xmm4,%eax
orl %eax,%ebx # lower 16 decision bits
movl %ebx,(4*\GROUP)(%edx) # one 32-bit decision word per group
.endm
# invoke macro 8 times for a total of 128 butterflies
butterfly GROUP=0
butterfly GROUP=1
butterfly GROUP=2
butterfly GROUP=3
butterfly GROUP=4
butterfly GROUP=5
butterfly GROUP=6
butterfly GROUP=7
addl $32,%edx # bump decision pointer
# see if we have to normalize
movl (%edi),%eax # extract first output metric
andl $255,%eax # keep only the first metric byte
cmp $50,%eax # is it greater than 50?
movl $0,%eax # mov leaves the flags from cmp intact for the jle below
jle done # No, no need to normalize
# Normalize by finding smallest metric and subtracting it
# from all metrics
# xmm0 accumulates the byte-wise minimum over all sixteen 16-byte vectors
movdqa (%edi),%xmm0
pminub 16(%edi),%xmm0
pminub 32(%edi),%xmm0
pminub 48(%edi),%xmm0
pminub 64(%edi),%xmm0
pminub 80(%edi),%xmm0
pminub 96(%edi),%xmm0
pminub 112(%edi),%xmm0
pminub 128(%edi),%xmm0
pminub 144(%edi),%xmm0
pminub 160(%edi),%xmm0
pminub 176(%edi),%xmm0
pminub 192(%edi),%xmm0
pminub 208(%edi),%xmm0
pminub 224(%edi),%xmm0
pminub 240(%edi),%xmm0
# crunch down to single lowest metric
# (fold the upper half onto the lower half, halving the width each step)
movdqa %xmm0,%xmm1
psrldq $8,%xmm0 # the count to psrldq is bytes, not bits!
pminub %xmm1,%xmm0
movdqa %xmm0,%xmm1
psrlq $32,%xmm0
pminub %xmm1,%xmm0
movdqa %xmm0,%xmm1
psrlq $16,%xmm0
pminub %xmm1,%xmm0
movdqa %xmm0,%xmm1
psrlq $8,%xmm0
pminub %xmm1,%xmm0 # minimum is now in the lowest byte of xmm0
punpcklbw %xmm0,%xmm0 # lowest 2 bytes
pshuflw $0,%xmm0,%xmm0 # lowest 8 bytes
punpcklqdq %xmm0,%xmm0 # all 16 bytes
# xmm0 now contains lowest metric in all 16 bytes
# subtract it from every output metric
# (load, saturating subtract, store - one 16-byte vector at a time via xmm1)
movdqa (%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,(%edi)
movdqa 16(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,16(%edi)
movdqa 32(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,32(%edi)
movdqa 48(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,48(%edi)
movdqa 64(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,64(%edi)
movdqa 80(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,80(%edi)
movdqa 96(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,96(%edi)
movdqa 112(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,112(%edi)
movdqa 128(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,128(%edi)
movdqa 144(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,144(%edi)
movdqa 160(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,160(%edi)
movdqa 176(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,176(%edi)
movdqa 192(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,192(%edi)
movdqa 208(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,208(%edi)
movdqa 224(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,224(%edi)
movdqa 240(%edi),%xmm1
psubusb %xmm0,%xmm1
movdqa %xmm1,240(%edi)
# End of one decoded bit: exchange the old/new metric pointers and loop.
done:
# swap metrics
movl %esi,%eax
movl %edi,%esi
movl %eax,%edi
jmp 1b
# Loop exit: write the working pointers back into *vp and return 0.
2: movl 8(%ebp),%ebx # ebx = vp
# stash metric pointers
movl %esi,OLDMETRICS(%ebx)
movl %edi,NEWMETRICS(%ebx)
movl %edx,DP(%ebx) # stash incremented value of vp->dp
xorl %eax,%eax # return value 0
# Common exit: restore the registers pushed at entry, in reverse order.
err: popl %ebx
popl %edx
popl %edi
popl %esi
popl %ebp
ret
# Constant vector with 31 in each of its 16 bytes, 16-byte aligned for movdqa.
# Loaded into xmm7, where it serves as the 5-bit mask and the XOR constant
# for the branch metrics.
.data
.align 16
thirtyones:
.byte 31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31

205
libfec/ssebfly27.s Normal file
View File

@ -0,0 +1,205 @@
/* Intel SIMD (SSE) implementation of Viterbi ACS butterflies
for 64-state (k=7) convolutional code
Copyright 2001 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
int update_viterbi27_blk_sse(struct v27 *vp,unsigned char syms[],int nbits) ;
*/
# SSE (64-bit integer SIMD) version
# Requires Pentium III or better
# These are offsets into struct v27, defined in viterbi27.h
.set DP,128
.set OLDMETRICS,132
.set NEWMETRICS,136
.text
.global update_viterbi27_blk_sse,Branchtab27_sse
.type update_viterbi27_blk_sse,@function
.align 16
# update_viterbi27_blk_sse(vp, syms, nbits): function entry (i386).
# Builds an %ebp frame, saves %esi/%edi/%edx/%ebx (restored at "err") and
# fetches vp from the first stack argument. A NULL vp returns -1 in %eax;
# otherwise control continues at local label 0.
update_viterbi27_blk_sse:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %edx
pushl %ebx
movl 8(%ebp),%edx # edx = vp
testl %edx,%edx
jnz 0f
# NULL vp: return -1. The '$' makes this an immediate; a bare -1 is an
# absolute memory operand in AT&T syntax and would fault when executed.
movl $-1,%eax
jmp err
# Load the working pointers out of *vp (still in %edx), then fall into the
# per-bit loop at local label 1.
0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
movl NEWMETRICS(%edx),%edi # edi -> new metrics
movl DP(%edx),%edx # edx -> decisions
# Loop head: nbits (16(%ebp)) and syms (12(%ebp)) stay in their stack
# argument slots and are updated in place on every pass.
1: movl 16(%ebp),%eax # eax = nbits
decl %eax
jl 2f # passed zero, we're done
movl %eax,16(%ebp) # store decremented nbits
xorl %eax,%eax # clear eax ahead of the byte loads into %al
movl 12(%ebp),%ebx # %ebx = syms
movb (%ebx),%al
movd %eax,%mm6 # mm6[0] = first symbol
movb 1(%ebx),%al
movd %eax,%mm5 # mm5[0] = second symbol
addl $2,%ebx # two symbols consumed per pass
movl %ebx,12(%ebp) # store advanced syms pointer
# Broadcast each symbol byte across its whole register
punpcklbw %mm6,%mm6 # mm6[1] = mm6[0]
punpcklbw %mm5,%mm5
movq thirtyones,%mm7 # mm7 = 31 in every byte
pshufw $0,%mm6,%mm6 # copy low word to upper 3
pshufw $0,%mm5,%mm5
# mm6 now contains first symbol in each byte, mm5 the second
# each invocation of this macro does 8 butterflies in parallel
# Inputs: mm6/mm5 = first/second symbol in every byte, mm7 = 31 in every
# byte, %esi -> old metrics, %edi -> new metrics.
# GROUP selects which 8-byte slice of the tables and metrics is processed.
# The 16 decision bits produced are ORed into %ebx at a bit position derived
# from GROUP; the caller clears and stores %ebx. Clobbers mm0-mm4 and %eax.
.MACRO butterfly GROUP
# compute branch metrics
movq Branchtab27_sse+(8*\GROUP),%mm4
movq Branchtab27_sse+32+(8*\GROUP),%mm3
pxor %mm6,%mm4
pxor %mm5,%mm3
pavgb %mm3,%mm4 # mm4 contains branch metrics
psrlw $3,%mm4
pand %mm7,%mm4 # keep 5 bits per byte after the word-wide shift
movq (8*\GROUP)(%esi),%mm0 # Incoming path metric, high bit = 0
movq ((8*\GROUP)+32)(%esi),%mm3 # Incoming path metric, high bit = 1
movq %mm0,%mm2
movq %mm3,%mm1
paddusb %mm4,%mm0
paddusb %mm4,%mm3
# invert branch metrics. This works only because they're 5 bits
pxor %mm7,%mm4
paddusb %mm4,%mm1
paddusb %mm4,%mm2
# Find survivors, leave in mm0,2
pminub %mm1,%mm0
pminub %mm3,%mm2
# get decisions, leave in mm1,3
pcmpeqb %mm0,%mm1
pcmpeqb %mm2,%mm3
# interleave and store new path metrics from mm0,2
movq %mm0,%mm4
punpckhbw %mm2,%mm0 # interleave second 8 new metrics
punpcklbw %mm2,%mm4 # interleave first 8 new metrics
movq %mm0,(16*\GROUP+8)(%edi)
movq %mm4,(16*\GROUP)(%edi)
# interleave decisions, accumulate into %ebx
movq %mm1,%mm4
punpckhbw %mm3,%mm1
punpcklbw %mm3,%mm4
# Due to an error in the Intel instruction set ref (the register
# fields are swapped), gas assembles pmovmskb incorrectly
# See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
.byte 0x0f,0xd7,0xc1 # pmovmskb %mm1,%eax
shll $((16*\GROUP+8)&31),%eax # position within the current 32-bit decision word
orl %eax,%ebx
.byte 0x0f,0xd7,0xc4 # pmovmskb %mm4,%eax
shll $((16*\GROUP)&31),%eax
orl %eax,%ebx
.endm
# invoke macro 4 times for a total of 32 butterflies
# Two invocations fill one 32-bit decision word in %ebx.
xorl %ebx,%ebx # clear decisions
butterfly GROUP=0
butterfly GROUP=1
movl %ebx,(%edx) # stash first 32 decisions
xorl %ebx,%ebx
butterfly GROUP=2
butterfly GROUP=3
movl %ebx,4(%edx) # stash second 32 decisions
addl $8,%edx # bump decision pointer
# see if we have to normalize
movl (%edi),%eax # extract first output metric
andl $255,%eax # keep only the first metric byte
cmpl $150,%eax # is it greater than 150?
movl $0,%eax # mov leaves the flags from cmpl intact for the jle below
jle done # No, no need to normalize
# Normalize by finding smallest metric and subtracting it
# from all metrics
movq (%edi),%mm0
pminub 8(%edi),%mm0
pminub 16(%edi),%mm0
pminub 24(%edi),%mm0
pminub 32(%edi),%mm0
pminub 40(%edi),%mm0
pminub 48(%edi),%mm0
pminub 56(%edi),%mm0
# mm0 contains 8 smallest metrics
# crunch down to single lowest metric
# (fold the upper half onto the lower half, halving the width each step)
movq %mm0,%mm1
psrlq $32,%mm0
pminub %mm1,%mm0
movq %mm0,%mm1
psrlq $16,%mm0
pminub %mm1,%mm0
movq %mm0,%mm1
psrlq $8,%mm0
pminub %mm1,%mm0 # minimum is now in the lowest byte of mm0
punpcklbw %mm0,%mm0 # expand to all 8 bytes
pshufw $0,%mm0,%mm0
# mm0 now contains lowest metric in all 8 bytes
# subtract it from every output metric
# PSUBUSBM REG,MEM: saturating unsigned byte subtract of REG from the 8 bytes
# at MEM, with the result written back to MEM.
# Trashes %mm7
.macro PSUBUSBM REG,MEM
movq \MEM,%mm7
psubusb \REG,%mm7
movq %mm7,\MEM
.endm
# Subtract the broadcast minimum in mm0 from all 64 new path metrics,
# 8 bytes at a time.
PSUBUSBM %mm0,(%edi)
PSUBUSBM %mm0,8(%edi)
PSUBUSBM %mm0,16(%edi)
PSUBUSBM %mm0,24(%edi)
PSUBUSBM %mm0,32(%edi)
PSUBUSBM %mm0,40(%edi)
PSUBUSBM %mm0,48(%edi)
PSUBUSBM %mm0,56(%edi)
# eax = the minimum that was subtracted.
# NOTE(review): %eax is overwritten by the pointer swap at "done" right after
# this, so the value appears unused - confirm before removing.
movd %mm0,%eax
and $0xff,%eax
# End of one decoded bit: exchange the old/new metric pointers and loop.
done: # swap metrics
movl %esi,%eax
movl %edi,%esi
movl %eax,%edi
jmp 1b
# Loop exit: leave MMX state, write the working pointers back into *vp and
# return 0.
2: emms
movl 8(%ebp),%ebx # ebx = vp
# stash metric pointers
movl %esi,OLDMETRICS(%ebx)
movl %edi,NEWMETRICS(%ebx)
movl %edx,DP(%ebx) # stash incremented value of vp->dp
xorl %eax,%eax # return value 0
# Common exit: restore the registers pushed at entry, in reverse order.
err: popl %ebx
popl %edx
popl %edi
popl %esi
popl %ebp
ret
# Constant with 31 in each of its 8 bytes. Loaded into mm7, where it serves
# as the 5-bit mask and the XOR constant for the branch metrics.
.data
.align 16
thirtyones:
.byte 31,31,31,31,31,31,31,31

271
libfec/ssebfly29.s Normal file
View File

@ -0,0 +1,271 @@
/* Intel SIMD SSE implementation of Viterbi ACS butterflies
for 256-state (k=9) convolutional code
Copyright 2004 Phil Karn, KA9Q
This code may be used under the terms of the GNU Lesser General Public License (LGPL)
void update_viterbi29_blk_sse(struct v29 *vp,unsigned char syms[],int nbits);
*/
# SSE (64-bit integer SIMD) version
# Requires Pentium III or better
# These are offsets into struct v29, defined in viterbi29.h
.set DP,512
.set OLDMETRICS,516
.set NEWMETRICS,520
.text
.global update_viterbi29_blk_sse,Branchtab29_sse
.type update_viterbi29_blk_sse,@function
.align 16
# update_viterbi29_blk_sse(vp, syms, nbits): function entry (i386).
# Builds an %ebp frame, saves %esi/%edi/%edx/%ebx (restored at "err") and
# fetches vp from the first stack argument. A NULL vp returns -1 in %eax;
# otherwise control continues at local label 0.
update_viterbi29_blk_sse:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %edi
pushl %edx
pushl %ebx
movl 8(%ebp),%edx # edx = vp
testl %edx,%edx
jnz 0f
# NULL vp: return -1. The '$' makes this an immediate; a bare -1 is an
# absolute memory operand in AT&T syntax and would fault when executed.
movl $-1,%eax
jmp err
# Load the working pointers out of *vp (still in %edx), then fall into the
# per-bit loop at local label 1.
0: movl OLDMETRICS(%edx),%esi # esi -> old metrics
movl NEWMETRICS(%edx),%edi # edi -> new metrics
movl DP(%edx),%edx # edx -> decisions
# Loop head: nbits (16(%ebp)) and syms (12(%ebp)) stay in their stack
# argument slots and are updated in place on every pass.
1: movl 16(%ebp),%eax # eax = nbits
decl %eax
jl 2f # passed zero, we're done
movl %eax,16(%ebp) # store decremented nbits
xorl %eax,%eax # clear eax ahead of the byte loads into %al
movl 12(%ebp),%ebx # ebx = syms
movb (%ebx),%al
movd %eax,%mm6 # mm6[0] = first symbol
movb 1(%ebx),%al
movd %eax,%mm5 # mm5[0] = second symbol
addl $2,%ebx # two symbols consumed per pass
movl %ebx,12(%ebp) # store advanced syms pointer
# Broadcast each symbol byte across its whole register
punpcklbw %mm6,%mm6 # mm6[1] = mm6[0]
punpcklbw %mm5,%mm5
movq thirtyones,%mm7 # mm7 = 31 in every byte
pshufw $0,%mm6,%mm6 # copy low word to upper 3
pshufw $0,%mm5,%mm5
# mm6 now contains first symbol in each byte, mm5 the second
# each invocation of this macro does 8 butterflies in parallel
# Inputs: mm6/mm5 = first/second symbol in every byte, mm7 = 31 in every
# byte, %esi -> old metrics, %edi -> new metrics.
# GROUP selects which 8-byte slice of the tables and metrics is processed.
# The 16 decision bits produced are ORed into %ebx at a bit position derived
# from GROUP; the caller clears and stores %ebx. Clobbers mm0-mm4 and %eax.
.MACRO butterfly GROUP
# compute branch metrics
movq Branchtab29_sse+(8*\GROUP),%mm4
movq Branchtab29_sse+128+(8*\GROUP),%mm3
pxor %mm6,%mm4
pxor %mm5,%mm3
pavgb %mm3,%mm4 # mm4 contains branch metrics
psrlw $3,%mm4
pand %mm7,%mm4 # keep 5 bits per byte after the word-wide shift
movq (8*\GROUP)(%esi),%mm0 # Incoming path metric, high bit = 0
movq ((8*\GROUP)+128)(%esi),%mm3 # Incoming path metric, high bit = 1
movq %mm0,%mm2
movq %mm3,%mm1
paddusb %mm4,%mm0
paddusb %mm4,%mm3
# invert branch metrics. This works only because they're 5 bits
pxor %mm7,%mm4
paddusb %mm4,%mm1
paddusb %mm4,%mm2
# Find survivors, leave in mm0,2
pminub %mm1,%mm0
pminub %mm3,%mm2
# get decisions, leave in mm1,3
pcmpeqb %mm0,%mm1
pcmpeqb %mm2,%mm3
# interleave and store new path metrics from mm0,2
movq %mm0,%mm4
punpckhbw %mm2,%mm0 # interleave second 8 new metrics
punpcklbw %mm2,%mm4 # interleave first 8 new metrics
movq %mm0,(16*\GROUP+8)(%edi)
movq %mm4,(16*\GROUP)(%edi)
# interleave decisions, accumulate into %ebx
movq %mm1,%mm4
punpckhbw %mm3,%mm1
punpcklbw %mm3,%mm4
# Due to an error in the Intel instruction set ref (the register
# fields are swapped), gas assembles pmovmskb incorrectly
# See http://mail.gnu.org/pipermail/bug-gnu-utils/2000-August/002341.html
.byte 0x0f,0xd7,0xc1 # pmovmskb %mm1,%eax
shll $((16*\GROUP+8)&31),%eax # position within the current 32-bit decision word
orl %eax,%ebx
.byte 0x0f,0xd7,0xc4 # pmovmskb %mm4,%eax
shll $((16*\GROUP)&31),%eax
orl %eax,%ebx
.endm
# invoke macro 16 times for a total of 128 butterflies
# Two invocations fill one 32-bit decision word in %ebx; eight words
# (offsets 0 through 28 from %edx) are written per decoded bit.
xorl %ebx,%ebx # clear decisions
butterfly GROUP=0
butterfly GROUP=1
movl %ebx,(%edx) # stash decision word 0
xorl %ebx,%ebx
butterfly GROUP=2
butterfly GROUP=3
movl %ebx,4(%edx) # stash decision word 1
xorl %ebx,%ebx # clear decisions
butterfly GROUP=4
butterfly GROUP=5
movl %ebx,8(%edx) # stash decision word 2
xorl %ebx,%ebx
butterfly GROUP=6
butterfly GROUP=7
movl %ebx,12(%edx) # stash decision word 3
xorl %ebx,%ebx # clear decisions
butterfly GROUP=8
butterfly GROUP=9
movl %ebx,16(%edx) # stash decision word 4
xorl %ebx,%ebx
butterfly GROUP=10
butterfly GROUP=11
movl %ebx,20(%edx) # stash decision word 5
xorl %ebx,%ebx # clear decisions
butterfly GROUP=12
butterfly GROUP=13
movl %ebx,24(%edx) # stash decision word 6
xorl %ebx,%ebx
butterfly GROUP=14
butterfly GROUP=15
movl %ebx,28(%edx) # stash decision word 7
addl $32,%edx # bump decision pointer
# see if we have to normalize
movl (%edi),%eax # extract first output metric
andl $255,%eax # keep only the first metric byte
cmp $50,%eax # is it greater than 50?
movl $0,%eax # mov leaves the flags from cmp intact for the jle below
jle done # No, no need to normalize
# Normalize by finding smallest metric and subtracting it
# from all metrics
movq (%edi),%mm0
pminub 8(%edi),%mm0
pminub 16(%edi),%mm0
pminub 24(%edi),%mm0
pminub 32(%edi),%mm0
pminub 40(%edi),%mm0
pminub 48(%edi),%mm0
pminub 56(%edi),%mm0
pminub 64(%edi),%mm0
pminub 72(%edi),%mm0
pminub 80(%edi),%mm0
pminub 88(%edi),%mm0
pminub 96(%edi),%mm0
pminub 104(%edi),%mm0
pminub 112(%edi),%mm0
pminub 120(%edi),%mm0
pminub 128(%edi),%mm0
pminub 136(%edi),%mm0
pminub 144(%edi),%mm0
pminub 152(%edi),%mm0
pminub 160(%edi),%mm0
pminub 168(%edi),%mm0
pminub 176(%edi),%mm0
pminub 184(%edi),%mm0
pminub 192(%edi),%mm0
pminub 200(%edi),%mm0
pminub 208(%edi),%mm0
pminub 216(%edi),%mm0
pminub 224(%edi),%mm0
pminub 232(%edi),%mm0
pminub 240(%edi),%mm0
pminub 248(%edi),%mm0
# mm0 contains 8 smallest metrics
# crunch down to single lowest metric
# (fold the upper half onto the lower half, halving the width each step)
movq %mm0,%mm1
psrlq $32,%mm0
pminub %mm1,%mm0
movq %mm0,%mm1
psrlq $16,%mm0
pminub %mm1,%mm0
movq %mm0,%mm1
psrlq $8,%mm0
pminub %mm1,%mm0 # minimum is now in the lowest byte of mm0
# NOTE(review): mm1 is not read again in the visible code before the next
# pass reloads it, so this reload looks redundant - confirm before removing.
movq 8(%edi),%mm1 # reload
punpcklbw %mm0,%mm0 # expand to all 8 bytes
pshufw $0,%mm0,%mm0
# mm0 now contains lowest metric in all 8 bytes
# subtract it from every output metric
# PSUBUSBM REG,MEM: saturating unsigned byte subtract of REG from the 8 bytes
# at MEM, with the result written back to MEM.
# Trashes %mm7
.macro PSUBUSBM REG,MEM
movq \MEM,%mm7
psubusb \REG,%mm7
movq %mm7,\MEM
.endm
# Subtract the broadcast minimum in mm0 from all 256 new path metrics,
# 8 bytes at a time.
PSUBUSBM %mm0,(%edi)
PSUBUSBM %mm0,8(%edi)
PSUBUSBM %mm0,16(%edi)
PSUBUSBM %mm0,24(%edi)
PSUBUSBM %mm0,32(%edi)
PSUBUSBM %mm0,40(%edi)
PSUBUSBM %mm0,48(%edi)
PSUBUSBM %mm0,56(%edi)
PSUBUSBM %mm0,64(%edi)
PSUBUSBM %mm0,72(%edi)
PSUBUSBM %mm0,80(%edi)
PSUBUSBM %mm0,88(%edi)
PSUBUSBM %mm0,96(%edi)
PSUBUSBM %mm0,104(%edi)
PSUBUSBM %mm0,112(%edi)
PSUBUSBM %mm0,120(%edi)
PSUBUSBM %mm0,128(%edi)
PSUBUSBM %mm0,136(%edi)
PSUBUSBM %mm0,144(%edi)
PSUBUSBM %mm0,152(%edi)
PSUBUSBM %mm0,160(%edi)
PSUBUSBM %mm0,168(%edi)
PSUBUSBM %mm0,176(%edi)
PSUBUSBM %mm0,184(%edi)
PSUBUSBM %mm0,192(%edi)
PSUBUSBM %mm0,200(%edi)
PSUBUSBM %mm0,208(%edi)
PSUBUSBM %mm0,216(%edi)
PSUBUSBM %mm0,224(%edi)
PSUBUSBM %mm0,232(%edi)
PSUBUSBM %mm0,240(%edi)
PSUBUSBM %mm0,248(%edi)
# End of one decoded bit: exchange the old/new metric pointers and loop.
done:
# swap metrics
movl %esi,%eax
movl %edi,%esi
movl %eax,%edi
jmp 1b
# Loop exit: leave MMX state, write the working pointers back into *vp and
# return 0.
2: emms
movl 8(%ebp),%ebx # ebx = vp
# stash metric pointers
movl %esi,OLDMETRICS(%ebx)
movl %edi,NEWMETRICS(%ebx)
movl %edx,DP(%ebx) # stash incremented value of vp->dp
xorl %eax,%eax # return value 0
# Common exit: restore the registers pushed at entry, in reverse order.
err: popl %ebx
popl %edx
popl %edi
popl %esi
popl %ebp
ret
# Constant with 31 in each of its 8 bytes. Loaded into mm7, where it serves
# as the 5-bit mask and the XOR constant for the branch metrics.
.data
.align 8
thirtyones:
.byte 31,31,31,31,31,31,31,31

50
libfec/sumsq.c Normal file
View File

@ -0,0 +1,50 @@
/* Compute the sum of the squares of a vector of signed shorts
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdlib.h>
#include "fec.h"
/* Per-CPU implementations that sumsq() below chooses between at run time.
 * Each takes a pointer to the samples and the number of samples.
 */
unsigned long long sumsq_port(signed short *,int);
#ifdef __i386__
/* NOTE(review): sumsq_sse is declared here, but the i386 dispatch in sumsq()
 * sends both the MMX and SSE modes to sumsq_mmx and never calls it.
 */
unsigned long long sumsq_mmx(signed short *,int);
unsigned long long sumsq_sse(signed short *,int);
unsigned long long sumsq_sse2(signed short *,int);
#endif
#ifdef __x86_64__
/* Declared for x86-64 as well, although sumsq() currently routes SSE2 to the portable code there */
unsigned long long sumsq_sse2(signed short *,int);
#endif
#ifdef __VEC__
unsigned long long sumsq_av(signed short *,int);
#endif
/* Sum of the squares of cnt signed 16-bit samples starting at in.
 * The implementation is picked from the global Cpu_mode; any mode that has
 * no dedicated routine for the architecture being compiled falls back to
 * the portable C version.
 */
unsigned long long sumsq(signed short *in,int cnt){
#ifdef __i386__
  /* SSE has no routine of its own here and shares the MMX one */
  if(Cpu_mode == MMX || Cpu_mode == SSE)
    return sumsq_mmx(in,cnt);
  if(Cpu_mode == SSE2)
    return sumsq_sse2(in,cnt);
#endif
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC)
    return sumsq_av(in,cnt);
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  return sumsq_port(in,cnt);
}

78
libfec/sumsq_av.c Normal file
View File

@ -0,0 +1,78 @@
/* Compute the sum of the squares of a vector of signed shorts
* This is the Altivec SIMD version. It's a little hairy because Altivec
* does not do 64-bit operations directly, so we have to accumulate separate
* 32-bit sums and carries
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include "fec.h"
/* Sum of the squares of cnt signed 16-bit samples starting at in (Altivec).
 *
 * Four 32-bit partial sums are kept in `sums`; every time one of them
 * overflows, the carry-out reported by vec_addc is counted in the matching
 * lane of `carries`. At the end the four lanes are folded into the
 * right-hand word and the result is sums + (carries << 32).
 *
 * Returns 0 when cnt is zero or negative.
 */
unsigned long long sumsq_av(signed short *in,int cnt){
  long long sum;
  vector signed short x;
  vector unsigned int sums,carries,s1,s2;
  int pad;
  /* Scratch union: c[15] is written to build the shift count passed to
   * vec_sro, iv/w[] are used to read individual words of a vector
   */
  union { vector unsigned char cv; vector unsigned int iv; unsigned int w[4]; unsigned char c[16];} s;

  /* Nothing to sum. This also protects the unaligned-head path below:
   * with cnt == 0 its shift count (8-cnt)<<4 would be 128, which does not
   * fit the shift field used for counts of 16..112 bits, so the head
   * vector would not be chopped and stray samples would be added.
   */
  if(cnt <= 0)
    return 0;

  carries = sums = (vector unsigned int)(0);
  /* Byte offset of in within its 16-byte line; taken through unsigned long
   * so the address is not squeezed through an int first
   */
  if((pad = (int)((unsigned long)in & 15))!=0){
    /* Load unaligned leading word */
    x = vec_perm(vec_ld(0,in),(vector signed short)(0),vec_lvsl(0,in));
    if(cnt < 8){ /* Shift right to chop stuff beyond end of short block */
      s.c[15] = (8-cnt)<<4; /* 16 bits for each of the 8-cnt unused samples */
      x = vec_sro(x,s.cv);
    }
    sums = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
    /* 8-pad/2 samples lie between in and the next 16-byte boundary */
    in += 8-pad/2;
    cnt -= 8-pad/2;
  }
  /* Everything is now aligned, rip through most of the block */
  while(cnt >= 8){
    x = vec_ld(0,in);
    /* A single vec_msum cannot overflow, but we have to sum it with
     * the earlier terms separately to handle the carries
     * The cast to unsigned is OK because squares are always positive
     */
    s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
    carries = vec_add(carries,vec_addc(sums,s1));
    sums = vec_add(sums,s1);
    in += 8;
    cnt -= 8;
  }
  /* Handle trailing fragment, if any */
  if(cnt > 0){
    x = vec_ld(0,in);
    s.c[15] = (8-cnt)<<4;
    x = vec_sro(x,s.cv);
    s1 = (vector unsigned int)vec_msum(x,x,(vector signed int)(0));
    carries = vec_add(carries,vec_addc(sums,s1));
    sums = vec_add(sums,s1);
  }
  /* Combine 4 sub-sums and carries */
  s.c[15] = 64; /* Shift right two 32-bit words */
  s1 = vec_sro(sums,s.cv);
  s2 = vec_sro(carries,s.cv);
  carries = vec_add(carries,vec_addc(sums,s1));
  sums = vec_add(sums,s1);
  carries = vec_add(carries,s2);
  s.c[15] = 32; /* Shift right one 32-bit word */
  s1 = vec_sro(sums,s.cv);
  s2 = vec_sro(carries,s.cv);
  carries = vec_add(carries,vec_addc(sums,s1));
  sums = vec_add(sums,s1);
  carries = vec_add(carries,s2);
  /* Extract sum and carries from right-hand words and combine into result */
  s.iv = sums;
  sum = s.w[3];
  s.iv = carries;
  sum += (long long)s.w[3] << 32;
  return sum;
}

35
libfec/sumsq_mmx.c Normal file
View File

@ -0,0 +1,35 @@
/* Compute the sum of the squares of a vector of signed shorts
* MMX-assisted version (also used on SSE)
* The SSE2 and MMX assist routines both operate on multiples of
* 8 words; they differ only in their alignment requirements (8 bytes
* for MMX, 16 bytes for SSE2)
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser Public License (LGPL)
*/
long long sumsq_mmx_assist(signed short *,int);
/* Sum of the squares of cnt signed 16-bit samples starting at in.
 * Samples ahead of the next 8-byte boundary and the final cnt & 7 samples
 * are squared in C; the whole groups of 8 in between are handed to
 * sumsq_mmx_assist().
 * Returns 0 when cnt is zero or negative.
 */
long long sumsq_mmx(signed short *in,int cnt){
  long long sum = 0;

  /* The loops below stop on cnt != 0, so a negative count would walk off
   * into memory; return 0 as the portable version does for that case
   */
  if(cnt <= 0)
    return 0;

  /* Handle stuff before the next 8-byte boundary.
   * The address is examined as unsigned long so it is not truncated
   * through an int first.
   */
  while(((unsigned long)in & 7) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  /* The assist routine consumes only complete groups of 8 samples */
  sum += sumsq_mmx_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;
  /* Handle up to 7 words at end */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}

83
libfec/sumsq_mmx_assist.s Normal file
View File

@ -0,0 +1,83 @@
# MMX assist routines for sumsq
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)
.text
# Evaluate sum of squares of signed 16-bit input samples
# long long sumsq_mmx_assist(signed short *in,int cnt);
# Arguments are read from the stack: in at 8(%ebp), cnt at 12(%ebp).
# The 64-bit total is returned in %edx:%eax.
# Only complete groups of 8 samples are consumed; when fewer than 8 remain
# the loop exits and the rest is left to the caller.
	.global sumsq_mmx_assist
	.type sumsq_mmx_assist,@function
	.align 16
sumsq_mmx_assist:
	pushl %ebp
	movl %esp,%ebp
	pushl %esi
	pushl %ecx
	pushl %ebx
	movl 8(%ebp),%esi	# esi = in
	movl 12(%ebp),%ecx	# ecx = cnt
	xor %eax,%eax		# edx:eax = running 64-bit total
	xor %edx,%edx
	# Each pmaddwd dword is the sum of two squares, at most
	# 2 * 32768**2 = 2**31, which still fits in 32 bits when read as
	# unsigned. Adding two of those dwords with paddd could reach 2**32 and
	# wrap to zero (four samples of -32768), so every dword is added to the
	# 64-bit total on its own.
1:	subl $8,%ecx
	jl 2f
	movq (%esi),%mm0	# S0 S1 S2 S3
	pmaddwd %mm0,%mm0	# (S0^2+S1^2) (S2^2+S3^2)
	movq 8(%esi),%mm6	# S4 S5 S6 S7
	pmaddwd %mm6,%mm6	# (S4^2+S5^2) (S6^2+S7^2)
	movd %mm0,%ebx		# S0^2+S1^2
	addl %ebx,%eax
	adcl $0,%edx
	psrlq $32,%mm0
	movd %mm0,%ebx		# S2^2+S3^2
	addl %ebx,%eax
	adcl $0,%edx
	movd %mm6,%ebx		# S4^2+S5^2
	addl %ebx,%eax
	adcl $0,%edx
	psrlq $32,%mm6
	movd %mm6,%ebx		# S6^2+S7^2
	addl %ebx,%eax
	adcl $0,%edx
	addl $16,%esi		# advance past the 8 samples just summed
	jmp 1b
2:	emms
	popl %ebx
	popl %ecx
	popl %esi
	popl %ebp
	ret
# Evaluate sum of squares of signed 16-bit input samples
# long sumsq_wd_mmx_assist(signed short *in,int cnt);
# Quick version, only safe for small numbers of small input values...
# The two running sums live in the 32-bit halves of %mm2 and are added with
# 32-bit arithmetic throughout, so nothing here detects or carries an overflow.
# Arguments are read from the stack (in at 8(%ebp), cnt at 12(%ebp)); the
# result is returned in %eax. Only complete groups of 8 samples are consumed.
# %ecx and %edx are overwritten and not restored.
.global sumsq_wd_mmx_assist
.type sumsq_wd_mmx_assist,@function
.align 16
sumsq_wd_mmx_assist:
pushl %ebp
movl %esp,%ebp
pushl %esi
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %mm2,%mm2 # zero sum
# Loop: leave as soon as fewer than 8 samples remain
1: subl $8,%ecx
jl 2f
movq (%esi),%mm0 # S0 S1 S2 S3
pmaddwd %mm0,%mm0 # (S0*S0+S1*S1) (S2*S2+S3*S3)
movq 8(%esi),%mm1 # S4 S5 S6 S7
pmaddwd %mm1,%mm1 # (S4*S4+S5*S5) (S6*S6+S7*S7)
paddd %mm1,%mm2
paddd %mm0,%mm2 # accumulate
addl $16,%esi # advance 8 samples (16 bytes)
jmp 1b
# Fold the two 32-bit halves of %mm2 into a single 32-bit result
2: movd %mm2,%eax # even sum
psrlq $32,%mm2
movd %mm2,%edx # odd sum
addl %edx,%eax
emms
popl %esi
popl %ebp
ret

16
libfec/sumsq_port.c Normal file
View File

@ -0,0 +1,16 @@
/* Compute the sum of the squares of a vector of signed shorts
* Portable C version
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
/* Portable sum of squares: walks the cnt samples starting at in and adds
 * the square of each one to a 64-bit total. A count of zero or less
 * yields 0.
 */
unsigned long long sumsq_port(signed short *in,int cnt){
  long long total = 0;
  const signed short *p = in;
  int remaining;

  for(remaining = cnt; remaining > 0; remaining--){
    int sample = *p++;
    total += sample * sample;
  }
  return total;
}

33
libfec/sumsq_sse2.c Normal file
View File

@ -0,0 +1,33 @@
/* Compute the sum of the squares of a vector of signed shorts
* The SSE2 and MMX assist routines both operate on multiples of
* 8 words; they differ only in their alignment requirements (8 bytes
* for MMX, 16 bytes for SSE2)
* Copyright 2004 Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser Public License (LGPL)
*/
long long sumsq_sse2_assist(signed short *,int);
/* Sum of the squares of cnt signed 16-bit samples starting at in.
 * Samples ahead of the next 16-byte boundary and the final cnt & 7 samples
 * are squared in C; the whole groups of 8 in between are handed to
 * sumsq_sse2_assist(), which needs its input 16-byte aligned.
 * Returns 0 when cnt is zero or negative.
 */
long long sumsq_sse2(signed short *in,int cnt){
  long long sum = 0;

  /* The loops below stop on cnt != 0, so a negative count would walk off
   * into memory; return 0 as the portable version does for that case
   */
  if(cnt <= 0)
    return 0;

  /* Handle stuff before the next 16-byte boundary.
   * The address is examined as unsigned long so it is not truncated
   * through an int first.
   */
  while(((unsigned long)in & 15) != 0 && cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  /* The assist routine consumes only complete groups of 8 samples */
  sum += sumsq_sse2_assist(in,cnt);
  in += cnt & ~7;
  cnt &= 7;
  /* Handle up to 7 trailing words */
  while(cnt != 0){
    sum += (long)in[0] * in[0];
    in++;
    cnt--;
  }
  return sum;
}

View File

@ -0,0 +1,49 @@
# SSE2 assist routines for sumsq
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)
.text
# Evaluate sum of squares of signed 16-bit input samples
# long long sumsq_sse2_assist(signed short *in,int cnt);
# Arguments are read from the stack (in at 8(%ebp), cnt at 12(%ebp)); the
# 64-bit total is returned in %edx:%eax.
# Only complete groups of 8 samples are consumed. The samples are fetched
# with movaps, so in is expected to be 16-byte aligned.
# %xmm2 holds two 64-bit running sums; every 32-bit pmaddwd result is first
# isolated in a 64-bit lane (by masking or shifting) and then added with paddq.
.global sumsq_sse2_assist
.type sumsq_sse2_assist,@function
.align 16
sumsq_sse2_assist:
pushl %ebp
movl %esp,%ebp
pushl %esi
pushl %ecx
movl 8(%ebp),%esi
movl 12(%ebp),%ecx
pxor %xmm2,%xmm2 # zero sum
# NOTE(review): `low` is referenced by absolute address here; if this object
# is linked into a shared library that presumably needs a text relocation - confirm
movaps low,%xmm3 # load mask
# Loop: leave as soon as fewer than 8 samples remain
1: subl $8,%ecx
jl 2f
movaps (%esi),%xmm0 # S0 S1 S2 S3 S4 S5 S6 S7
pmaddwd %xmm0,%xmm0 # (S0*S0+S1*S1) (S2*S2+S3*S3) (S4*S4+S5*S5) (S6*S6+S7*S7)
movaps %xmm0,%xmm1
pand %xmm3,%xmm1 # (S0*S0+S1*S1) 0 (S4*S4+S5*S5) 0
paddq %xmm1,%xmm2 # sum even-numbered dwords
psrlq $32,%xmm0 # (S2*S2+S3*S3) 0 (S6*S6+S7*S7) 0
paddq %xmm0,%xmm2 # sum odd-numbered dwords
addl $16,%esi # advance 8 samples (16 bytes)
jmp 1b
# Add the upper 64-bit sum to the lower one, then split the result into edx:eax
2: movaps %xmm2,%xmm0
psrldq $8,%xmm0
paddq %xmm2,%xmm0 # combine 64-bit sums
movd %xmm0,%eax # low 32 bits of sum
psrldq $4,%xmm0
movd %xmm0,%edx # high 32 bits of sum
popl %ecx
popl %esi
popl %ebp
ret
.data
.align 16
# Mask that keeps dwords 0 and 2 of a 128-bit register and clears dwords 1 and 3
low: .byte 255,255,255,255,0,0,0,0,255,255,255,255,0,0,0,0

101
libfec/sumsq_test.c Normal file
View File

@ -0,0 +1,101 @@
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <time.h>
#include "config.h"
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#include "fec.h"
#if HAVE_GETOPT_LONG
/* Long-option table handed to getopt_long() in main().
 * Each entry is {long name, takes-argument flag, NULL, short option letter};
 * the letters match the cases of the option switch in main(). The table is
 * terminated by an entry whose name is NULL.
 * Note that 'T' is accepted in main()'s short-option string but has no
 * long form here.
 */
struct option Options[] = {
{"frame-length",1,NULL,'l'},
{"frame-count",1,NULL,'n'},
{"verbose",0,NULL,'v'},
{"force-altivec",0,NULL,'a'},
{"force-port",0,NULL,'p'},
{"force-mmx",0,NULL,'m'},
{"force-sse",0,NULL,'s'},
{"force-sse2",0,NULL,'t'},
{NULL},
};
#endif
int Verbose = 0;
/* Test driver for sumsq().
 *
 * Default mode: for each trial, fill the buffer with random samples, pick a
 * random offset (0..7 samples) and length, and compare sumsq() against
 * sumsq_port() on that slice, printing every mismatch.
 * With -T: just call sumsq() on the whole (zeroed) buffer `trials` times,
 * for timing.
 *
 * Options: -l frame length in samples, -n number of trials, -v verbose,
 * -a/-p/-m/-s/-t force Cpu_mode to ALTIVEC/PORT/MMX/SSE/SSE2.
 *
 * Exit status is 0 when every comparison matched, 1 on a mismatch, a
 * non-positive frame length, or allocation failure.
 */
int main(int argc,char *argv[]){
  signed short *buf;
  int i,d,trial,trials=10000;
  int bufsize = 2048;
  long long port_sum,simd_sum;
  time_t t;
  int timetrials=0;
  int errors = 0;

  find_cpu_mode();
  time(&t);
  srandom(t);
  /* The loop header depends on which getopt flavour is available; both
   * variants open the same brace, closed after the switch below
   */
#if HAVE_GETOPT_LONG
  while((d = getopt_long(argc,argv,"vapmstl:n:T",Options,NULL)) != EOF){
#else
  while((d = getopt(argc,argv,"vapmstl:n:T")) != EOF){
#endif
    switch(d){
    case 'a':
      Cpu_mode = ALTIVEC;
      break;
    case 'p':
      Cpu_mode = PORT;
      break;
    case 'm':
      Cpu_mode = MMX;
      break;
    case 's':
      Cpu_mode = SSE;
      break;
    case 't':
      Cpu_mode = SSE2;
      break;
    case 'l':
      bufsize = atoi(optarg);
      break;
    case 'n':
      trials = atoi(optarg);
      break;
    case 'v':
      Verbose++;
      break;
    case 'T':
      timetrials++;
      break;
    }
  }
  /* bufsize is used as a modulus and as an allocation size below */
  if(bufsize <= 0){
    fprintf(stderr,"%s: frame length must be positive\n",argv[0]);
    exit(1);
  }
  buf = (signed short *)calloc(bufsize,sizeof(signed short));
  if(buf == NULL){
    fprintf(stderr,"%s: cannot allocate %d samples\n",argv[0],bufsize);
    exit(1);
  }
  if(timetrials){
    for(trial=0;trial<trials;trial++){
      (void)sumsq(buf,bufsize);
    }
  } else {
    for(trial=0;trial<trials;trial++){
      int length,offset;

      /* offset + length never exceeds bufsize-1, so the slice stays inside buf */
      offset = random() & 7;
      length = (random() % bufsize) - offset;
      if(length <= 0)
        continue;
      for(i=0;i<bufsize;i++)
        buf[i] = random();
      port_sum = sumsq_port(buf+offset,length);
      simd_sum = sumsq(buf+offset,length);
      if(port_sum != simd_sum){
        printf("offset %d len %d port_sum = %lld simd_sum = %lld ",offset,length,port_sum,simd_sum);
        printf("ERROR! diff = %lld\n",simd_sum-port_sum);
        errors++;
      }
    }
  }
  free(buf);
  /* Let scripts and "make test" see a failed comparison */
  exit(errors ? 1 : 0);
}

188
libfec/viterbi27.c Normal file
View File

@ -0,0 +1,188 @@
/* K=7 r=1/2 Viterbi decoder with optional Intel or PowerPC SIMD
* Copyright Feb 2004, Phil Karn, KA9Q
*/
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "fec.h"
/* Create a new K=7 r=1/2 Viterbi decoder instance.
 * Runs find_cpu_mode() first, then forwards len unchanged to the
 * create routine matching Cpu_mode; modes without a dedicated routine on
 * this architecture get the portable decoder.
 */
void *create_viterbi27(int len){
  find_cpu_mode();
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC)
    return create_viterbi27_av(len);
#endif
#ifdef __i386__
  if(Cpu_mode == MMX)
    return create_viterbi27_mmx(len);
  if(Cpu_mode == SSE)
    return create_viterbi27_sse(len);
  if(Cpu_mode == SSE2)
    return create_viterbi27_sse2(len);
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  return create_viterbi27_port(len);
}
/* Hand the two generator polynomials to the implementation selected by
 * Cpu_mode; modes without a dedicated routine on this architecture use the
 * portable one. Cpu_mode is used as it currently stands - this function
 * does not call find_cpu_mode() itself.
 */
void set_viterbi27_polynomial(int polys[2]){
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC){
    set_viterbi27_polynomial_av(polys);
    return;
  }
#endif
#ifdef __i386__
  if(Cpu_mode == MMX){
    set_viterbi27_polynomial_mmx(polys);
    return;
  }
  if(Cpu_mode == SSE){
    set_viterbi27_polynomial_sse(polys);
    return;
  }
  if(Cpu_mode == SSE2){
    set_viterbi27_polynomial_sse2(polys);
    return;
  }
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  set_viterbi27_polynomial_port(polys);
}
/* Prepare decoder p for the start of a new frame.
 * Forwards p and starting_state to the init routine matching Cpu_mode and
 * returns whatever that routine returns; modes without a dedicated routine
 * on this architecture use the portable one.
 */
int init_viterbi27(void *p,int starting_state){
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC)
    return init_viterbi27_av(p,starting_state);
#endif
#ifdef __i386__
  if(Cpu_mode == MMX)
    return init_viterbi27_mmx(p,starting_state);
  if(Cpu_mode == SSE)
    return init_viterbi27_sse(p,starting_state);
  if(Cpu_mode == SSE2)
    return init_viterbi27_sse2(p,starting_state);
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  return init_viterbi27_port(p,starting_state);
}
/* Viterbi chainback.
 * Forwards all arguments to the chainback routine matching Cpu_mode and
 * returns its result; modes without a dedicated routine on this
 * architecture use the portable one.
 */
int chainback_viterbi27(
      void *p,
      unsigned char *data, /* Decoded output data */
      unsigned int nbits, /* Number of data bits */
      unsigned int endstate){ /* Terminal encoder state */
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC)
    return chainback_viterbi27_av(p,data,nbits,endstate);
#endif
#ifdef __i386__
  if(Cpu_mode == MMX)
    return chainback_viterbi27_mmx(p,data,nbits,endstate);
  if(Cpu_mode == SSE)
    return chainback_viterbi27_sse(p,data,nbits,endstate);
  if(Cpu_mode == SSE2)
    return chainback_viterbi27_sse2(p,data,nbits,endstate);
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  return chainback_viterbi27_port(p,data,nbits,endstate);
}
/* Destroy decoder instance p using the delete routine matching Cpu_mode;
 * modes without a dedicated routine on this architecture use the portable
 * one.
 */
void delete_viterbi27(void *p){
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC){
    delete_viterbi27_av(p);
    return;
  }
#endif
#ifdef __i386__
  if(Cpu_mode == MMX){
    delete_viterbi27_mmx(p);
    return;
  }
  if(Cpu_mode == SSE){
    delete_viterbi27_sse(p);
    return;
  }
  if(Cpu_mode == SSE2){
    delete_viterbi27_sse2(p);
    return;
  }
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  delete_viterbi27_port(p);
}
/* Feed a block of demodulated symbols to decoder p.
 * nbits counts decoded data bits, not symbols.
 * Returns -1 when p is NULL. Otherwise the update routine matching Cpu_mode
 * is called, its own return value is discarded, and 0 is returned.
 */
int update_viterbi27_blk(void *p,unsigned char syms[],int nbits){
  if(p == NULL)
    return -1;
#ifdef __VEC__
  if(Cpu_mode == ALTIVEC){
    update_viterbi27_blk_av(p,syms,nbits);
    return 0;
  }
#endif
#ifdef __i386__
  if(Cpu_mode == MMX){
    update_viterbi27_blk_mmx(p,syms,nbits);
    return 0;
  }
  if(Cpu_mode == SSE){
    update_viterbi27_blk_sse(p,syms,nbits);
    return 0;
  }
  if(Cpu_mode == SSE2){
    update_viterbi27_blk_sse2(p,syms,nbits);
    return 0;
  }
#endif
  /* PORT, SSE2 on x86-64, and every mode not handled above */
  update_viterbi27_blk_port(p,syms,nbits);
  return 0;
}

210
libfec/viterbi27_av.c Normal file
View File

@ -0,0 +1,210 @@
/* K=7 r=1/2 Viterbi decoder for PowerPC G4/G5 Altivec instructions
* Feb 2004, Phil Karn, KA9Q
*/
#include <stdio.h>
#include <memory.h>
#include <stdlib.h>
#include "fec.h"
/* One set of 64 decision bytes, viewable as bytes (c) or as four 16-byte vectors (v) */
typedef union { long long p; unsigned char c[64]; vector bool char v[4]; } decision_t;
/* One set of 64 path metrics, one byte each, viewable as bytes (c) or as four 16-byte vectors (v) */
typedef union { long long p; unsigned char c[64]; vector unsigned char v[4]; } metric_t;
/* Branch tables, one per polynomial: 32 bytes each, set to 255 or 0 by set_viterbi27_polynomial_av() */
static union branchtab27 { unsigned char c[32]; vector unsigned char v[2];} Branchtab27[2];
/* Non-zero once set_viterbi27_polynomial_av() has filled Branchtab27 */
static int Init = 0;
/* State info for instance of Viterbi decoder
* Don't change this without also changing references in [mmx|sse|sse2]bfly29.s!
* NOTE(review): the warning above names x86 assembler files; it looks carried
* over from the x86 decoders - confirm whether anything depends on this
* Altivec layout.
*/
struct v27 {
metric_t metrics1; /* path metric buffer 1 */
metric_t metrics2; /* path metric buffer 2 */
decision_t *dp; /* Pointer to current decision */
metric_t *old_metrics,*new_metrics; /* Pointers to path metrics, swapped on every bit */
decision_t *decisions; /* Beginning of decisions for block */
};
/* Reset decoder p for the start of a new frame.
 * Points old_metrics/new_metrics at buffers 1 and 2, rewinds the decision
 * pointer to the start of the decision array, sets every path metric in
 * buffer 1 to 63 and then the metric of (starting_state & 63) to 0.
 * Returns -1 if p is NULL, otherwise 0.
 */
int init_viterbi27_av(void *p,int starting_state){
  struct v27 *dec = p;
  int n;

  if(dec == NULL)
    return -1;

  dec->old_metrics = &dec->metrics1;
  dec->new_metrics = &dec->metrics2;
  dec->dp = dec->decisions;

  for(n=0;n<4;n++)
    dec->old_metrics->v[n] = (vector unsigned char)(63);
  /* Bias known start state */
  dec->old_metrics->c[starting_state & 63] = 0;
  return 0;
}
/* Fill both branch tables from the two polynomials.
 * For each of the 32 table positions the entry is 255 when exactly one of
 * "polynomial is negative" and "parity of (2*state & |polynomial|)" holds,
 * and 0 otherwise. Bumps Init so create_viterbi27_av() knows the tables
 * have been set.
 */
void set_viterbi27_polynomial_av(int polys[2]){
  int s,k;

  for(s=0;s<32;s++){
    for(k=0;k<2;k++){
      Branchtab27[k].c[s] =
        ((polys[k] < 0) ^ parity((2*s) & abs(polys[k]))) ? 255 : 0;
    }
  }
  Init++;
}
/* Allocate a decoder with room for len+6 decision records.
 * If no polynomial has been set yet, the defaults V27POLYA/V27POLYB are
 * installed first. The new decoder is initialised with starting state 0.
 * Returns NULL if either allocation fails.
 */
void *create_viterbi27_av(int len){
  struct v27 *dec;

  if(Init == 0){
    int defaults[2] = { V27POLYA,V27POLYB };
    set_viterbi27_polynomial_av(defaults);
  }

  dec = (struct v27 *)malloc(sizeof(*dec));
  if(dec == NULL)
    return NULL;

  dec->decisions = (decision_t *)malloc((len+6)*sizeof(decision_t));
  if(dec->decisions == NULL){
    free(dec);
    return NULL;
  }

  init_viterbi27_av(dec,0);
  return dec;
}
/* Viterbi chainback.
 * Walks the stored decisions backwards from endstate and writes the decoded
 * bits into data[], one byte for every 8 bits.
 * Returns -1 if p is NULL (checked before the decoder state is touched),
 * otherwise 0.
 */
int chainback_viterbi27_av(
      void *p,
      unsigned char *data, /* Decoded output data */
      unsigned int nbits, /* Number of data bits */
      unsigned int endstate){ /* Terminal encoder state */
  struct v27 *vp = p;
  decision_t *d;

  if(p == NULL)
    return -1;
  /* Only read the decoder state once p is known to be valid */
  d = (decision_t *)vp->decisions;

  /* Make room beyond the end of the encoder register so we can
   * accumulate a full byte of decoded data
   */
  endstate %= 64;
  endstate <<= 2;
  /* The store into data[] only needs to be done every 8 bits.
   * But this avoids a conditional branch, and the writes will
   * combine in the cache anyway
   */
  d += 6; /* Look past tail */
  while(nbits-- != 0){
    int k;
    /* Decision byte for the current 6-bit state; its low bit is the decoded bit */
    k = d[nbits].c[endstate>>2] & 1;
    data[nbits>>3] = endstate = (endstate >> 1) | (k << 7);
  }
  return 0;
}
/* Release a decoder created by create_viterbi27_av(): its decision array
 * first, then the decoder itself. A NULL p is ignored.
 */
void delete_viterbi27_av(void *p){
  struct v27 *dec = p;

  if(dec == NULL)
    return;
  free(dec->decisions);
  free(dec);
}
/* Process received symbols
 * Runs one add-compare-select step per data bit, consuming two symbol bytes
 * from syms for each of the nbits bits. Each step writes one decision_t at
 * the current decision pointer, computes the new path metrics from the old
 * ones, and swaps the old/new metric pointers. The advanced decision
 * pointer is stored back into the decoder at the end.
 * Returns -1 if p is NULL, otherwise 0.
 */
int update_viterbi27_blk_av(void *p,unsigned char *syms,int nbits){
struct v27 *vp = p;
decision_t *d;
if(p == NULL)
return -1;
d = (decision_t *)vp->dp;
while(nbits--){
vector unsigned char survivor0,survivor1,sym0v,sym1v;
vector bool char decision0,decision1;
vector unsigned char metric,m_metric,m0,m1,m2,m3;
void *tmp;
/* sym0v.0 = syms[0]; sym0v.1 = syms[1] */
/* syms may be unaligned: two loads are permuted so syms[0] lands in element 0 */
sym0v = vec_perm(vec_ld(0,syms),vec_ld(1,syms),vec_lvsl(0,syms));
sym1v = vec_splat(sym0v,1); /* Splat syms[1] across sym1v */
sym0v = vec_splat(sym0v,0); /* Splat syms[0] across sym0v */
syms += 2;
/* Do the 32 butterflies as two interleaved groups of 16 each to keep the pipes full */
/* Form first set of 16 branch metrics */
/* Average of the two XORed symbols, shifted right by 3, so each metric is
 * in 0..31; m_metric is its complement, 31 - metric
 */
metric = vec_avg(vec_xor(Branchtab27[0].v[0],sym0v),vec_xor(Branchtab27[1].v[0],sym1v));
metric = vec_sr(metric,(vector unsigned char)(3));
m_metric = vec_sub((vector unsigned char)(31),metric);
/* Form first set of path metrics */
/* Saturating adds: m0/m1 are the two candidates for one output, m2/m3 for the other */
m0 = vec_adds(vp->old_metrics->v[0],metric);
m3 = vec_adds(vp->old_metrics->v[2],metric);
m1 = vec_adds(vp->old_metrics->v[2],m_metric);
m2 = vec_adds(vp->old_metrics->v[0],m_metric);
/* Form second set of 16 branch metrics */
metric = vec_avg(vec_xor(Branchtab27[0].v[1],sym0v),vec_xor(Branchtab27[1].v[1],sym1v));
metric = vec_sr(metric,(vector unsigned char)(3));
m_metric = vec_sub((vector unsigned char)(31),metric);
/* Compare and select first set */
/* A decision lane is set where the first candidate is the larger one; the survivor is the smaller metric */
decision0 = vec_cmpgt(m0,m1);
decision1 = vec_cmpgt(m2,m3);
survivor0 = vec_min(m0,m1);
survivor1 = vec_min(m2,m3);
/* Compute second set of path metrics */
m0 = vec_adds(vp->old_metrics->v[1],metric);
m3 = vec_adds(vp->old_metrics->v[3],metric);
m1 = vec_adds(vp->old_metrics->v[3],m_metric);
m2 = vec_adds(vp->old_metrics->v[1],m_metric);
/* Interleave and store first decisions and survivors */
d->v[0] = vec_mergeh(decision0,decision1);
d->v[1] = vec_mergel(decision0,decision1);
vp->new_metrics->v[0] = vec_mergeh(survivor0,survivor1);
vp->new_metrics->v[1] = vec_mergel(survivor0,survivor1);
/* Compare and select second set */
decision0 = vec_cmpgt(m0,m1);
decision1 = vec_cmpgt(m2,m3);
survivor0 = vec_min(m0,m1);
survivor1 = vec_min(m2,m3);
/* Interleave and store second set of decisions and survivors */
d->v[2] = vec_mergeh(decision0,decision1);
d->v[3] = vec_mergel(decision0,decision1);
vp->new_metrics->v[2] = vec_mergeh(survivor0,survivor1);
vp->new_metrics->v[3] = vec_mergel(survivor0,survivor1);
/* renormalize if necessary */
/* Only the first metric is tested against the threshold of 105 */
if(vp->new_metrics->c[0] >= 105){
vector unsigned char scale0,scale1;
/* Find smallest metric and splat */
scale0 = vec_min(vp->new_metrics->v[0],vp->new_metrics->v[1]);
scale1 = vec_min(vp->new_metrics->v[2],vp->new_metrics->v[3]);
scale0 = vec_min(scale0,scale1);
/* Fold the vector onto itself at distances 8, 4, 2 and 1 bytes so every byte ends up holding the minimum */
scale0 = vec_min(scale0,vec_sld(scale0,scale0,8));
scale0 = vec_min(scale0,vec_sld(scale0,scale0,4));
scale0 = vec_min(scale0,vec_sld(scale0,scale0,2));
scale0 = vec_min(scale0,vec_sld(scale0,scale0,1));
/* Now subtract from all metrics */
vp->new_metrics->v[0] = vec_subs(vp->new_metrics->v[0],scale0);
vp->new_metrics->v[1] = vec_subs(vp->new_metrics->v[1],scale0);
vp->new_metrics->v[2] = vec_subs(vp->new_metrics->v[2],scale0);
vp->new_metrics->v[3] = vec_subs(vp->new_metrics->v[3],scale0);
}
d++;
/* Swap pointers to old and new metrics */
tmp = vp->old_metrics;
vp->old_metrics = vp->new_metrics;
vp->new_metrics = tmp;
}
/* Remember where the next block's decisions go */
vp->dp = d;
return 0;
}

Some files were not shown because too many files have changed in this diff Show More