diff --git a/CFA/demosaic.v b/Demosaic/demosaic.v
similarity index 100%
rename from CFA/demosaic.v
rename to Demosaic/demosaic.v
diff --git a/CFA/demosaic2.v b/Demosaic/demosaic2.v
similarity index 95%
rename from CFA/demosaic2.v
rename to Demosaic/demosaic2.v
index d4cb855..ad02b91 100644
--- a/CFA/demosaic2.v
+++ b/Demosaic/demosaic2.v
@@ -2,6 +2,7 @@ module demosaic2 #(
     parameter IM_WIDTH = 512, // 图像宽度
     parameter IM_HEIGHT = 256, // 图像高度
-    parameter RAW_TYPE = 3 // 0:grbg 1:rggb 2:bggr 3:gbrg
+    parameter RAW_TYPE = 3, // Bayer pattern: 0:grbg 1:rggb 2:bggr 3:gbrg
+    parameter DATA_SIZE = 16 // bit width of data_in samples and of out_r/out_g/out_b (last parameter: no trailing comma)
 )(
     // 基本信号
     input clk,
@@ -9,15 +10,15 @@ module demosaic2 #(
 
     // 数据输入信号
     input data_en,
-    input [15:0] data_in [2:0], // 数据输入线,0、1、2分别表示第一、二、三行
+    input [DATA_SIZE - 1:0] data_in [2:0], // data input; indices 0, 1, 2 are the first, second and third rows
     output reg data_que, // 数据请求线,高电平:请求三个数据,直到读取完才拉低
     output reg data_line, // 新一行请求数据线,高电平:请求九个数据,直到读取完才拉低
 
     // en: 输出数据有效信号,高电平有效
     output reg out_en,
-    output reg [15:0] out_r,
-    output reg [15:0] out_g,
-    output reg [15:0] out_b
+    output reg [DATA_SIZE - 1:0] out_r,
+    output reg [DATA_SIZE - 1:0] out_g,
+    output reg [DATA_SIZE - 1:0] out_b
 );
 
     // 常量,包括状态机
diff --git a/CFA/sim/tb_demosaic.v b/Demosaic/sim/tb_demosaic.v
similarity index 100%
rename from CFA/sim/tb_demosaic.v
rename to Demosaic/sim/tb_demosaic.v
diff --git a/CFA/sim/test.dat b/Demosaic/sim/test.dat
similarity index 100%
rename from CFA/sim/test.dat
rename to Demosaic/sim/test.dat
diff --git a/CFA/sim/test.raw b/Demosaic/sim/test.raw
similarity index 100%
rename from CFA/sim/test.raw
rename to Demosaic/sim/test.raw
diff --git a/CFA/sim/transform/im.tif b/Demosaic/sim/transform/im.tif
similarity index 100%
rename from CFA/sim/transform/im.tif
rename to Demosaic/sim/transform/im.tif
diff --git a/CFA/sim/transform/raw_cut.py b/Demosaic/sim/transform/raw_cut.py
similarity index 100%
rename from CFA/sim/transform/raw_cut.py
rename to Demosaic/sim/transform/raw_cut.py
diff --git a/CFA/sim/transform/raw_to_image.py b/Demosaic/sim/transform/raw_to_image.py
similarity index 100%
rename from 
CFA/sim/transform/raw_to_image.py rename to Demosaic/sim/transform/raw_to_image.py diff --git a/CFA/sim/transform/test.dat b/Demosaic/sim/transform/test.dat similarity index 100% rename from CFA/sim/transform/test.dat rename to Demosaic/sim/transform/test.dat diff --git a/CFA/sim/transform/test.tif b/Demosaic/sim/transform/test.tif similarity index 100% rename from CFA/sim/transform/test.tif rename to Demosaic/sim/transform/test.tif diff --git a/IM_PROCESS/chanels_to_RGB.v b/IM_PROCESS/chanels_to_RGB.v deleted file mode 100644 index 63f87cf..0000000 --- a/IM_PROCESS/chanels_to_RGB.v +++ /dev/null @@ -1,36 +0,0 @@ -`timescale 1ns/1ps - -// 三通道图像合成一个RGB图像 -module chanels_to_RGB #( - parameter IN_DEPTH = 12, // 输入图像的色深 - parameter OUT_DEPTH = 8 // 输出图像的色深 -) ( - input clk, - input reset, - - input in_en, - input [IN_DEPTH - 1:0] data_in [2:0], // 0:R 1:G 2:B - - output reg out_en, - output reg [3 * OUT_DEPTH - 1:0] data_out -); - reg [31:0] data_cal [2:0]; // 用于保存运算结果,防止溢出 - - always @(posedge clk or posedge reset) begin - if (reset) begin - // 初始化 - out_en <= 0; - data_out <= 0; - end - else begin - if (in_en) begin - data_cal[0] <= data_in[0] * OUT_DEPTH / IN_DEPTH; - data_cal[1] <= data_in[1] * OUT_DEPTH / IN_DEPTH; - data_cal[2] <= data_in[2] * OUT_DEPTH / IN_DEPTH; - - data_out <= {data_cal[0][OUT_DEPTH - 1:0], data_cal[1][OUT_DEPTH - 1:0],data_cal[2][OUT_DEPTH - 1:0]}; - end - out_en <= in_en; - end - end -endmodule \ No newline at end of file diff --git a/IM_PROCESS/scale_down_nearest.v b/IM_PROCESS/scale_down_nearest.v deleted file mode 100644 index 3d137a9..0000000 --- a/IM_PROCESS/scale_down_nearest.v +++ /dev/null @@ -1,20 +0,0 @@ -module scale_down_nearest #( - parameter IN_WIDTH = 1920, - parameter IN_HEIGHT = 1080, - parameter OUT_WIDTH = 640, - parameter OUT_HEIGHT = 480, - parameter COLOR_DEPTH = 8 -) ( - input clk, - input reset, - - input in_en, - input [3 * COLOR_DEPTH - 1:0] data_in, - - output out_en, - output [3 * COLOR_DEPTH - 1:0] data_out -); 
-
-    localparam
-
-endmodule
\ No newline at end of file
diff --git a/Merge/chanels_to_RGB.v b/Merge/chanels_to_RGB.v
new file mode 100644
index 0000000..08f71ca
--- /dev/null
+++ b/Merge/chanels_to_RGB.v
@@ -0,0 +1,81 @@
+`timescale 1ns/1ps
+
+// Merges three colour channels into one packed RGB pixel and buffers it in a FIFO.
+// Each channel is converted from IN_DEPTH to OUT_DEPTH bits with a power-of-two shift,
+// i.e. scaled by 2^OUT_DEPTH / 2^IN_DEPTH rather than by OUT_DEPTH / IN_DEPTH.
+module chanels_to_RGB #(
+    parameter IN_DEPTH = 12,    // bit depth of each input channel
+    parameter OUT_DEPTH = 8     // bit depth of each output channel
+) (
+    input clk,
+    input reset,                // asynchronous, active high
+
+    input in_en,                // data_in is valid in this cycle
+    input [IN_DEPTH - 1:0] data_in [2:0],   // 0:R 1:G 2:B
+
+    // Output side
+    input data_que,             // downstream data request
+    output wire out_en,         // high while a pixel is being read out of the FIFO
+    output wire [3 * OUT_DEPTH - 1:0] data_out  // {R, G, B}, driven by the FIFO read port
+);
+    // Shift amounts for the depth conversion; at most one of them is non-zero.
+    localparam SHIFT_R = (IN_DEPTH > OUT_DEPTH) ? (IN_DEPTH - OUT_DEPTH) : 0;
+    localparam SHIFT_L = (OUT_DEPTH > IN_DEPTH) ? (OUT_DEPTH - IN_DEPTH) : 0;
+
+    reg fifo_en;                        // FIFO write strobe, aligned with fifo_in
+    reg [3 * OUT_DEPTH - 1:0] fifo_in;  // pixel to be written into the FIFO
+    wire fifo_empty;
+    // wire fifo_alempty;
+
+    // Convert one channel; the result is truncated to OUT_DEPTH bits.
+    function [OUT_DEPTH - 1:0] rescale;
+        input [IN_DEPTH - 1:0] value;
+        reg [31:0] wide;    // 32-bit intermediate so the left shift keeps its upper bits (depths up to 32)
+        begin
+            wide = value;
+            rescale = (wide << SHIFT_L) >> SHIFT_R;
+        end
+    endfunction
+
+    always @(posedge clk or posedge reset) begin
+        if (reset) begin
+            fifo_en <= 0;
+            fifo_in <= 0;
+        end
+        else begin
+            // Pack the current sample directly so that the word written to the FIFO
+            // belongs to the same input cycle as its write strobe.
+            if (in_en)
+                fifo_in <= {rescale(data_in[0]), rescale(data_in[1]), rescale(data_in[2])};
+            fifo_en <= in_en;
+        end
+    end
+
+    // Read from the FIFO only when data is requested and the FIFO is not empty.
+    assign out_en = data_que && !fifo_empty;
+
+    // NOTE(review): async_fifo is defined outside this patch. The *_n port names suggest
+    // active-low resets, so the active-high reset is inverted here - confirm.
+    // NOTE(review): ASIZE = 128 looks like a depth; if ASIZE is an address width this
+    // should presumably be 7 - confirm against async_fifo.
+    async_fifo #(
+        .DSIZE(3 * OUT_DEPTH),
+        .ASIZE(128)
+    ) RGB_FIFO (
+        .wclk(clk),
+        .rclk(clk),
+        .wrst_n(~reset),
+        .rrst_n(~reset),
+
+        .winc(fifo_en),
+        .wdata(fifo_in),
+        // .wfull(),
+        // .awfull(),
+
+        // .arempty(fifo_alempty)
+        .rempty(fifo_empty),
+        .rdata(data_out),
+        .rinc(out_en)
+    );
+
+endmodule
\ No newline at end of file
diff --git a/RAM/RGB_to_RAM.v b/RAM/RGB_to_RAM.v
index 1894892..a539397 100644
--- a/RAM/RGB_to_RAM.v
+++ b/RAM/RGB_to_RAM.v
@@ -1,5 +1,97 @@
-module RGB_to_RAM (
-    ports
+`timescale 1ns/1ps
+
+// Splits each packed pixel into two 16-bit words - first the bits above bit 15
+// (R for 8-bit colour), then the low 16 bits ({G, B}) - and queues them in a FIFO.
+// Assumes 16 <= 3 * COLOR_DEPTH <= 32 so that both words fit in 16 bits.
+module RGB_to_RAM #(
+    parameter COLOR_DEPTH = 8,
+    parameter FIFO_SIZE = 128
+) (
+    input clk,
+    input reset,                // asynchronous, active high
+
+    // Pixel input
+    output reg in_que,          // high while this module is waiting for a pixel
+    input in_en,                // data_in is valid in this cycle
+    input [3 * COLOR_DEPTH - 1:0] data_in,
+
+    // SRAM write side
+    input write_que,            // downstream requests a word
+    output write_en,            // high while a word is being read out of the FIFO
+    output [15:0] data_write    // driven by the FIFO read port
 );
+    // State machine
+    localparam READ_DATA = 0;   // wait for a pixel
+    localparam SEND_R = 1;      // queue the upper word
+    localparam SEND_GB = 2;     // queue the lower word
+
+    reg [2:0] state;
+    reg [3 * COLOR_DEPTH - 1:0] data_cache;     // pixel currently being split
+    reg [15:0] fifo_data;                       // word presented to the FIFO
+    reg fifo_we;                                // FIFO write strobe, aligned with fifo_data
+    wire fifo_full, fifo_empty;
+
+    // NOTE(review): async_fifo is defined outside this patch. The *_n port names suggest
+    // active-low resets, so the active-high reset is inverted here - confirm.
+    // NOTE(review): fifo_full is not used for back-pressure yet; ASIZE may be an address width.
+    async_fifo #(
+        .DSIZE(16),
+        .ASIZE(FIFO_SIZE)
+    ) fifo_image (
+        .wclk(clk),
+        .wrst_n(~reset),
+        .rclk(clk),
+        .rrst_n(~reset),
+
+        .winc(fifo_we),
+        .wdata(fifo_data),
+        .wfull(fifo_full),
+
+        .rinc(write_en),
+        .rdata(data_write),
+        .rempty(fifo_empty)
+    );
+
+    // Read from the FIFO only when a word is requested and the FIFO is not empty.
+    assign write_en = write_que && !fifo_empty;
+
+    always @(posedge clk or posedge reset) begin
+        if (reset) begin
+            state <= READ_DATA;
+            in_que <= 0;
+            fifo_we <= 0;
+            fifo_data <= 0;
+            data_cache <= 0;
+        end
+        else begin
+            case (state)
+                // Wait for a pixel and latch it; stop requesting once one is accepted.
+                READ_DATA: begin
+                    fifo_we <= 0;
+                    in_que <= !in_en;
+                    if (in_en) begin
+                        data_cache <= data_in;
+                        state <= SEND_R;
+                    end
+                end
+
+                // Queue the bits above bit 15, zero-extended to 16 bits.
+                SEND_R: begin
+                    fifo_data <= data_cache >> 16;
+                    fifo_we <= 1;
+                    state <= SEND_GB;
+                end
+
+                // Queue the low 16 bits.
+                SEND_GB: begin
+                    fifo_data <= data_cache[15:0];
+                    fifo_we <= 1;
+                    state <= READ_DATA;
+                end
+
+                default: state <= READ_DATA;
+            endcase
+        end
+    end
 
 endmodule
\ No newline at end of file
diff --git a/Scaler/scaler.v b/Scaler/scaler.v
new file mode 100644
index 0000000..09ad584
--- /dev/null
+++ b/Scaler/scaler.v
@@ -0,0 +1,760 @@
+/*-----------------------------------------------------------------------------
+
+	Video Stream Scaler
+
+	Author: David Kronstein
+
+
+
+Copyright 2011, David Kronstein, and individual contributors as indicated
+by the @authors tag.
+
+This is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as
+published by the Free Software Foundation; either version 2.1 of
+the License, or (at your option) any later version.
+
+This software is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this software; if not, write to the Free
+Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+02110-1301 USA, or see the FSF site: http://www.fsf.org.
+
+
+-------------------------------------------------------------------------------
+
+Scales streaming video up or down in resolution. 
Bilinear and nearest neighbor
+modes are supported.
+
+Run-time adjustment of input and output resolution, scaling factors, and scale
+type.
+
+-------------------------------------------------------------------------------
+
+Revisions
+
+V1.0.0 Feb 21 2011 Initial Release David Kronstein
+Known bugs:
+Very slight numerical errors (+0/-2 LSb) in output data due to coefficient arithmetic.
+Impossible to notice without adjustment in video levels. Attempted to fix by setting
+coeff11 to 1.0 - other coefficients, but this caused timing issues.
+
+*/
+`default_nettype none
+
+module streamScaler #(
+//---------------------------Parameters----------------------------------------
+parameter DATA_WIDTH = 8, //Width of input/output data
+parameter CHANNELS = 1, //Number of channels of DATA_WIDTH, for color images
+parameter DISCARD_CNT_WIDTH = 8, //Width of inputDiscardCnt
+parameter INPUT_X_RES_WIDTH = 11, //Widths of input/output resolution control signals
+parameter INPUT_Y_RES_WIDTH = 11,
+parameter OUTPUT_X_RES_WIDTH = 11,
+parameter OUTPUT_Y_RES_WIDTH = 11,
+parameter FRACTION_BITS = 8, //Number of bits for fractional component of coefficients.
+
+parameter SCALE_INT_BITS = 4, //Width of integer component of scaling factor. The maximum input data width to
+	//multipliers created will be SCALE_INT_BITS + SCALE_FRAC_BITS. Typically these
+	//values will sum to 18 to match multipliers available in FPGAs.
+parameter SCALE_FRAC_BITS = 14, //Width of fractional component of scaling factor
+parameter BUFFER_SIZE = 4, //Depth of RFIFO
+//---------------------Non-user-definable parameters----------------------------
+parameter COEFF_WIDTH = FRACTION_BITS + 1,
+parameter SCALE_BITS = SCALE_INT_BITS + SCALE_FRAC_BITS,
+parameter BUFFER_SIZE_WIDTH = ((BUFFER_SIZE+1) <= 2) ? 1 : //wide enough to hold value BUFFER_SIZE + 1
+	((BUFFER_SIZE+1) <= 4) ? 2 :
+	((BUFFER_SIZE+1) <= 8) ? 3 :
+	((BUFFER_SIZE+1) <= 16) ? 4 :
+	((BUFFER_SIZE+1) <= 32) ? 5 :
+	((BUFFER_SIZE+1) <= 64) ? 6 : 7
+)(
+//---------------------------Module IO-----------------------------------------
+//Clock and reset
+input wire clk,
+input wire rst,
+
+//User interface
+//Input
+input wire [DATA_WIDTH*CHANNELS-1:0]dIn,
+input wire dInValid,
+output wire nextDin,
+input wire start,
+
+//Output
+output reg [DATA_WIDTH*CHANNELS-1:0]
+	dOut,
+output reg dOutValid, //latency of 4 clock cycles after nextDout is asserted
+input wire nextDout,
+
+//Control
+input wire [DISCARD_CNT_WIDTH-1:0] inputDiscardCnt, //Number of input pixels to discard before processing data. Used for clipping
+input wire [INPUT_X_RES_WIDTH-1:0] inputXRes, //Resolution of input data minus 1
+input wire [INPUT_Y_RES_WIDTH-1:0] inputYRes,
+input wire [OUTPUT_X_RES_WIDTH-1:0] outputXRes, //Resolution of output data minus 1
+input wire [OUTPUT_Y_RES_WIDTH-1:0] outputYRes,
+input wire [SCALE_BITS-1:0] xScale, //Scaling factors. Input resolution scaled up by 1/xScale. Format Q SCALE_INT_BITS.SCALE_FRAC_BITS
+input wire [SCALE_BITS-1:0] yScale, //Scaling factors. Input resolution scaled up by 1/yScale. Format Q SCALE_INT_BITS.SCALE_FRAC_BITS
+
+input wire [OUTPUT_X_RES_WIDTH-1+SCALE_FRAC_BITS:0]
+	leftOffset, //Integer/fraction of input pixel to offset output data horizontally right. Format Q OUTPUT_X_RES_WIDTH.SCALE_FRAC_BITS
+input wire [SCALE_FRAC_BITS-1:0] topFracOffset, //Fraction of input pixel to offset data vertically down. Format Q0.SCALE_FRAC_BITS
+input wire nearestNeighbor //Use nearest neighbor resize instead of bilinear
+);
+//-----------------------Internal signals and registers------------------------
+reg advanceRead1;
+reg advanceRead2;
+
+wire [DATA_WIDTH*CHANNELS-1:0] readData00;
+wire [DATA_WIDTH*CHANNELS-1:0] readData01;
+wire [DATA_WIDTH*CHANNELS-1:0] readData10;
+wire [DATA_WIDTH*CHANNELS-1:0] readData11;
+reg [DATA_WIDTH*CHANNELS-1:0] readData00Reg;
+reg [DATA_WIDTH*CHANNELS-1:0] readData01Reg;
+reg [DATA_WIDTH*CHANNELS-1:0] readData10Reg;
+reg [DATA_WIDTH*CHANNELS-1:0] readData11Reg;
+
+wire [INPUT_X_RES_WIDTH-1:0] readAddress;
+
+reg readyForRead; //Indicates two full lines have been put into the buffer
+reg [OUTPUT_Y_RES_WIDTH-1:0] outputLine; //which output video line we're on
+reg [OUTPUT_X_RES_WIDTH-1:0] outputColumn; //which output video column we're on
+reg [INPUT_X_RES_WIDTH-1+SCALE_FRAC_BITS:0]
+	xScaleAmount; //Fractional and integer components of input pixel select (multiply result)
+reg [INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:0]
+	yScaleAmount; //Fractional and integer components of input pixel select (multiply result)
+reg [INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:0]
+	yScaleAmountNext; //Fractional and integer components of input pixel select (multiply result)
+wire [BUFFER_SIZE_WIDTH-1:0] fillCount; //Number of RAMs currently in use in the RAM FIFO
+reg lineSwitchOutputDisable; //On the end of an output line, disable the output for one cycle to let the RAM data become valid
+reg dOutValidInt;
+
+reg [COEFF_WIDTH-1:0] xBlend;
+wire [COEFF_WIDTH-1:0] yBlend = {1'b0, yScaleAmount[SCALE_FRAC_BITS-1:SCALE_FRAC_BITS-FRACTION_BITS]};
+
+wire [INPUT_X_RES_WIDTH-1:0] xPixLow = xScaleAmount[INPUT_X_RES_WIDTH-1+SCALE_FRAC_BITS:SCALE_FRAC_BITS];
+wire [INPUT_Y_RES_WIDTH-1:0] yPixLow = yScaleAmount[INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:SCALE_FRAC_BITS];
+wire [INPUT_Y_RES_WIDTH-1:0] yPixLowNext = yScaleAmountNext[INPUT_Y_RES_WIDTH-1+SCALE_FRAC_BITS:SCALE_FRAC_BITS];
+
+wire allDataWritten; //Indicates that all data from input has been read in
+reg readState;
+
+//States for read state machine
+parameter RS_START = 0;
+parameter RS_READ_LINE = 1;
+
+//Read state machine
+//Controls the RFIFO(ram FIFO) readout and generates output data valid signals
+always @ (posedge clk or posedge rst or posedge start)
+begin
+	if(rst | start)
+	begin
+		outputLine <= 0;
+		outputColumn <= 0;
+		xScaleAmount <= 0;
+		yScaleAmount <= 0;
+		readState <= RS_START;
+		dOutValidInt <= 0;
+		lineSwitchOutputDisable <= 0;
+		advanceRead1 <= 0;
+		advanceRead2 <= 0;
+		yScaleAmountNext <= 0;
+	end
+	else
+	begin
+		case (readState)
+
+			RS_START:
+			begin
+				xScaleAmount <= leftOffset;
+				yScaleAmount <= {{INPUT_Y_RES_WIDTH{1'b0}}, topFracOffset};
+				if(readyForRead)
+				begin
+					readState <= RS_READ_LINE;
+					dOutValidInt <= 1;
+				end
+			end
+
+			RS_READ_LINE:
+			begin
+
+				//outputLine goes through all output lines, and the logic determines which input lines to read into the RRB and which ones to discard.
+				if(nextDout && dOutValidInt)
+				begin
+					if(outputColumn == outputXRes)
+					begin //On the last input pixel of the line
+						if(yPixLowNext == (yPixLow + 1)) //If the next input line is only one greater, advance the RRB by one only
+						begin
+							advanceRead1 <= 1;
+							if(fillCount < 3) //If the RRB doesn't have enough data, stop reading it out
+								dOutValidInt <= 0;
+						end
+						else if(yPixLowNext > (yPixLow + 1)) //If the next input line is two or more greater, advance the read by two
+						begin
+							advanceRead2 <= 1;
+							if(fillCount < 4) //If the RRB doesn't have enough data, stop reading it out
+								dOutValidInt <= 0;
+						end
+
+						outputColumn <= 0;
+						xScaleAmount <= leftOffset;
+						outputLine <= outputLine + 1;
+						yScaleAmount <= yScaleAmountNext;
+						lineSwitchOutputDisable <= 1;
+					end
+					else
+					begin
+						//Advance the output pixel selection values except when waiting for the ram data to become valid
+						if(lineSwitchOutputDisable == 0)
+						begin
+							outputColumn <= outputColumn + 1;
+							xScaleAmount <= (outputColumn + 1) * xScale + leftOffset;
+						end
+						advanceRead1 <= 0;
+						advanceRead2 <= 0;
+						lineSwitchOutputDisable <= 0;
+					end
+				end
+				else //else from if(nextDout && dOutValidInt)
+				begin
+					advanceRead1 <= 0;
+					advanceRead2 <= 0;
+					lineSwitchOutputDisable <= 0;
+				end
+
+				//Once the RRB has enough data, let data be read from it. If all input data has been written, always allow read
+				if(fillCount >= 2 && dOutValidInt == 0 || allDataWritten)
+				begin
+					if((!advanceRead1 && !advanceRead2))
+					begin
+						dOutValidInt <= 1;
+						lineSwitchOutputDisable <= 0;
+					end
+				end
+			end//state RS_READ_LINE:
+		endcase
+
+		//yScaleAmountNext is used to determine which input lines are valid.
+		yScaleAmountNext <= (outputLine + 1) * yScale + {{OUTPUT_Y_RES_WIDTH{1'b0}}, topFracOffset};
+	end
+end
+
+assign readAddress = xPixLow;
+
+//Generate dOutValid signal, delayed to account for delays in data path
+reg dOutValid_1;
+reg dOutValid_2;
+reg dOutValid_3;
+
+always @(posedge clk or posedge rst)
+begin
+	if(rst)
+	begin
+		dOutValid_1 <= 0;
+		dOutValid_2 <= 0;
+		dOutValid_3 <= 0;
+		dOutValid <= 0;
+	end
+	else
+	begin
+		dOutValid_1 <= nextDout && dOutValidInt && !lineSwitchOutputDisable;
+		dOutValid_2 <= dOutValid_1;
+		dOutValid_3 <= dOutValid_2;
+		dOutValid <= dOutValid_3;
+	end
+end
+
+//-----------------------Output data generation-----------------------------
+//Scale amount values are used to generate coefficients for the four pixels coming out of the RRB to be multiplied with.
+
+//Coefficients for each of the four pixels
+//Format Q1.FRACTION_BITS
+// yx
+reg [COEFF_WIDTH-1:0] coeff00; //Top left
+reg [COEFF_WIDTH-1:0] coeff01; //Top right
+reg [COEFF_WIDTH-1:0] coeff10; //Bottom left
+reg [COEFF_WIDTH-1:0] coeff11; //Bottom right
+
+//Coefficient value of one, format Q1.COEFF_WIDTH-1
+wire [COEFF_WIDTH-1:0] coeffOne = {1'b1, {(COEFF_WIDTH-1){1'b0}}}; //One in MSb, zeros elsewhere
+//Coefficient value of one half, format Q1.COEFF_WIDTH-1
+wire [COEFF_WIDTH-1:0] coeffHalf = {2'b01, {(COEFF_WIDTH-2){1'b0}}};
+
+//Compute bilinear interpolation coefficients. Done here because these pre-registered values are used twice.
+//Adding coeffHalf to get the nearest value.
+wire [COEFF_WIDTH-1:0] preCoeff00 = (((coeffOne - xBlend) * (coeffOne - yBlend) + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}};
+wire [COEFF_WIDTH-1:0] preCoeff01 = ((xBlend * (coeffOne - yBlend) + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}};
+wire [COEFF_WIDTH-1:0] preCoeff10 = (((coeffOne - xBlend) * yBlend + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}};
+
+//Compute the coefficients
+always @(posedge clk or posedge rst)
+begin
+	if(rst)
+	begin
+		coeff00 <= 0;
+		coeff01 <= 0;
+		coeff10 <= 0;
+		coeff11 <= 0;
+		xBlend <= 0;
+	end
+	else
+	begin
+		xBlend <= {1'b0, xScaleAmount[SCALE_FRAC_BITS-1:SCALE_FRAC_BITS-FRACTION_BITS]}; //Changed to registered to improve timing
+
+		if(nearestNeighbor == 1'b0)
+		begin
+			//Normal bilinear interpolation
+			coeff00 <= preCoeff00;
+			coeff01 <= preCoeff01;
+			coeff10 <= preCoeff10;
+			coeff11 <= ((xBlend * yBlend + (coeffHalf - 1)) >> FRACTION_BITS) & {{COEFF_WIDTH{1'b0}}, {COEFF_WIDTH{1'b1}}};
+			//coeff11 <= coeffOne - preCoeff00 - preCoeff01 - preCoeff10; //Guarantee that all coefficients sum to coeffOne. Saves a multiply too. Reverted to previous method due to timing issues.
+		end
+		else
+		begin
+			//Nearest neighbor interpolation, set one coefficient to 1.0, the rest to zero based on the fractions
+			coeff00 <= xBlend < coeffHalf && yBlend < coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}};
+			coeff01 <= xBlend >= coeffHalf && yBlend < coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}};
+			coeff10 <= xBlend < coeffHalf && yBlend >= coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}};
+			coeff11 <= xBlend >= coeffHalf && yBlend >= coeffHalf ? coeffOne : {COEFF_WIDTH{1'b0}};
+		end
+	end
+end
+
+
+//Generate the blending multipliers
+reg [(DATA_WIDTH+COEFF_WIDTH)*CHANNELS-1:0] product00, product01, product10, product11;
+
+generate
+genvar channel;
+	for(channel = 0; channel < CHANNELS; channel = channel + 1)
+	begin : blend_mult_generate
+		always @(posedge clk or posedge rst)
+		begin
+			if(rst)
+			begin
+				//productxx[channel] <= 0;
+				product00[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0;
+				product01[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0;
+				product10[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0;
+				product11[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= 0;
+
+				//readDataxxReg[channel] <= 0;
+				readData00Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0;
+				readData01Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0;
+				readData10Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0;
+				readData11Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0;
+
+				//dOut[channel] <= 0;
+				dOut[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= 0;
+			end
+			else
+			begin
+				//readDataxxReg[channel] <= readDataxx[channel];
+				readData00Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData00[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ];
+				readData01Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData01[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ];
+				readData10Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData10[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ];
+				readData11Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <= readData11[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ];
+
+				//productxx[channel] <= readDataxxReg[channel] * coeffxx
+				product00[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData00Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff00;
+				product01[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData01Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff01;
+				product10[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData10Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff10;
+				product11[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel] <= readData11Reg[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] * coeff11;
+
+				//dOut[channel] <= (((product00[channel]) +
+				//	(product01[channel]) +
+				//	(product10[channel]) +
+				//	(product11[channel])) >> FRACTION_BITS) & ({ {COEFF_WIDTH{1'b0}}, {DATA_WIDTH{1'b1}} });
+				dOut[ DATA_WIDTH*(channel+1)-1 : DATA_WIDTH*channel ] <=
+					(((product00[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel]) +
+					(product01[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel]) +
+					(product10[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel]) +
+					(product11[ (DATA_WIDTH+COEFF_WIDTH)*(channel+1)-1 : (DATA_WIDTH+COEFF_WIDTH)*channel])) >> FRACTION_BITS) & ({ {COEFF_WIDTH{1'b0}}, {DATA_WIDTH{1'b1}} });
+			end
+		end
+	end
+endgenerate
+
+
+//---------------------------Data write logic----------------------------------
+//Places input data into the correct ram in the RFIFO (ram FIFO)
+//Controls writing to the RFIFO, and discards lines that aren't used
+
+reg [INPUT_Y_RES_WIDTH-1:0] writeNextValidLine; //Which line greater than writeRowCount is the next one that must be read in
+reg [INPUT_Y_RES_WIDTH-1:0] writeNextPlusOne; //One greater than writeNextValidLine, because we must always read in two adjacent lines
+reg [INPUT_Y_RES_WIDTH-1:0] writeRowCount; //Which line we're reading from dIn
+reg [OUTPUT_Y_RES_WIDTH-1:0] writeOutputLine; //The output line that corresponds to the input line. This is incremented until writeNextValidLine is greater than writeRowCount
+reg getNextPlusOne; //Flag so that writeNextPlusOne is captured only once after writeRowCount >= writeNextValidLine. This is in case multiple cycles are required until writeNextValidLine changes.
+
+//Determine which lines to read out and which to discard.
+//writeNextValidLine is the next valid line number that needs to be read out above current value writeRowCount
+//writeNextPlusOne also needs to be read out (to do interpolation), this may or may not be equal to writeNextValidLine
+always @(posedge clk or posedge rst or posedge start)
+begin
+	if(rst | start)
+	begin
+		writeOutputLine <= 0;
+		writeNextValidLine <= 0;
+		writeNextPlusOne <= 1;
+		getNextPlusOne <= 1;
+	end
+	else
+	begin
+		if(writeRowCount >= writeNextValidLine) //When writeRowCount becomes higher than the next valid line to read out, compute the next valid line.
+		begin
+			if(getNextPlusOne) //Keep writeNextPlusOne
+			begin
+				writeNextPlusOne <= writeNextValidLine + 1;
+			end
+			getNextPlusOne <= 0;
+			writeOutputLine <= writeOutputLine + 1;
+			writeNextValidLine <= ((writeOutputLine*yScale + {{(OUTPUT_Y_RES_WIDTH + SCALE_INT_BITS){1'b0}}, topFracOffset}) >> SCALE_FRAC_BITS) & {{SCALE_BITS{1'b0}}, {OUTPUT_Y_RES_WIDTH{1'b1}}};
+		end
+		else
+		begin
+			getNextPlusOne <= 1;
+		end
+	end
+end
+
+reg discardInput;
+reg [DISCARD_CNT_WIDTH-1:0] discardCountReg;
+wire advanceWrite;
+
+reg [1:0] writeState;
+
+reg [INPUT_X_RES_WIDTH-1:0] writeColCount;
+reg enableNextDin;
+reg forceRead;
+
+//Write state machine
+//Controls writing scaler input data into the RRB
+
+parameter WS_START = 0;
+parameter WS_DISCARD = 1;
+parameter WS_READ = 2;
+parameter WS_DONE = 3;
+
+//Control write and address signals to write data into ram FIFO
+always @ (posedge clk or posedge rst or posedge start)
+begin
+	if(rst | start)
+	begin
+		writeState <= WS_START;
+		enableNextDin <= 0;
+		discardInput <= 0;
+		readyForRead <= 0;
+		writeRowCount <= 0;
+		writeColCount <= 0;
+		discardCountReg <= 0;
+		forceRead <= 0;
+	end
+	else
+	begin
+		case (writeState)
+
+			WS_START:
+			begin
+				discardCountReg <= inputDiscardCnt;
+				if(inputDiscardCnt > 0)
+				begin
+					discardInput <= 1;
+					enableNextDin <= 1;
+					writeState <= WS_DISCARD;
+				end
+				else
+				begin
+					discardInput <= 0;
+					enableNextDin <= 1;
+					writeState <= WS_READ;
+				end
+				discardInput <= (inputDiscardCnt > 0) ? 1'b1 : 1'b0;
+			end
+
+			WS_DISCARD: //Discard pixels from input data
+			begin
+				if(dInValid)
+				begin
+					discardCountReg <= discardCountReg - 1;
+					if((discardCountReg - 1) == 0)
+					begin
+						discardInput <= 0;
+						writeState <= WS_READ;
+					end
+				end
+			end
+
+			WS_READ:
+			begin
+				if(dInValid & nextDin)
+				begin
+					if(writeColCount == inputXRes)
+					begin //Occurs on the last pixel in the line
+						if((writeNextValidLine == writeRowCount + 1) ||
+							(writeNextPlusOne == writeRowCount + 1))
+						begin //Next line is valid, write into buffer
+							discardInput <= 0;
+						end
+						else
+						begin //Next line is not valid, discard
+							discardInput <= 1;
+						end
+
+						//Once writeRowCount is >= 2, data is ready to start being output.
+						if(writeRowCount[1])
+							readyForRead <= 1;
+
+						if(writeRowCount == inputYRes) //When all data has been read in, stop reading.
+						begin
+							writeState <= WS_DONE;
+							enableNextDin <= 0;
+							forceRead <= 1;
+						end
+
+						writeColCount <= 0;
+						writeRowCount <= writeRowCount + 1;
+					end
+					else
+					begin
+						writeColCount <= writeColCount + 1;
+					end
+				end
+			end
+
+			WS_DONE:
+			begin
+				//do nothing, wait for reset
+			end
+
+		endcase
+	end
+end
+
+
+//Advance write whenever we have just written a valid line (discardInput == 0)
+//Generate this signal one earlier than discardInput above that uses the same conditions, to advance the buffer at the right time.
+assign advanceWrite = (writeColCount == inputXRes) & (discardInput == 0) & dInValid & nextDin;
+assign allDataWritten = writeState == WS_DONE;
+assign nextDin = (fillCount < BUFFER_SIZE) & enableNextDin;
+
+ramFifo #(
+	.DATA_WIDTH( DATA_WIDTH*CHANNELS ),
+	.ADDRESS_WIDTH( INPUT_X_RES_WIDTH ), //Controls width of RAMs
+	.BUFFER_SIZE( BUFFER_SIZE ) //Number of RAMs
+) ramRB (
+	.clk( clk ),
+	.rst( rst | start ),
+	.advanceRead1( advanceRead1 ),
+	.advanceRead2( advanceRead2 ),
+	.advanceWrite( advanceWrite ),
+	.forceRead( forceRead ),
+
+	.writeData( dIn ),
+	.writeAddress( writeColCount ),
+	.writeEnable( dInValid & nextDin & enableNextDin & ~discardInput ),
+	.fillCount( fillCount ),
+
+	.readData00( readData00 ),
+	.readData01( readData01 ),
+	.readData10( readData10 ),
+	.readData11( readData11 ),
+	.readAddress( readAddress )
+);
+
+endmodule //scaler
+
+
+
+//---------------------------Ram FIFO (RFIFO)-----------------------------
+//FIFO buffer with rams as the elements, instead of data
+//One ram is filled, while two others are simultaneously read out.
+//Four neighboring pixels are read out at once, at the selected RAM and one line down, and at readAddress and readAddress + 1
+module ramFifo #(
+	parameter DATA_WIDTH = 8,
+	parameter ADDRESS_WIDTH = 8,
+	parameter BUFFER_SIZE = 2,
+	parameter BUFFER_SIZE_WIDTH = ((BUFFER_SIZE+1) <= 2) ? 1 : //wide enough to hold value BUFFER_SIZE + 1
+		((BUFFER_SIZE+1) <= 4) ? 2 :
+		((BUFFER_SIZE+1) <= 8) ? 3 :
+		((BUFFER_SIZE+1) <= 16) ? 4 :
+		((BUFFER_SIZE+1) <= 32) ? 5 :
+		((BUFFER_SIZE+1) <= 64) ? 6 : 7
+)(
+	input wire clk,
+	input wire rst,
+	input wire advanceRead1, //Advance selected read RAM by one
+	input wire advanceRead2, //Advance selected read RAM by two
+	input wire advanceWrite, //Advance selected write RAM by one
+	input wire forceRead, //Disables writing to allow all data to be read out (RAM being written to cannot be read from normally)
+
+	input wire [DATA_WIDTH-1:0] writeData,
+	input wire [ADDRESS_WIDTH-1:0] writeAddress,
+	input wire writeEnable,
+	output reg [BUFFER_SIZE_WIDTH-1:0]
+		fillCount,
+
+	// yx
+	output wire [DATA_WIDTH-1:0] readData00, //Read from deepest RAM (earliest data), at readAddress
+	output wire [DATA_WIDTH-1:0] readData01, //Read from deepest RAM (earliest data), at readAddress + 1
+	output wire [DATA_WIDTH-1:0] readData10, //Read from second deepest RAM (second earliest data), at readAddress
+	output wire [DATA_WIDTH-1:0] readData11, //Read from second deepest RAM (second earliest data), at readAddress + 1
+	input wire [ADDRESS_WIDTH-1:0] readAddress
+);
+
+reg [BUFFER_SIZE-1:0] writeSelect;
+reg [BUFFER_SIZE-1:0] readSelect;
+
+//Read select ring register
+always @(posedge clk or posedge rst)
+begin
+	if(rst)
+		readSelect <= 1;
+	else
+	begin
+		if(advanceRead1)
+		begin
+			readSelect <= {readSelect[BUFFER_SIZE-2 : 0], readSelect[BUFFER_SIZE-1]};
+		end
+		else if(advanceRead2)
+		begin
+			readSelect <= {readSelect[BUFFER_SIZE-3 : 0], readSelect[BUFFER_SIZE-1:BUFFER_SIZE-2]};
+		end
+	end
+end
+
+//Write select ring register
+always @(posedge clk or posedge rst)
+begin
+	if(rst)
+		writeSelect <= 1;
+	else
+	begin
+		if(advanceWrite)
+		begin
+			writeSelect <= {writeSelect[BUFFER_SIZE-2 : 0], writeSelect[BUFFER_SIZE-1]};
+		end
+	end
+end
+
+wire [DATA_WIDTH-1:0] ramDataOutA [2**BUFFER_SIZE-1:0];
+wire [DATA_WIDTH-1:0] ramDataOutB [2**BUFFER_SIZE-1:0];
+
+//Generate to instantiate the RAMs
+generate
+genvar i;
+	for(i = 0; i < BUFFER_SIZE; i = i + 1)
+	begin : ram_generate
+
+		ramDualPort #(
+			.DATA_WIDTH( DATA_WIDTH ),
+			.ADDRESS_WIDTH( ADDRESS_WIDTH )
+		) ram_inst_i(
+			.clk( clk ),
+
+			//Port A is written to as well as read from. When writing, this port cannot be read from.
+			//As long as the buffer is large enough, this will not cause any problem.
+			.addrA( ((writeSelect[i] == 1'b1) && !forceRead && writeEnable) ? writeAddress : readAddress ), //&& writeEnable is
+			//to allow the full buffer to be used. After the buffer is filled, write is advanced, so writeSelect
+			//and readSelect are the same. The full buffer isn't written to, so this allows the read to work properly.
+			.dataA( writeData ),
+			.weA( ((writeSelect[i] == 1'b1) && !forceRead) ? writeEnable : 1'b0 ),
+			.qA( ramDataOutA[2**i] ),
+
+			.addrB( readAddress + 1 ),
+			.dataB( 0 ),
+			.weB( 1'b0 ),
+			.qB( ramDataOutB[2**i] )
+		);
+	end
+endgenerate
+
+//Select which ram to read from
+wire [BUFFER_SIZE-1:0] readSelect0 = readSelect;
+wire [BUFFER_SIZE-1:0] readSelect1 = (readSelect << 1) | readSelect[BUFFER_SIZE-1];
+
+//Steer the output data to the right ports
+assign readData00 = ramDataOutA[readSelect0];
+assign readData10 = ramDataOutA[readSelect1];
+assign readData01 = ramDataOutB[readSelect0];
+assign readData11 = ramDataOutB[readSelect1];
+
+//Keep track of fill level
+always @(posedge clk or posedge rst)
+begin
+	if(rst)
+	begin
+		fillCount <= 0;
+	end
+	else
+	begin
+		if(advanceWrite)
+		begin
+			if(advanceRead1)
+				fillCount <= fillCount;
+			else if(advanceRead2)
+				fillCount <= fillCount - 1;
+			else
+				fillCount <= fillCount + 1;
+		end
+		else
+		begin
+			if(advanceRead1)
+				fillCount <= fillCount - 1;
+			else if(advanceRead2)
+				fillCount <= fillCount - 2;
+			else
+				fillCount <= fillCount;
+		end
+	end
+end
+
+endmodule //ramFifo
+
+
+//Dual port RAM: ports A and B share one memory array and one clock; on a write a port outputs the written data, otherwise the word stored at its address
+module ramDualPort #(
+	parameter DATA_WIDTH = 8,
+	parameter ADDRESS_WIDTH = 8
+)(
+	input wire [(DATA_WIDTH-1):0] dataA, dataB,
+	input wire [(ADDRESS_WIDTH-1):0] addrA, addrB,
+	input wire weA, weB, clk,
+	output reg [(DATA_WIDTH-1):0] qA, qB
+);
+
+	// Declare the RAM variable
+	reg [DATA_WIDTH-1:0] ram[2**ADDRESS_WIDTH-1:0];
+
+	//Port A
+	always @ (posedge clk)
+	begin
+		if (weA)
+		begin
+			ram[addrA] <= dataA;
+			qA <= dataA;
+		end
+		else
+		begin
+			qA <= ram[addrA];
+		end
+	end
+
+	//Port B
+	always @ (posedge clk)
+	begin
+		if (weB)
+		begin
+			ram[addrB] <= dataB;
+			qB <= dataB;
+		end
+		else
+		begin
+			qB <= ram[addrB];
+		end
+	end
+
+endmodule //ramDualPort
diff --git a/Scaler/sim/out/output1280x1024to640x512.raw b/Scaler/sim/out/output1280x1024to640x512.raw
new file mode 100644
index 0000000..89cfb06
Binary files /dev/null and b/Scaler/sim/out/output1280x1024to640x512.raw differ
diff --git a/Scaler/sim/out/output640x512to1280x1024.raw b/Scaler/sim/out/output640x512to1280x1024.raw
new file mode 100644
index 0000000..38a7f8d
Binary files /dev/null and b/Scaler/sim/out/output640x512to1280x1024.raw differ
diff --git a/Scaler/sim/out/output640x512to640x512.raw b/Scaler/sim/out/output640x512to640x512.raw
new file mode 100644
index 0000000..f2e60ae
Binary files /dev/null and b/Scaler/sim/out/output640x512to640x512.raw differ
diff --git a/Scaler/sim/scaler_tb.v b/Scaler/sim/scaler_tb.v
new file mode 100644
index 0000000..b6e98c3
--- /dev/null
+++ b/Scaler/sim/scaler_tb.v
@@ -0,0 +1,437 @@
+/*-----------------------------------------------------------------------------
+
+	Video Stream Scaler testbench
+
+	Author: David Kronstein
+
+
+
+Copyright 2011, David Kronstein, and individual contributors as indicated
+by the @authors tag.
+
+This is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as
+published by the Free Software Foundation; either version 2.1 of
+the License, or (at your option) any later version.
+
+This software is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this software; if not, write to the Free
+Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+02110-1301 USA, or see the FSF site: http://www.fsf.org.
+
+-------------------------------------------------------------------------------
+
+Testbench for streamScaler V1.0.0
+
+*/
+
+`default_nettype none
+
+//Input files. Raw data format, no header. 8 bits per pixel, 3 color channels.
+`define INPUT640x512 "src/input640x512RGB.raw"
+`define INPUT1280x1024 "src/input1280x1024RGB.raw"
+`define INPUT1280x1024_21EXTRA "src/input640x512_21extraRGB.raw" //21 extra pixels at the start to be discarded. NOTE(review): macro name says 1280x1024 but the file name and the test that uses it are 640x512
+//Top-level testbench: runs seven scalerTest cases side by side and stops the simulation once every done bit is set.
+module scalerTestbench;
+parameter BUFFER_SIZE = 4;
+
+wire [7-1:0] done; //One completion flag per test case below
+
+//640x512 to 1280x1024
+	scalerTest #(
+	.INPUT_X_RES ( 640-1 ),
+	.INPUT_Y_RES ( 512-1 ),
+	.OUTPUT_X_RES ( 1280-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 1024-1 ), //Output resolution - 1
+	//.X_SCALE ( X_SCALE ),
+	//.Y_SCALE ( Y_SCALE ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 8 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_640x512to1280x1024 (
+	.inputFilename( `INPUT640x512 ),
+	.outputFilename( "out/output640x512to1280x1024.raw" ),
+
+	//Control
+	.inputDiscardCnt( 0 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( 0 ),
+	.topFracOffset( 0 ),
+	.nearestNeighbor( 0 ),
+	.done ( done[0] )
+	);
+
+
+//640x512 to 640x512
+	scalerTest #(
+	.INPUT_X_RES ( 640-1 ),
+	.INPUT_Y_RES ( 512-1 ),
+	.OUTPUT_X_RES ( 640-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 512-1 ), //Output resolution - 1
+	.X_SCALE ( 32'h4000 ),
+	.Y_SCALE ( 32'h4000 ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 8 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_640x512to640x512 (
+	.inputFilename( `INPUT640x512 ),
+	.outputFilename( "out/output640x512to640x512.raw" ),
+
+	//Control
+	.inputDiscardCnt( 0 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( 0 ),
+	.topFracOffset( 0 ),
+	.nearestNeighbor( 0 ),
+	.done ( done[1] )
+	);
+
+
+//1280x1024 to 960x768
+	scalerTest #(
+	.INPUT_X_RES ( 1280-1 ),
+	.INPUT_Y_RES ( 1024-1 ),
+	.OUTPUT_X_RES ( 960-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 768-1 ), //Output resolution - 1
+	//.X_SCALE ( X_SCALE ),
+	//.Y_SCALE ( Y_SCALE ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 8 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_1280x1024to960x768 (
+	.inputFilename( `INPUT1280x1024 ),
+	.outputFilename( "out/output1280x1024to960x768.raw" ),
+
+	//Control
+	.inputDiscardCnt( 0 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( 0 ),
+	.topFracOffset( 0 ),
+	.nearestNeighbor( 0 ),
+	.done ( done[2] )
+	);
+
+
+//1280x1024 to 640x512
+	scalerTest #(
+	.INPUT_X_RES ( 1280-1 ),
+	.INPUT_Y_RES ( 1024-1 ),
+	.OUTPUT_X_RES ( 640-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 512-1 ), //Output resolution - 1
+	.X_SCALE ( 32'h4000*2 ),
+	.Y_SCALE ( 32'h4000*2 ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 8 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_1280x1024to640x512 (
+	.inputFilename( `INPUT1280x1024 ),
+	.outputFilename( "out/output1280x1024to640x512.raw" ),
+
+	//Control
+	.inputDiscardCnt( 0 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( 25'h1FFF ),
+	.topFracOffset( 25'h1FFF ),
+	.nearestNeighbor( 0 ),
+	.done ( done[3] )
+	);
+
+//1280x1024 to 480x384
+
+	scalerTest #(
+	.INPUT_X_RES ( 1280-1 ),
+	.INPUT_Y_RES ( 1024-1 ),
+	.OUTPUT_X_RES ( 480-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 384-1 ), //Output resolution - 1
+	//.X_SCALE ( 32'h4000*2 ),
+	//.Y_SCALE ( 32'h4000*2 ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 8 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_1280x1024to480x384 (
+	.inputFilename( `INPUT1280x1024 ),
+	.outputFilename( "out/output1280x1024to480x384.raw" ),
+
+	//Control
+	.inputDiscardCnt( 0 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( 0 ),
+	.topFracOffset( 0 ),
+	.nearestNeighbor( 0 ),
+	.done ( done[4] )
+	);
+
+//640x512 to 1280x1024, discarding 21
+
+	scalerTest #(
+	.INPUT_X_RES ( 640-1 ),
+	.INPUT_Y_RES ( 512-1 ),
+	.OUTPUT_X_RES ( 1280-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 1024-1 ), //Output resolution - 1
+	//.X_SCALE ( 32'h4000*2 ),
+	//.Y_SCALE ( 32'h4000*2 ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 8 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_640x512to1280x1024_21extra (
+	.inputFilename( `INPUT1280x1024_21EXTRA ),
+	.outputFilename( "out/output640x512to1280x1024_21extra.raw" ),
+
+	//Control
+	.inputDiscardCnt( 21 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( 0 ),
+	.topFracOffset( 0 ),
+	.nearestNeighbor( 0 ),
+	.done ( done[5] )
+	);
+
+//50x40 region clipped out of the 640x512 input, scaled to 640x512
+
+	scalerTest #(
+	.INPUT_X_RES ( 640-1 ),
+	.INPUT_Y_RES ( 40-1 ),
+	.OUTPUT_X_RES ( 640-1 ), //Output resolution - 1
+	.OUTPUT_Y_RES ( 512-1 ), //Output resolution - 1
+	.X_SCALE ( 32'h4000 * (50-1) / (640-1)-1 ),
+	.Y_SCALE ( 32'h4000 * (40-1) / (512-1)-1 ),
+
+	.DATA_WIDTH ( 8 ),
+	.DISCARD_CNT_WIDTH ( 14 ),
+	.INPUT_X_RES_WIDTH ( 11 ),
+	.INPUT_Y_RES_WIDTH ( 11 ),
+	.OUTPUT_X_RES_WIDTH ( 11 ),
+	.OUTPUT_Y_RES_WIDTH ( 11 ),
+	.BUFFER_SIZE ( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+	) st_50x40to640x512clipped (
+	.inputFilename( `INPUT640x512 ),
+	.outputFilename( "out/output50x40to640x512clipped.raw" ),
+
+	//Control
+	.inputDiscardCnt( 640*3 ), //Number of input pixels to discard before processing data. Used for clipping
+	.leftOffset( {11'd249, 14'b0} ),
+	.topFracOffset( 0 ),
+	.nearestNeighbor( 0 ),
+	.done ( done[6] )
+	);
+
+	initial //Poll every 10 time units until all seven tests report done, then halt the simulation
+	begin
+		#10
+		while(done != 7'b1111111)
+			#10
+			;
+		$stop;
+	end
+
+
+
+
+endmodule
+//One test case: streams inputFilename through a streamScaler instance and writes the output pixels to outputFilename.
+module scalerTest #(
+parameter INPUT_X_RES = 120-1,
+parameter INPUT_Y_RES = 90-1,
+parameter OUTPUT_X_RES = 1280-1, //Output resolution - 1
+parameter OUTPUT_Y_RES = 960-1, //Output resolution - 1
+parameter X_SCALE = 32'h4000 * (INPUT_X_RES) / (OUTPUT_X_RES)-1, //Default: input/output ratio scaled by 32'h4000 (1<<14), minus 1
+parameter Y_SCALE = 32'h4000 * (INPUT_Y_RES) / (OUTPUT_Y_RES)-1, //Same computation for the vertical direction
+
+parameter DATA_WIDTH = 8,
+parameter CHANNELS = 3,
+parameter DISCARD_CNT_WIDTH = 8,
+parameter INPUT_X_RES_WIDTH = 11,
+parameter INPUT_Y_RES_WIDTH = 11,
+parameter OUTPUT_X_RES_WIDTH = 11,
+parameter OUTPUT_Y_RES_WIDTH = 11,
+parameter BUFFER_SIZE = 6 //Number of RAMs in RAM ring buffer
+)(
+input wire [50*8:0] inputFilename, outputFilename, //File names passed in as packed string vectors
+
+//Control
+input wire [DISCARD_CNT_WIDTH-1:0] inputDiscardCnt, //Number of input pixels to discard before processing data. Used for clipping
+input wire [INPUT_X_RES_WIDTH+14-1:0] leftOffset,
+input wire [14-1:0] topFracOffset,
+input wire nearestNeighbor,
+
+output reg done //Set to 1 after the expected number of output pixels has been written and the output file closed
+
+);
+
+
+reg clk;
+reg rst;
+
+
+reg [DATA_WIDTH*CHANNELS-1:0] dIn;
+reg dInValid;
+wire nextDin;
+reg start;
+
+wire [DATA_WIDTH*CHANNELS-1:0] dOut;
+wire dOutValid;
+reg nextDout;
+
+integer r, rfile, wfile;
+
+initial // Clock generator: clk toggles every 5 time units once started
+	begin
+		#10 //Delay to allow filename to get here
+		clk = 0;
+		#5 forever #5 clk = !clk;
+	end
+
+initial // Reset: one pulse, high from t=15 to t=19
+	begin
+		done = 0;
+		#10 //Delay to allow filename to get here
+		rst = 0;
+		#5 rst = 1;
+		#4 rst = 0;
+		// #50000 $stop;
+	end
+
+reg eof;
+reg [DATA_WIDTH*CHANNELS-1:0] readMem [0:0]; //One-pixel buffer filled by $fread
+initial // Input file read, generates dIn data
+begin
+	#10 //Delay to allow filename to get here
+	rfile = $fopen(inputFilename, "rb");
+
+	dIn = 0;
+	dInValid = 0;
+	start = 0;
+
+	#41
+	start = 1;
+
+	#10
+	start = 0;
+
+	#20
+	r = $fread(readMem, rfile);
+	dIn = readMem[0];
+
+	while(! $feof(rfile))
+	begin
+		dInValid = 1;
+
+		#10
+		if(nextDin) //Fetch the next pixel only when the scaler asks for it
+		begin
+			r = $fread(readMem, rfile);
+			dIn = readMem[0];
+		end
+	end
+
+  $fclose(rfile);
+end
+
+//Generate nextDout request signal
+initial
+begin
+  #10 //Delay to match filename arrival delay
+  nextDout = 0;
+	#140001
+	forever
+	begin
+		//This can be used to slow down the read to simulate live read-out. This basically inserts H blank periods.
+		#(10*(OUTPUT_X_RES+1)*4)
+		nextDout = 0;
+		#(10*(OUTPUT_X_RES+1))
+		nextDout = 1;
+
+	end
+end
+
+//Read dOut and write to file
+integer dOutCount;
+initial
+begin
+  #10 //Delay to allow filename to get here
+	wfile = $fopen(outputFilename, "wb");
+	nextDout = 0;
+	dOutCount = 0;
+	#1
+	while(dOutCount < (OUTPUT_X_RES+1) * (OUTPUT_Y_RES+1))
+	begin
+		#10
+		if(dOutValid == 1)
+		begin
+			$fwrite(wfile, "%c", dOut[23:16]); //NOTE(review): these three byte slices assume DATA_WIDTH=8 and CHANNELS=3
+			$fwrite(wfile, "%c", dOut[15:8]);
+			$fwrite(wfile, "%c", dOut[7:0]);
+			dOutCount = dOutCount + 1;
+		end
+	end
+	$fclose(wfile);
+	done = 1;
+end
+
+streamScaler #(
+.DATA_WIDTH( DATA_WIDTH ),
+.CHANNELS( CHANNELS ),
+.DISCARD_CNT_WIDTH( DISCARD_CNT_WIDTH ),
+.INPUT_X_RES_WIDTH( INPUT_X_RES_WIDTH ),
+.INPUT_Y_RES_WIDTH( INPUT_Y_RES_WIDTH ),
+.OUTPUT_X_RES_WIDTH( OUTPUT_X_RES_WIDTH ),
+.OUTPUT_Y_RES_WIDTH( OUTPUT_Y_RES_WIDTH ),
+.BUFFER_SIZE( BUFFER_SIZE ) //Number of RAMs in RAM ring buffer
+) scaler_inst (
+.clk( clk ),
+.rst( rst ),
+
+.dIn( dIn ),
+.dInValid( dInValid ),
+.nextDin( nextDin ),
+.start( start ),
+
+.dOut( dOut ),
+.dOutValid( dOutValid ),
+.nextDout( nextDout ),
+
+//Control
+.inputDiscardCnt( inputDiscardCnt ), //Number of input pixels to discard before processing data. Used for clipping
+.inputXRes( INPUT_X_RES ), //Input data number of pixels per line
+.inputYRes( INPUT_Y_RES ),
+
+.outputXRes( OUTPUT_X_RES ), //Resolution of output data
+.outputYRes( OUTPUT_Y_RES ),
+.xScale( X_SCALE ), //Scaling factors. Input resolution scaled by 1/xScale. Format Q4.14
+.yScale( Y_SCALE ), //Scaling factors. Input resolution scaled by 1/yScale. Format Q4.14
+
+.leftOffset( leftOffset ),
+.topFracOffset( topFracOffset ),
+.nearestNeighbor( nearestNeighbor )
+);
+
+endmodule
diff --git a/isp.v b/isp.v
index 0d697ef..0d1cd05 100644
--- a/isp.v
+++ b/isp.v
@@ -18,24 +18,50 @@ module isp #(
     output reg data_que, // 数据请求线,高电平:请求三个数据,直到读取完才拉低
-    output reg data_line, // 新一行请求数据线,高电平:请求九个数据,直到读取完才拉低
+    output reg data_line // New-line data request: high requests nine data items and stays high until they are all read. Last port now, so no trailing comma.
-    // en: 输出数据有效信号,高电平有效
-    output reg out_en,
-    output reg [15:0] out_r,
-    output reg [15:0] out_g,
-    output reg [15:0] out_b
 );
+    // Demosaic outputs: the three colour channels that get merged into one RGB pixel
+    wire rgb_en;
+    wire [15:0] im_red, im_green, im_blue;
 
-    demosaic2 #(IN_WIDTH, IN_HEIGHT, RAW_TYPE) CFA (
+    // Signals between the RGB merge stage and the arbitrary-ratio scaler
+    wire scale_in_en;
+    wire scale_in_que; // scaler requests data
+    wire [3 * COLOR_DEPTH - 1:0] scale_in_data;
+
+    demosaic2 #(
+        .IM_WIDTH(1936),
+        .IM_HEIGHT(1088)
+    ) CFA (
         .clk(clk),
         .reset(reset),
         .data_en(data_en),
         .data_in(data_in),
         .data_que(data_que),
         .data_line(data_line),
-        .out_en(out_en),
-        .out_r(out_r),
-        .out_g(out_g),
-        .out_b(out_b)
+        .out_en(rgb_en),
+        .out_r(im_red),
+        .out_g(im_green),
+        .out_b(im_blue)
+    );
+
+    chanels_to_RGB merge_toRGB(
+        .clk(clk),
+        .reset(reset),
+        .in_en(rgb_en),
+        .data_in('{im_blue[11:0],    // element [2] = B (was wired to im_red, leaving im_blue unused)
+                   im_green[11:0],   // element [1] = G
+                   im_red[11:0]}),   // element [0] = R; assumes data_in is declared [2:0] with 12-bit elements - TODO confirm
+
+        .data_que(scale_in_que),
+        .out_en(scale_in_en),
+        .data_out(scale_in_data)
+    );
+
+    streamScaler #(
+        .DATA_WIDTH(COLOR_DEPTH),
+        .CHANNELS(3)
+    ) scaler (
+        // TODO: no ports are connected yet (clk, rst, data and control inputs all left open)
 );
 endmodule