1/*
2	Copyright (c) 2002-2004, Thomas Kurschel
3
4
5	Part of Radeon accelerant
6
7	Takes care of PLL
8*/
9
10
11#include "radeon_accelerant.h"
12
13#include "pll_regs.h"
14#include "pll_access.h"
15#include "utils.h"
16#include <stdlib.h>
17#include "set_mode.h"
18
19
20static void Radeon_PLLWaitForReadUpdateComplete(
21	accelerator_info *ai, int crtc_idx )
22{
23	int i;
24
25	// we should wait forever, but
26	// 1. this is unsafe
27	// 2. some r300 loop forever (reported by XFree86)
28	for( i = 0; i < 10000; ++i ) {
29		if( (Radeon_INPLL( ai->regs, ai->si->asic, crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV )
30			& RADEON_PPLL_ATOMIC_UPDATE_R) == 0 )
31			return;
32	}
33}
34
35static void Radeon_PLLWriteUpdate(
36	accelerator_info *ai, int crtc_idx )
37{
38	Radeon_PLLWaitForReadUpdateComplete( ai, crtc_idx );
39
40    Radeon_OUTPLLP( ai->regs, ai->si->asic,
41    	crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV,
42    	RADEON_PPLL_ATOMIC_UPDATE_W,
43    	~RADEON_PPLL_ATOMIC_UPDATE_W );
44}
45
46// calculate PLL dividers
47// pll - info about PLL
48// freq - whished frequency in Hz
49// fixed_post_div - if != 0, fixed divider to be used
50// dividers - filled with proper dividers
51void Radeon_CalcPLLDividers(
52	const pll_info *pll, uint32 freq, uint fixed_post_div, pll_dividers *dividers )
53{
54	// the PLL gets the reference
55	//		pll_in = ref_freq / ref_div
56	// this must be within pll_in_min..pll_in_max
57	// the VCO of the PLL has the frequency
58	//		vco = pll_in * feedback_div * extra_feedback_div
59	//		    = ref_freq / ref_div * feedback_div * extra_feedback_div
60	// where pre_feedback_div is hard-wired
61	// this must be within vco_min..vco_max
62	// the pixel clock is calculated as
63	//		pll_out = vco / post_div / extra_post_div
64	//		        = ref_freq * feedback_div * extra_feedback_div / (ref_div * post_div * extra_post_div)
65	// where extra_post_div _may_ be choosable between 1 and 2
66
67	// synonyms are:
68	//		ref_div = M
69	//		feedback_div = N
70	//		post_div = P
71
72	int
73		min_post_div_idx, max_post_div_idx,
74		post_div_idx, extra_post_div_idx,
75		best_post_div_idx, best_extra_post_div_idx;
76
77	uint32
78		best_ref_div, best_feedback_div, best_freq;
79	int32
80		best_error, best_vco_dev;
81
82	best_error = 999999999;
83
84	// make compiler happy
85	best_post_div_idx = 0;
86	best_extra_post_div_idx = 0;
87	best_ref_div = 1;
88	best_feedback_div = 1;
89	best_freq = 1;
90	best_vco_dev = 1;
91
92	if( fixed_post_div == 0 ) {
93		min_post_div_idx = 0;
94		for(
95			max_post_div_idx = 0;
96			pll->post_divs[max_post_div_idx].divider != 0;
97			++max_post_div_idx )
98			;
99		--max_post_div_idx;
100	} else {
101		for(
102			min_post_div_idx = 0;
103			pll->post_divs[min_post_div_idx].divider != fixed_post_div;
104			++min_post_div_idx )
105			;
106
107		max_post_div_idx = min_post_div_idx;
108
109		//SHOW_FLOW( 2, "idx of fixed post divider: %d", min_post_div_idx );
110	}
111
112	// post dividers are quite restrictive, so they provide little search space only
113	for( extra_post_div_idx = 0; pll->extra_post_divs[extra_post_div_idx].divider != 0; ++extra_post_div_idx ) {
114		for( post_div_idx = min_post_div_idx; post_div_idx <= max_post_div_idx; ++post_div_idx ) {
115			uint32 ref_div;
116			uint32 post_div =
117				pll->post_divs[post_div_idx].divider
118				* pll->extra_post_divs[extra_post_div_idx].divider;
119
120			// post devider determines VCO frequency, so determine and verify it;
121			// freq is in Hz, everything else is in 10 kHz units
122			// we use 10 kHz units as long as possible to avoid uint32 overflows
123			uint32 vco = (freq / 10000) * post_div;
124
125			//SHOW_FLOW( 2, "post_div=%d, vco=%d", post_div, vco );
126
127			if( vco < pll->vco_min || vco > pll->vco_max )
128				continue;
129
130			//SHOW_FLOW0( 2, "jau" );
131
132			// we can either iterate through feedback or reference dividers;
133			// usually, there are fewer possible reference dividers, so I picked them
134			for( ref_div = pll->min_ref_div; ref_div <= pll->max_ref_div; ++ref_div ) {
135				uint32 feedback_div, cur_freq;
136				int32 error, vco_dev;
137
138				// this implies the frequency of the lock unit
139				uint32 pll_in = pll->ref_freq / ref_div;
140
141				if( pll_in < pll->pll_in_min || pll_in > pll->pll_in_max )
142					continue;
143
144				// well, only one variable is left
145				// timing is almost certainly valid, time to use Hz units
146				feedback_div = RoundDiv64(
147					(int64)freq * ref_div * post_div,
148					pll->ref_freq * 10000 * pll->extra_feedback_div);
149
150				if( feedback_div < pll->min_feedback_div ||
151					feedback_div > pll->max_feedback_div )
152					continue;
153
154				// let's see what we've got
155				cur_freq = RoundDiv64(
156					(int64)pll->ref_freq * 10000 * feedback_div * pll->extra_feedback_div,
157					ref_div * post_div );
158
159				// absolute error in terms of output clock
160				error = abs( (int32)cur_freq - (int32)freq );
161				// deviation from perfect VCO clock
162				vco_dev = abs( (int32)vco - (int32)(pll->best_vco) );
163
164				// if there is no optimal VCO frequency, choose setting with less error;
165				// if there is an optimal VCO frequency, choose new settings if
166				// - error is reduced significantly (100 Hz or more), or
167				// - output frequency is almost the same (less then 100 Hz difference) but
168				//	 VCO frequency is closer to best frequency
169				if( (pll->best_vco == 0 && error < best_error) ||
170					(pll->best_vco != 0 &&
171					 (error < best_error - 100 ||
172					 (abs( error - best_error ) < 100 && vco_dev < best_vco_dev ))))
173				{
174					//SHOW_FLOW( 2, "got freq=%d, best_freq=%d", freq, cur_freq );
175					best_post_div_idx = post_div_idx;
176					best_extra_post_div_idx = extra_post_div_idx;
177					best_ref_div = ref_div;
178					best_feedback_div = feedback_div;
179					best_freq = cur_freq;
180					best_error = error;
181					best_vco_dev = vco_dev;
182				}
183			}
184		}
185	}
186
187	dividers->post_code = pll->post_divs[best_post_div_idx].code;
188	dividers->post = pll->post_divs[best_post_div_idx].divider;
189	dividers->extra_post_code = pll->post_divs[best_extra_post_div_idx].code;
190	dividers->extra_post = pll->post_divs[best_extra_post_div_idx].divider;
191	dividers->ref = best_ref_div;
192	dividers->feedback = best_feedback_div;
193	dividers->freq = best_freq;
194
195	/*SHOW_FLOW( 2, "post_code=%d, post=%d, extra_post_code=%d, extra_post=%d, ref=%d, feedback=%d, freq=%d",
196		dividers->post_code, dividers->post, dividers->extra_post_code,
197		dividers->extra_post, dividers->ref, dividers->feedback, dividers->freq );*/
198}
199
200
201// with a TV timing given, find a corresponding CRT timing.
202// both timing must meet at the end of a frame, but as the PLL has a
203// limited frequency granularity, you don't really get a CRT timing
204// with precisely the same frame rate; the solution is to tweak the CRT
205// image a bit by making it wider/taller/smaller until the frame rate
206// drift is under a given threshold;
207// we follow two aims:
208// 	- primary, keep frame rate in sync
209//  - secondary, only tweak as much as unavoidable
210void Radeon_MatchCRTPLL(
211	const pll_info *pll,
212	uint32 tv_v_total, uint32 tv_h_total, uint32 tv_frame_size_adjust, uint32 freq,
213	const display_mode *mode, uint32 max_v_tweak, uint32 max_h_tweak,
214	uint32 max_frame_rate_drift, uint32 fixed_post_div,
215	pll_dividers *dividers,
216	display_mode *tweaked_mode )
217{
218	uint32 v_tweak;
219	int32 v_tweak_dir;
220	uint32 pix_per_tv_frame;
221
222	SHOW_FLOW( 2, "fixed post divider: %d", fixed_post_div );
223
224	// number of TV pixels per frame
225	pix_per_tv_frame = tv_v_total * tv_h_total + tv_frame_size_adjust;
226
227	// starting with original data we tweak total horizontal and vertical size
228	// more and more until we find a proper CRT clock frequency
229	for( v_tweak = 0; v_tweak <= max_v_tweak; ++v_tweak ) {
230		for( v_tweak_dir = -1; v_tweak_dir <= 1; v_tweak_dir += 2 ) {
231			uint32 h_tweak;
232			int32 h_tweak_dir;
233
234			uint32 v_total = mode->timing.v_total + v_tweak * v_tweak_dir;
235
236			for( h_tweak = 0; h_tweak <= max_h_tweak; ++h_tweak ) {
237				for( h_tweak_dir = -1; h_tweak_dir <= 1; h_tweak_dir += 2 ) {
238					uint32 pix_per_crt_frame, frame_rate_drift;
239					uint32 crt_freq;
240					uint32 abs_crt_error;
241
242					uint32 h_total = mode->timing.h_total + h_tweak * h_tweak_dir;
243
244					// number of CRT pixels per frame
245					pix_per_crt_frame = v_total * h_total;
246
247					// frame rate must be:
248					//	frame_rate = freq / pix_per_tv_half_frame
249					// because of interlace, we must use half frames
250					//	pix_per_tv_half_frame = pix_per_tv_frame / 2
251					// to get a CRT image with the same frame rate, we get
252					//	crt_freq = frame_rate * pix_per_crt_frame
253					//	         = freq / (pix_per_tv_frame / 2) * pix_per_crt_frame
254					// formula is reordered as usual to improve accuracy
255					crt_freq = (uint64)freq * pix_per_crt_frame * 2 / pix_per_tv_frame;
256
257					Radeon_CalcPLLDividers( pll, crt_freq, fixed_post_div, dividers );
258
259					// get absolute CRT clock error per second
260					abs_crt_error = abs( (int32)(dividers->freq) - (int32)crt_freq );
261
262					//SHOW_INFO( 2, "whished=%d, is=%d", crt_freq, dividers->freq );
263
264					// convert it to relative CRT clock error:
265					//	rel_error = abs_crt_error / crt_freq
266					// now to absolute TV clock error per second:
267					//	abs_tv_error = rel_error * tv_freq
268					// and finally to TV clock error per frame:
269					//	frame_rate_drift = abs_tv_error / frame_rate
270					//	                 = abs_crt_error / crt_freq * tv_freq / frame_rate
271					// this can be simplified by using:
272					//	tv_freq = pix_per_tv_frame * frame_rate
273					// so we get:
274					//	frame_rate_drift = abs_crt_error / crt_freq * pix_per_tv_frame * frame_rate / frame_rate
275					//	                 = abs_crt_error / crt_freq * pix_per_tv_frame
276					frame_rate_drift = (uint64)abs_crt_error * pix_per_tv_frame / freq;
277
278					// if drift is within threshold, we take this setting and stop
279					// searching (later iteration will increasingly tweak screen size,
280					// and we don't really want that)
281					if( frame_rate_drift <= max_frame_rate_drift ) {
282						SHOW_INFO( 2, "frame_rate_drift=%d, crt_freq=%d, v_total=%d, h_total=%d",
283							frame_rate_drift, crt_freq, v_total, h_total );
284
285						tweaked_mode->timing.pixel_clock = crt_freq;
286						tweaked_mode->timing.v_total = v_total;
287						tweaked_mode->timing.h_total = h_total;
288						return;
289					}
290				}
291			}
292		}
293    }
294}
295
296
297// table to map divider to register value
298static pll_divider_map post_divs[] = {
299	{  1, 0 },
300	{  2, 1 },
301	{  4, 2 },
302	{  8, 3 },
303	{  3, 4 },
304//	{ 16, 5 },	// at least for pll2 of M6, this value is reserved
305	{  6, 6 },
306	{ 12, 7 },
307	{  0, 0 }
308};
309
310
311// normal PLLs have no extra post divider
312static pll_divider_map extra_post_divs[] = {
313	{ 1, 1 },
314	{ 0, 0 }
315};
316
317
318// extra post-divider provided by Rage Theatre
319static pll_divider_map external_extra_post_divs[] = {
320	{ 1, 0 },
321	{ 2, 1 },
322	{ 0, 0 }
323};
324
325
326// post-dividers of Rage Theatre
327static pll_divider_map tv_post_divs[] = {
328	{  1, 1 },
329	{  2, 2 },
330	{  3, 3 },
331	{  4, 4 },
332	{  5, 5 },
333	{  6, 6 },
334	{  7, 7 },
335	{  8, 8 },
336	{  9, 9 },
337	{ 10, 10 },
338	{ 11, 11 },
339	{ 12, 12 },
340	{ 13, 13 },
341	{ 14, 14 },
342	{ 15, 15 },
343	{  0, 0 }
344};
345
346
347// get PLL parameters of TV PLL
348void Radeon_GetTVPLLConfiguration( const general_pll_info *general_pll, pll_info *pll,
349	bool internal_encoder )
350{
351	pll->post_divs = tv_post_divs;
352	pll->extra_post_divs = internal_encoder ? extra_post_divs : external_extra_post_divs;
353	pll->ref_freq = general_pll->ref_freq;
354	pll->vco_min = 10000;
355	pll->vco_max = 25000;
356	// I'm not sure about the upper limit
357	pll->min_ref_div = 4;
358	pll->max_ref_div = 0x3ff;
359	// in the original code, they set it to 330kHz if PAL is requested and
360	// quartz is 27 MHz, but I don't see how these circumstances can effect the
361	// mimimal PLL input frequency
362	pll->pll_in_min = 20;//40;
363	// in the original code, they don't define an upper limit
364	pll->pll_in_max = 100;
365	pll->extra_feedback_div = 1;
366	pll->min_feedback_div = 4;
367	pll->max_feedback_div = 0x7ff;
368	pll->best_vco = 21000;
369}
370
371
372// get PLL parameters of CRT PLL used in conjunction with TV-out
373void Radeon_GetTVCRTPLLConfiguration( const general_pll_info *general_pll, pll_info *pll,
374	bool internal_tv_encoder )
375{
376	pll->post_divs = post_divs;
377	pll->extra_post_divs = extra_post_divs;
378	pll->ref_freq = general_pll->ref_freq;
379
380	// in sample code, these limits are set in a strange way;
381	// as a first shot, I use the BIOS provided limits
382	/*pll->vco_min = general_pll->min_pll_freq;
383	pll->vco_max = general_pll->max_pll_freq;*/
384
385	// in sample code, they use a variable post divider during calculation, but
386	// use a fixed post divider for programming - the variable post divider is
387	// multiplied to the feedback divider;
388	// because of the fixed post divider (3), the VCO always runs far out of
389	// its stable frequency range, so we have hack the limits
390	pll->vco_min = 4000;
391	pll->vco_max = general_pll->max_pll_freq;
392
393	// in sample code, lower limit is 4, but in register spec they say everything but 0/1
394	pll->min_ref_div = 2;
395	pll->max_ref_div = 0x3ff;
396	pll->pll_in_min = 20;
397	pll->pll_in_max = 100;
398	pll->extra_feedback_div = 1;
399	pll->min_feedback_div = 4;
400	pll->max_feedback_div = 0x7ff;
401	pll->best_vco = internal_tv_encoder ? 17500 : 21000;
402}
403
404
405// calc PLL dividers for CRT
406// mode->timing.pixel_clock must be in Hz because required accuracy in TV-Out mode
407void Radeon_CalcCRTPLLDividers(
408	const general_pll_info *general_pll, const display_mode *mode, pll_dividers *dividers )
409{
410	pll_info pll;
411
412	pll.post_divs = post_divs;
413	pll.extra_post_divs = extra_post_divs;
414	pll.ref_freq = general_pll->ref_freq;
415	pll.vco_min = general_pll->min_pll_freq;
416	pll.vco_max = general_pll->max_pll_freq;
417	pll.min_ref_div = 2;
418	pll.max_ref_div = 0x3ff;
419	pll.pll_in_min = 40;
420	pll.pll_in_max = 100;
421	pll.extra_feedback_div = 1;
422	pll.min_feedback_div = 4;
423	pll.max_feedback_div = 0x7ff;
424	pll.best_vco = 0;
425
426	SHOW_FLOW( 2, "freq=%ld", mode->timing.pixel_clock );
427
428	Radeon_CalcPLLDividers( &pll, mode->timing.pixel_clock, 0, dividers );
429}
430
431
432// calculate PLL registers
433// mode->timing.pixel_clock must be in Hz because required accuracy in TV-Out mode
434// (old: freq is in 10kHz)
435void Radeon_CalcPLLRegisters(
436	const display_mode *mode, const pll_dividers *dividers, pll_regs *values )
437{
438	values->dot_clock_freq = dividers->freq;
439	values->feedback_div   = dividers->feedback;
440	values->post_div       = dividers->post;
441	values->pll_output_freq = dividers->freq * dividers->post;
442
443	values->ppll_ref_div   = dividers->ref;
444	values->ppll_div_3     = (dividers->feedback | (dividers->post_code << 16));
445	// this is mad: the PLL controls the horizontal length in sub-byte precision!
446	values->htotal_cntl    = mode->timing.h_total & 7;
447
448	SHOW_FLOW( 2, "dot_clock_freq=%ld, pll_output_freq=%ld, ref_div=%d, feedback_div=%d, post_div=%d",
449		values->dot_clock_freq, values->pll_output_freq,
450		values->ppll_ref_div, values->feedback_div, values->post_div );
451}
452
453// write values into PLL registers
454void Radeon_ProgramPLL(
455	accelerator_info *ai, int crtc_idx, pll_regs *values )
456{
457	vuint8 *regs = ai->regs;
458	radeon_type asic = ai->si->asic;
459
460	SHOW_FLOW0( 2, "" );
461
462	// use some other PLL for pixel clock source to not fiddling with PLL
463	// while somebody is using it
464    Radeon_OUTPLLP( regs, asic, crtc_idx == 0 ? RADEON_VCLK_ECP_CNTL : RADEON_PIXCLKS_CNTL,
465    	RADEON_VCLK_SRC_CPU_CLK, ~RADEON_VCLK_SRC_SEL_MASK );
466
467    Radeon_OUTPLLP( regs, asic,
468		crtc_idx == 0 ? RADEON_PPLL_CNTL : RADEON_P2PLL_CNTL,
469	    RADEON_PPLL_RESET
470	    | RADEON_PPLL_ATOMIC_UPDATE_EN
471	    | RADEON_PPLL_VGA_ATOMIC_UPDATE_EN,
472	    ~(RADEON_PPLL_RESET
473		| RADEON_PPLL_ATOMIC_UPDATE_EN
474		| RADEON_PPLL_VGA_ATOMIC_UPDATE_EN) );
475
476	// select divider 3 (well, only required for first PLL)
477    OUTREGP( regs, RADEON_CLOCK_CNTL_INDEX,
478	    RADEON_PLL_DIV_SEL_DIV3,
479	    ~RADEON_PLL_DIV_SEL_MASK );
480
481	RADEONPllErrataAfterIndex(regs, asic);
482
483	if( ai->si->new_pll && crtc_idx == 0 ) {
484		// starting with r300, the reference divider of the first PLL was
485		// moved to another bit position; at the old location, you only
486		// find the "BIOS suggested divider"; no clue why they did that
487		Radeon_OUTPLLP( regs, asic,
488    		RADEON_PPLL_REF_DIV,
489    		values->ppll_ref_div << RADEON_PPLL_REF_DIV_ACC_SHIFT,
490    		~RADEON_PPLL_REF_DIV_ACC_MASK );
491	} else {
492	    Radeon_OUTPLLP( regs, asic,
493    		crtc_idx == 0 ? RADEON_PPLL_REF_DIV : RADEON_P2PLL_REF_DIV,
494    		values->ppll_ref_div,
495    		~RADEON_PPLL_REF_DIV_MASK );
496    }
497
498    Radeon_OUTPLLP( regs, asic,
499    	crtc_idx == 0 ? RADEON_PPLL_DIV_3 : RADEON_P2PLL_DIV_0,
500    	values->ppll_div_3,
501    	~RADEON_PPLL_FB3_DIV_MASK );
502
503    Radeon_OUTPLLP( regs, asic,
504    	crtc_idx == 0 ? RADEON_PPLL_DIV_3 : RADEON_P2PLL_DIV_0,
505    	values->ppll_div_3,
506    	~RADEON_PPLL_POST3_DIV_MASK );
507
508    Radeon_PLLWriteUpdate( ai, crtc_idx );
509    Radeon_PLLWaitForReadUpdateComplete( ai, crtc_idx );
510
511    Radeon_OUTPLL( regs, asic,
512    	crtc_idx == 0 ? RADEON_HTOTAL_CNTL : RADEON_HTOTAL2_CNTL,
513    	values->htotal_cntl );
514
515	Radeon_OUTPLLP( regs, asic,
516		crtc_idx == 0 ? RADEON_PPLL_CNTL : RADEON_P2PLL_CNTL, 0,
517		~(RADEON_PPLL_RESET
518		| RADEON_PPLL_SLEEP
519		| RADEON_PPLL_ATOMIC_UPDATE_EN
520		| RADEON_PPLL_VGA_ATOMIC_UPDATE_EN) );
521
522	// there is no way to check whether PLL has settled, so wait a bit
523	snooze( 5000 );
524
525	// use PLL for pixel clock again
526    Radeon_OUTPLLP( regs, asic,
527    	crtc_idx == 0 ? RADEON_VCLK_ECP_CNTL : RADEON_PIXCLKS_CNTL,
528    	RADEON_VCLK_SRC_PPLL_CLK, ~RADEON_VCLK_SRC_SEL_MASK );
529}
530