Xemu [doxygen]  hyppo 0a42be3a057156924bc1b626a687bd6e27349c45 @ Sat 19 Mar 02:15:11 CET 2022
basic_text.c
Go to the documentation of this file.
1 /* Part of the Xemu project, please visit: https://github.com/lgblgblgb/xemu
2  Copyright (C)2016-2021 LGB (Gábor Lénárt) <lgblgblgb@gmail.com>
3 
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
8 
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
17 
18 
19 /* Note: currently this is for CBM BASIC 10 (or maybe 2, since the 'base' token set is the same)
20  * TODO: extend this to more CBM BASIC versions and also to other BASIC dialects used in Xemu!!! */
21 
22 #include "xemu/emutools.h"
23 #include "xemu/basic_text.h"
24 #include <stdlib.h>
25 #include <strings.h>
26 
27 
28 
29 #ifdef CBM_BASIC_TEXT_SUPPORT
30 
31 // This table is from VICE (utility petcat), though with a heavy edited form, current
32 // no multiple BASIC versions etc, just straight BASIC 10 ...
33 // Later, this source should be extended to all CBM BASIC dialects switchable, also
34 // probably with other non-commodore BASICs should be considered as well, available
35 // in Xemu ...
36 
37 static const struct {
38  const Uint16 token;
39  const char *str;
40 } basic_tokens[] = {
41  { 0x80,"end" },
42  { 0x81,"for" },
43  { 0x82,"next" },
44  { 0x83,"data" },
45  { 0x84,"input#" },
46  { 0x85,"input" },
47  { 0x86,"dim" },
48  { 0x87,"read" },
49  { 0x88,"let" },
50  { 0x89,"goto" },
51  { 0x8a,"run" },
52  { 0x8b,"if" },
53  { 0x8c,"restore" },
54  { 0x8d,"gosub" },
55  { 0x8e,"return" },
56  { 0x8f,"rem" },
57 
58  { 0x90,"stop" },
59  { 0x91,"on" },
60  { 0x92,"wait" },
61  { 0x93,"load" },
62  { 0x94,"save" },
63  { 0x95,"verify" },
64  { 0x96,"def" },
65  { 0x97,"poke" },
66  { 0x98,"print#" },
67  { 0x99,"print" },
68  { 0x9a,"cont" },
69  { 0x9b,"list" },
70  { 0x9c,"clr" },
71  { 0x9d,"cmd" },
72  { 0x9e,"sys" },
73  { 0x9f,"open" },
74 
75  { 0xa0,"close" },
76  { 0xa1,"get" },
77  { 0xa2,"new" },
78  { 0xa3,"tab(" },
79  { 0xa4,"to" },
80  { 0xa5,"fn" },
81  { 0xa6,"spc(" },
82  { 0xa7,"then" },
83  { 0xa8,"not" },
84  { 0xa9,"step" },
85  { 0xaa,"+" },
86  { 0xab,"-" },
87  { 0xac,"*" },
88  { 0xad,"/" },
89  { 0xae,"^" },
90  { 0xaf,"and" },
91 
92  { 0xb0,"or" },
93  { 0xb1,">" },
94  { 0xb2,"=" },
95  { 0xb3,"<" },
96  { 0xb4,"sgn" },
97  { 0xb5,"int" },
98  { 0xb6,"abs" },
99  { 0xb7,"usr" },
100  { 0xb8,"fre" },
101  { 0xb9,"pos" },
102  { 0xba,"sqr" },
103  { 0xbb,"rnd" },
104  { 0xbc,"log" },
105  { 0xbd,"exp" },
106  { 0xbe,"cos" },
107  { 0xbf,"sin" },
108 
109  { 0xc0,"tan" },
110  { 0xc1,"atn" },
111  { 0xc2,"peek" },
112  { 0xc3,"len" },
113  { 0xc4,"str$" },
114  { 0xc5,"val" },
115  { 0xc6,"asc" },
116  { 0xc7,"chr$" },
117  { 0xc8,"left$" },
118  { 0xc9,"right$" },
119  { 0xca,"mid$" },
120  { 0xcb,"go" },
121  { 0xcc,"rgr" },
122  { 0xcd,"rclr" },
123  { 0xcf,"joy" },
124 
125  { 0xd0,"rdot" },
126  { 0xd1,"dec" },
127  { 0xd2,"hex$" },
128  { 0xd3,"err$" },
129  { 0xd4,"instr" },
130  { 0xd5,"else" },
131  { 0xd6,"resume" },
132  { 0xd7,"trap" },
133  { 0xd8,"tron" },
134  { 0xd9,"troff" },
135  { 0xda,"sound" },
136  { 0xdb,"vol" },
137  { 0xdc,"auto" },
138  { 0xdd,"pudef" },
139  { 0xde,"graphic" },
140  { 0xdf,"paint" },
141 
142  { 0xe0,"char" },
143  { 0xe1,"box" },
144  { 0xe2,"circle" },
145  { 0xe3,"gshape" },
146  { 0xe4,"sshape" },
147  { 0xe5,"draw" },
148  { 0xe6,"locate" },
149  { 0xe7,"color" },
150  { 0xe8,"scnclr" },
151  { 0xe9,"scale" },
152  { 0xea,"help" },
153  { 0xeb,"do" },
154  { 0xec,"loop" },
155  { 0xed,"exit" },
156  { 0xee,"directory"},
157  { 0xef,"dsave" },
158 
159  { 0xf0,"dload" },
160  { 0xf1,"header" },
161  { 0xf2,"scratch" },
162  { 0xf3,"collect" },
163  { 0xf4,"copy" },
164  { 0xf5,"rename" },
165  { 0xf6,"backup" },
166  { 0xf7,"delete" },
167  { 0xf8,"renumber" },
168  { 0xf9,"key" },
169  { 0xfa,"monitor" },
170  { 0xfb,"using" },
171  { 0xfc,"until" },
172  { 0xfd,"while" },
173 
174  {0xce02,"pot" },
175  {0xce03,"bump" },
176  {0xce04,"pen" },
177  {0xce05,"rsppos" },
178  {0xce06,"rsprite" },
179  {0xce07,"rspcolor" },
180  {0xce08,"xor" },
181  {0xce09,"rwindow" },
182  {0xce0a,"pointer" },
183 
184  {0xfe02,"bank" },
185  {0xfe03,"filter" },
186  {0xfe04,"play" },
187  {0xfe05,"tempo" },
188  {0xfe06,"movspr" },
189  {0xfe07,"sprite" },
190  {0xfe08,"sprcolor" },
191  {0xfe09,"rreg" },
192  {0xfe0a,"envelope" },
193  {0xfe0b,"sleep" },
194  {0xfe0c,"catalog" },
195  {0xfe0d,"dopen" },
196  {0xfe0e,"append" },
197  {0xfe0f,"dclose" },
198 
199  {0xfe10,"bsave" },
200  {0xfe11,"bload" },
201  {0xfe12,"record" },
202  {0xfe13,"concat" },
203  {0xfe14,"dverify" },
204  {0xfe15,"dclear" },
205  {0xfe16,"sprsav" },
206  {0xfe17,"collision"},
207  {0xfe18,"begin" },
208  {0xfe19,"bend" },
209  {0xfe1a,"window" },
210  {0xfe1b,"boot" },
211  {0xfe1c,"width" },
212  {0xfe1d,"sprdef" },
213  {0xfe1e,"quit" },
214  {0xfe1f,"stash" },
215 
216  {0xfe21,"fetch" },
217  {0xfe23,"swap" },
218  {0xfe24,"off" },
219  {0xfe25,"fast" },
220  {0xfe26,"slow" },
221  {0, NULL }
222 
223 };
224 
225 
226 //char xemu_basic_decoder_error[1024];
227 
228 
229 static const char *ERROR_HEAD = "BASIC program exporting error:\n";
230 
231 static const char *end_of_line = NL; // native OS line ending is used ...
232 static const char *tex_head = "\\verbatimfont{\\codefont}\n\\begin{verbatim}\n";
233 static const char *tex_end = "\\end{verbatim}\n";
234 
235 
236 #define CONTEXT_POINTER0 0
237 #define CONTEXT_POINTER1 1
238 #define CONTEXT_LINENO0 2
239 #define CONTEXT_LINENO1 3
240 #define CONTEXT_NORMAL 4
241 #define CONTEXT_QUOTED 5
242 #define CONTEXT_REM 6
243 #define CONTEXT_BEGIN 7
244 #define CONTEXT_END 8
245 
246 
247 int xemu_basic_to_text_malloc ( Uint8 **buffer, int output_super_limit, const Uint8 *prg, int real_addr, const Uint8 *prg_limit, int basic_dialect, int flags )
248 {
249  int size = xemu_basic_to_text(NULL, output_super_limit, prg, real_addr, prg_limit, basic_dialect, flags);
250  if (size < 0) {
251  *buffer = NULL;
252  return size;
253  }
254  *buffer = malloc(size + 1);
255  if (!*buffer) {
256  ERROR_WINDOW("%sCannot allocate memory", ERROR_HEAD);
257  return -1;
258  }
259  size = xemu_basic_to_text(*buffer, size, prg, real_addr, prg_limit, basic_dialect, flags);
260  if (size < 0) {
261  free(*buffer);
262  return size;
263  }
264  buffer[size] = 0;
265  return size;
266 }
267 
268 
269 int xemu_basic_to_text ( Uint8 *output, int output_size, const Uint8 *prg, int real_addr, const Uint8 *prg_limit, int basic_dialect, int flags )
270 {
271  int output_used = 0;
272  char outbuf[256]; // the longest thing here is a single one-time decoded entity. This is waaay too much, but let's play safe ...
273  const char *o = NULL;
274  int num = -1, old_num = -1;
275  int ptr = 0;
276  int context = CONTEXT_BEGIN;
277  int token = 0;
278  for (;;) {
279  if (XEMU_UNLIKELY(prg > prg_limit)) {
280  ERROR_WINDOW("%sProgram flows outside of the allowed memory area after line %d", ERROR_HEAD, old_num);
281  return -1;
282  }
283  Uint8 c = *prg++;
284  real_addr++;
285  if (context == CONTEXT_BEGIN) {
286  ptr = c;
287  context = CONTEXT_POINTER1; // CONTEXT_BEGIN is same as CONTEXT_POINTER0, just occures once, so at the first line, we simply skip CONTEXT_POINTER0 since they're the same in a way ;-P Hard to explain
288  if ((flags & BASIC_TO_TEXT_FLAG_TEX))
289  o = tex_head;
290  else
291  continue;
292  } else if (context == CONTEXT_POINTER0) {
293  printf("ptr_to_here=%d real_addr=%d, delta=%d\n", ptr, real_addr, real_addr-ptr);
294  if (ptr != real_addr - 1) {
295  ERROR_WINDOW("%sBad BASIC chaining near line %d", ERROR_HEAD, num);
296  return -1;
297  }
298  ptr = c;
299  context = CONTEXT_POINTER1;
300  continue;
301  } else if (context == CONTEXT_POINTER1) {
302  ptr += c << 8;
303  if (XEMU_UNLIKELY(!ptr)) {
304  if ((flags & BASIC_TO_TEXT_FLAG_TEX)) {
305  context = CONTEXT_END;
306  o = tex_end;
307  } else
308  break; // END OF BASIC PROGRAM ;)
309  } else {
310  context = CONTEXT_LINENO0;
311  continue;
312  }
313  } else if (context == CONTEXT_LINENO0) {
314  context = CONTEXT_LINENO1;
315  old_num = num;
316  num = c;
317  continue;
318  } else if (context == CONTEXT_LINENO1) {
319  num += c << 8;
320  context = CONTEXT_NORMAL;
321  sprintf(outbuf, "%5d ", num);
322  o = outbuf;
323  } else if (context == CONTEXT_NORMAL && (c >= 0x80 || token != 0)) {
324  if ((c == 0xCE || c == 0xFE) && token == 0) {
325  token = c << 8;
326  } else {
327  token += c;
328  for (int a = 0 ;; a++) {
329  if (basic_tokens[a].token == token) {
330  o = basic_tokens[a].str;
331  break; // token found, "o" holds what it must be output
332  } else if (basic_tokens[a].token == 0) {
333  ERROR_WINDOW("%sUnknown token $%04X in line %d", ERROR_HEAD, token, num);
334  return -1;
335  }
336  }
337  if (token == 0x8F) // REM
338  context = CONTEXT_REM;
339  token = 0;
340  }
341  } else if (c == 0) { // end of basic line
342  if (XEMU_UNLIKELY(token)) { // TODO: maybe it's useless to check? see the condition above ...
343  ERROR_WINDOW("%sUnfinished extended token sequence in line %d", ERROR_HEAD, num);
344  return -1;
345  }
346  context = CONTEXT_POINTER0;
347  o = end_of_line;
348  } else {
349  if (c == '"') {
350  if (context == CONTEXT_NORMAL)
351  context = CONTEXT_QUOTED;
352  else if (context == CONTEXT_QUOTED)
353  context = CONTEXT_NORMAL;
354  }
355  if (c >= 'A' && c <= 'Z')
356  c += 'a' - 'A';
357  else if (c >= ('A' | 0x80) && c <= ('Z' | 0x80))
358  c &= 0x7F;
359  outbuf[0] = c;
360  outbuf[1] = 0;
361  o = outbuf;
362  }
363  // flush output in a safe way
364  if (XEMU_UNLIKELY(!o)) { // just for code errors, should not happen!
365  ERROR_WINDOW("%sInternal error, uninitialized output buffer in line %d", ERROR_HEAD, num);
366  return -1;
367  }
368  while (*o) {
369  if (XEMU_UNLIKELY(output_used >= output_size)) {
370  ERROR_WINDOW("%sOutput buffer is too small for this program in line %d", ERROR_HEAD, num);
371  return -1;
372  }
373  output_used++;
374  if (output)
375  *output++ = *o++;
376  else
377  o++; // this mode (when output is NULL) is just for calculating the size of output
378  }
379  if (XEMU_UNLIKELY(context == CONTEXT_END))
380  break; // END OF BASIC PROGRAM :)
381  o = NULL; // just for code errors, should not happen!
382  }
383  return output_used; // END OF BASIC PROGRAM :) Give back the size of output buffer used
384 }
385 
386 #endif
387 
388 
389 static const struct {
391  const char *text;
392 } conv_tab_screen[] = {
393  { 0x00, "@" },
394  { 0x1B, "[" },
395  { 0x1C, "\\" }, // pound symbol, mapped to backslash
396  { 0x1C, "{pound}" }, // pound symbol, alternative representation
397  { 0x1C, "£" }, // pound symbol (UTF8!)
398  { 0x1D, "]" },
399  { 0x1E, "^" }, // up-arrow symbol
400  { 0x1F, "_" }, // left-arrow symbol, mapped to underscore
401  { 0x40, "{dash}" },
402  { 0x5E, "{pi}" },
403  { 0x93, "{home}" },
404  // $80-$9F
405  { 0x82, "{uloff}"}, { 0x83, "{stop}" }, { 0x85, "{wht}" }, { 0x87, "{bell}" },
406  { 0x89, "{ht}" }, { 0x8A, "{lf}" }, { 0x8B, "{shen}" }, { 0x8C, "{shdi}" }, { 0x8D, "{ret}" }, { 0x8E, "{text}" }, { 0x8F, "{flon}" },
407  { 0x90, "{f9}" }, { 0x91, "{down}" }, { 0x92, "{rvon}" }, { 0x93, "{home}" }, { 0x94, "{del}" }, { 0x95, "{f10}" }, { 0x96, "{f11}" }, { 0x97, "{f12}" },
408  { 0x98, "{tab}" }, { 0x99, "{f13}" }, { 0x9A, "{f14}" }, { 0x9B, "{esc}" }, { 0x9C, "{red}" }, { 0x9D, "{right}"}, { 0x9E, "{grn}" }, { 0x9F, "{blu}" },
409  // $E0-$DF
410  { 0xE1, "{orng}" }, { 0xE2, "{ulon}" }, { 0xE3, "{run}" }, { 0xE4, "{help}" }, { 0xE5, "{f1}" }, { 0xE6, "{f3}" }, { 0xE7, "{f5}" },
411  { 0xE8, "{f7}" }, { 0xE9, "{f2}" }, { 0xEA, "{f4}" }, { 0xEB, "{f6}" }, { 0xEC, "{f8}" }, { 0xED, "{sret}" }, { 0xEE, "{gfx}" }, { 0xEF, "{floff}"},
412  { 0xD0, "{blk}" }, { 0xD1, "{up}" }, { 0xD2, "{rvoff}"}, { 0xD3, "{clr}" }, { 0xD4, "{inst}" }, { 0xD5, "{brn}" }, { 0xD6, "{lred}" }, { 0xD7, "{gry1}" },
413  { 0xD8, "{gry2}" }, { 0xD9, "{lgrn}" }, { 0xDA, "{lblu}" }, { 0xDB, "{gry3}" }, { 0xDC, "{pur}" }, { 0xDD, "{left}" }, { 0xDE, "{yel}" }, { 0xDF, "{cyn}" },
414  { 0x00, NULL }
415 };
416 
417 
418 
419 char *xemu_cbm_screen_to_text ( char *buffer, const int buffer_size, const Uint8 *v, const int cols, const int rows, const int lowercase )
420 {
421  char *t = buffer;
422  for (int y = 0; y < rows; y++) {
423  for (int x = 0; x < cols; x++) {
424  if (XEMU_UNLIKELY(t - buffer > buffer_size - 16)) {
425  ERROR_WINDOW("Sorry, ASCII converted screen does not fit into the output buffer");
426  return NULL;
427  }
428  Uint8 c = *v++;
429  // first, check out translation table for special cases
430  for (int a = 0; conv_tab_screen[a].text; a++) {
431  if (conv_tab_screen[a].screen_code == c) {
432  t += sprintf(t, "%s", conv_tab_screen[a].text);
433  goto next_char;
434  }
435  }
436  //const Uint8 inv = c & 0x80;
437  //c &= 0x7F;
438  if (c >= 0x01 && c <= 0x1A) { // Capital A-Z (in uppercase mode), a-z (in lower case mode)
439  *t++ = c - 1 + (lowercase ? 'a' : 'A');
440  continue;
441  }
442  if (c >= 0x20 && c <= 0x3F) { // space, various common marks, numbers: at the same place as in ASCII!
443  *t++ = c;
444  continue;
445  }
446  if (c >= 0x41 && c <= 0x5A) { // Gfx chars (in uppercase mode), A-Z (in lower case mode)
447  if (lowercase)
448  *t++ = c;
449  else
450  t += sprintf(t, "{%c}", c);
451  continue;
452  }
453  // Missing policy for remaining characters, let's dump with its screen code
454  t += sprintf(t, "{$%02X}", c);
455  // well yeah, do not say anything ... C does not leave too much choice to continue from a nested loop ...
456  next_char:
457  continue;
458  }
459  while (t > buffer && t[-1] == ' ') // remove trailing spaces
460  t--;
461  t += sprintf(t, "%s", NL); // put a newline
462  }
463  // remove empty lines from the end of our capture
464  while (t > buffer && (t[-1] == '\r' || t[-1] == '\n'))
465  t--;
466  strcpy(t, NL); // still, a final newline. THIS ALSO CLOSES OUR STRING with '\0'!!!!!
467  // remove empty lines from the beginning of our capture
468  while (*buffer == '\r' || *buffer == '\n')
469  buffer++;
470  // return our result!
471  return buffer;
472 }
473 
474 
475 int xemu_cbm_text_to_screen ( Uint8 *v, const int cols, const int rows, const char *buffer, const int lowercase )
476 {
477  const Uint8 *start = v;
478  const Uint8 *end = v + (cols * rows);
479  char ch_prev, ch = 0;
480  v += cols; // do not use the first line, we expect user to have the cursor there, to be safe
481  while (*buffer && v < end) {
482  ch_prev = ch;
483  // first, check out translation table for special cases
484  for (int a = 0; conv_tab_screen[a].text; a++) {
485  const int l = strlen(conv_tab_screen[a].text);
486  if (!strncmp(conv_tab_screen[a].text, buffer, l)) {
487  *v++ = conv_tab_screen[a].screen_code;
488  buffer += l;
489  ch = 0x40; // something, which is not newline ;)
490  goto next_char;
491  }
492  }
493  // fetch next to-be-pasted ASCII character
494  ch = *buffer++;
495  // Special sequences which are NOT handled by the "conv_tab_screen" loop above
496  if (ch == '{') {
497  const char *p = strchr(buffer, '}');
498  if (!p) {
499  *v++ = 0x1B; // just fake a '[' because ...
500  continue; // ... closing pair of '{' is not found, ignore this character
501  }
502  const int l = (int)(p - buffer);
503  if (l == 1) { // single char {X} case
504  *v++ = *buffer;
505  } else if (l == 3 && *buffer == '$') {
506  char *e;
507  const Uint8 h = (Uint8)strtol(buffer + 1, &e, 16);
508  if (e == p)
509  *v++ = h;
510  }
511  buffer = p + 1; // move to the next ASCII to-be-pasted character after '}'
512  continue;
513  }
514  if (ch == '}') { // if there is a '}' for whatever reason, translate into ']'
515  *v++ = 0x1D;
516  continue;
517  }
518  if (ch == '\n' || ch == '\r') {
519  if ((ch_prev == '\n' || ch_prev == '\r') && ch_prev != ch) {
520  ch_prev = 0;
521  continue; // \r\n or other multi-ctrl-char sequence for line break
522  }
523  // new line
524  while ((v - start) % cols)
525  *v++ = 32;
526  continue;
527  }
528  if (ch == '\t') { // space (also TAB is rendered as space for now ...)
529  *v++ = 32;
530  continue;
531  }
532  if ((signed char)ch < 32) // ignore invalid characters (as a signed byte value this is also true for >=128 unsigned ones)
533  continue;
534  if (ch >= 0x61 && ch <= 0x7A) { // ASCII small letters
535  *v++ = ch - 0x61 + 1;
536  continue;
537  }
538  if (ch >= 0x41 && ch <= 0x5A) { // ASCII capital letters
539  *v++ = lowercase ? ch : ch - 0x41 + 1;
540  continue;
541  }
542  if (ch >= 0x20 && ch <= 0x3F) { // space, various common marks, numbers: at the same place as in ASCII!
543  *v++ = ch;
544  continue;
545  }
546  // The unknown stuff ... Now mark with '@'
547  DEBUGPRINT("PASTE: unknown ASCII character: %c (%d)" NL, ch, (unsigned int)ch);
548  *v++ = 0;
549  next_char:
550  continue;
551  }
552  while (v < end && ((v - start) % cols))
553  *v++ = 32;
554  return 0;
555 }
basic_text.h
text
const char * text
Definition: basic_text.c:391
emutools.h
flags
Uint8 flags
Definition: z8k1.c:126
xemu_cbm_text_to_screen
int xemu_cbm_text_to_screen(Uint8 *v, const int cols, const int rows, const char *buffer, const int lowercase)
Definition: basic_text.c:475
Uint8
uint8_t Uint8
Definition: fat32.c:51
x
int x
Definition: console.c:27
DEBUGPRINT
#define DEBUGPRINT(...)
Definition: emutools_basicdefs.h:171
prg
char * prg
Definition: commodore_vic20.c:96
screen_code
const Uint8 screen_code
Definition: basic_text.c:390
ERROR_WINDOW
#define ERROR_WINDOW(...)
Definition: xep128.h:116
NL
#define NL
Definition: fat32.c:37
size
int size
Definition: inject.c:37
y
int y
Definition: console.c:27
Uint16
uint16_t Uint16
Definition: fat32.c:50
xemu_cbm_screen_to_text
char * xemu_cbm_screen_to_text(char *buffer, const int buffer_size, const Uint8 *v, const int cols, const int rows, const int lowercase)
Definition: basic_text.c:419
XEMU_UNLIKELY
#define XEMU_UNLIKELY(__x__)
Definition: emutools_basicdefs.h:125