1use super::jvm_types::{ConstantPool, ConstantPoolEntry, JvmInstruction};
2use crate::error::RuntimeError;
3use std::collections::HashMap;
4use std::io::{Cursor, Read};
5
6#[derive(Debug, Clone)]
7pub struct MethodInfo {
8 pub name: String,
9 pub descriptor: String,
10 pub bytecode: Vec<JvmInstruction>,
11 pub max_locals: usize,
12 pub max_stack: usize,
13}
14
15pub struct ClassFile {
16 pub constant_pool: ConstantPool,
17 pub main_method_bytecode: Vec<JvmInstruction>,
18 pub max_locals: usize,
19 pub max_stack: usize,
20 pub methods: HashMap<String, MethodInfo>, }
22
23pub struct ClassFileParser;
24
25impl ClassFileParser {
26 pub fn parse(data: &[u8]) -> Result<ClassFile, RuntimeError> {
27 let mut cursor = Cursor::new(data);
28
29 let magic = read_u32(&mut cursor)?;
31 if magic != 0xCAFEBABE {
32 return Err(RuntimeError::InvalidStackState); }
34
35 let _minor_version = read_u16(&mut cursor)?;
36 let _major_version = read_u16(&mut cursor)?;
37
38 let constant_pool_count = read_u16(&mut cursor)?;
40 let mut constant_pool = ConstantPool::new();
41
42 let mut i = 1;
44 while i < constant_pool_count {
45 let tag = read_u8(&mut cursor)?;
46 match tag {
47 1 => {
48 let length = read_u16(&mut cursor)?;
50 let mut bytes = vec![0u8; length as usize];
51 cursor
52 .read_exact(&mut bytes)
53 .map_err(|_| RuntimeError::InvalidStackState)?;
54
55 let utf8_string = String::from_utf8_lossy(&bytes).into_owned();
58 constant_pool
59 .add_utf8(utf8_string)
60 .map_err(|_e| RuntimeError::InvalidStackState)?;
61 }
62 3 => {
63 let value = read_i32(&mut cursor)?;
65 constant_pool
66 .add_integer(value)
67 .map_err(|_e| RuntimeError::InvalidStackState)?;
68 }
69 4 => {
70 let value = read_f32(&mut cursor)?;
72 constant_pool
73 .add_float(value)
74 .map_err(|_e| RuntimeError::InvalidStackState)?;
75 }
76 5 => {
77 let value = read_i64(&mut cursor)?;
79 constant_pool
80 .add_long(value)
81 .map_err(|_e| RuntimeError::InvalidStackState)?;
82 i += 1; }
85 6 => {
86 let value = read_f64(&mut cursor)?;
88 constant_pool
89 .add_double(value)
90 .map_err(|_e| RuntimeError::InvalidStackState)?;
91 i += 1; }
94 7 => {
95 let name_index = read_u16(&mut cursor)?;
97 constant_pool
98 .add_class(name_index)
99 .map_err(|_e| RuntimeError::InvalidStackState)?;
100 }
101 8 => {
102 let string_index = read_u16(&mut cursor)?;
104 constant_pool
105 .add_string(string_index)
106 .map_err(|_e| RuntimeError::InvalidStackState)?;
107 }
108 9 => {
109 let class_index = read_u16(&mut cursor)?;
111 let name_and_type_index = read_u16(&mut cursor)?;
112 constant_pool
113 .add_fieldref(class_index, name_and_type_index)
114 .map_err(|_e| RuntimeError::InvalidStackState)?;
115 }
116 10 => {
117 let class_index = read_u16(&mut cursor)?;
119 let name_and_type_index = read_u16(&mut cursor)?;
120 constant_pool
121 .add_methodref(class_index, name_and_type_index)
122 .map_err(|_e| RuntimeError::InvalidStackState)?;
123 }
124 12 => {
125 let name_index = read_u16(&mut cursor)?;
127 let descriptor_index = read_u16(&mut cursor)?;
128 constant_pool
129 .add_name_and_type(name_index, descriptor_index)
130 .map_err(|_e| RuntimeError::InvalidStackState)?;
131 }
132 11 => {
133 let class_index = read_u16(&mut cursor)?;
135 let name_and_type_index = read_u16(&mut cursor)?;
136 constant_pool
137 .add_methodref(class_index, name_and_type_index)
138 .map_err(|_e| RuntimeError::InvalidStackState)?;
139 }
140 15 => {
141 let _reference_kind = read_u8(&mut cursor)?;
143 let _reference_index = read_u16(&mut cursor)?;
144 constant_pool
146 .add_placeholder()
147 .map_err(|_e| RuntimeError::InvalidStackState)?;
148 }
149 16 => {
150 let _descriptor_index = read_u16(&mut cursor)?;
152 constant_pool
154 .add_placeholder()
155 .map_err(|_e| RuntimeError::InvalidStackState)?;
156 }
157 18 => {
158 let _bootstrap_method_attr_index = read_u16(&mut cursor)?;
160 let _name_and_type_index = read_u16(&mut cursor)?;
161 constant_pool
163 .add_placeholder()
164 .map_err(|_e| RuntimeError::InvalidStackState)?;
165 }
166 _ => {
167 return Err(RuntimeError::UnknownConstantPoolTag { tag, index: i });
169 }
170 }
171 i += 1;
172 }
173
174 let _access_flags = read_u16(&mut cursor)?;
176 let _this_class = read_u16(&mut cursor)?;
177 let _super_class = read_u16(&mut cursor)?;
178
179 let interfaces_count = read_u16(&mut cursor)?;
181 for _ in 0..interfaces_count {
182 let _interface = read_u16(&mut cursor)?;
183 }
184
185 let fields_count = read_u16(&mut cursor)?;
187 for _ in 0..fields_count {
188 let _access_flags = read_u16(&mut cursor)?;
189 let _name_index = read_u16(&mut cursor)?;
190 let _descriptor_index = read_u16(&mut cursor)?;
191 let attributes_count = read_u16(&mut cursor)?;
192 for _ in 0..attributes_count {
193 let _attribute_name_index = read_u16(&mut cursor)?;
194 let attribute_length = read_u32(&mut cursor)?;
195 for _ in 0..attribute_length {
197 read_u8(&mut cursor)?;
198 }
199 }
200 }
201
202 let methods_count = read_u16(&mut cursor)?;
204 let mut main_method_bytecode = Vec::new();
205 let mut max_locals = 0;
206 let mut max_stack = 0;
207 let mut methods = HashMap::new();
208
209 for _ in 0..methods_count {
210 let _access_flags = read_u16(&mut cursor)?;
211 let name_index = read_u16(&mut cursor)?;
212 let descriptor_index = read_u16(&mut cursor)?;
213 let attributes_count = read_u16(&mut cursor)?;
214
215 let method_name = get_utf8_from_pool(&constant_pool, name_index);
217 let method_descriptor = get_utf8_from_pool(&constant_pool, descriptor_index);
218
219 let (is_main_method, is_preferred) =
221 check_is_main_method(&constant_pool, name_index, descriptor_index);
222
223 let mut method_bytecode = Vec::new();
224 let mut method_max_locals = 0;
225 let mut method_max_stack = 0;
226
227 for _ in 0..attributes_count {
228 let attribute_name_index = read_u16(&mut cursor)?;
229 let attribute_length = read_u32(&mut cursor)?;
230
231 if check_is_code_attribute(&constant_pool, attribute_name_index) {
232 method_max_stack = read_u16(&mut cursor)? as usize;
233 method_max_locals = read_u16(&mut cursor)? as usize;
234 let code_length = read_u32(&mut cursor)?;
235
236 let mut bytecode = vec![0u8; code_length as usize];
238 cursor
239 .read_exact(&mut bytecode)
240 .map_err(|_| RuntimeError::InvalidStackState)?;
241 method_bytecode = parse_bytecode(&bytecode)?;
242
243 let exception_table_length = read_u16(&mut cursor)?;
245 for _ in 0..exception_table_length {
246 let _start_pc = read_u16(&mut cursor)?;
247 let _end_pc = read_u16(&mut cursor)?;
248 let _handler_pc = read_u16(&mut cursor)?;
249 let _catch_type = read_u16(&mut cursor)?;
250 }
251
252 let code_attributes_count = read_u16(&mut cursor)?;
254 for _ in 0..code_attributes_count {
255 let _code_attribute_name_index = read_u16(&mut cursor)?;
256 let code_attribute_length = read_u32(&mut cursor)?;
257 for _ in 0..code_attribute_length {
258 read_u8(&mut cursor)?;
259 }
260 }
261 } else {
262 for _ in 0..attribute_length {
264 read_u8(&mut cursor)?;
265 }
266 }
267 }
268
269 if !method_bytecode.is_empty() {
271 let method_info = MethodInfo {
272 name: method_name.clone(),
273 descriptor: method_descriptor.clone(),
274 bytecode: method_bytecode.clone(),
275 max_locals: method_max_locals,
276 max_stack: method_max_stack,
277 };
278 methods.insert(method_name.clone(), method_info);
279
280 if is_main_method && (main_method_bytecode.is_empty() || is_preferred) {
282 main_method_bytecode = method_bytecode;
283 max_locals = method_max_locals;
284 max_stack = method_max_stack;
285 }
286 }
287 }
288
289 Ok(ClassFile {
290 constant_pool,
291 main_method_bytecode,
292 max_locals,
293 max_stack,
294 methods,
295 })
296 }
297}
298
299fn parse_bytecode(bytecode: &[u8]) -> Result<Vec<JvmInstruction>, RuntimeError> {
300 let mut instructions = Vec::new();
301 let mut i = 0;
302
303 while i < bytecode.len() {
304 let opcode = bytecode[i];
305 i += 1;
306
307 match opcode {
308 0x00 => {
309 instructions.push(JvmInstruction::Nop);
311 }
312 0x02 => instructions.push(JvmInstruction::IconstM1),
313 0x03 => instructions.push(JvmInstruction::Iconst0),
314 0x04 => instructions.push(JvmInstruction::Iconst1),
315 0x05 => instructions.push(JvmInstruction::Iconst2),
316 0x06 => instructions.push(JvmInstruction::Iconst3),
317 0x07 => instructions.push(JvmInstruction::Iconst4),
318 0x08 => instructions.push(JvmInstruction::Iconst5),
319 0x10 => {
320 if i >= bytecode.len() {
322 return Err(RuntimeError::InvalidStackState);
323 }
324 let value = bytecode[i] as i8;
325 instructions.push(JvmInstruction::Bipush(value));
326 i += 1;
327 }
328 0x11 => {
329 if i + 1 >= bytecode.len() {
331 return Err(RuntimeError::InvalidStackState);
332 }
333 let value = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
334 instructions.push(JvmInstruction::Sipush(value as i16));
335 i += 2;
336 }
337 0x12 => {
338 if i >= bytecode.len() {
340 return Err(RuntimeError::InvalidStackState);
341 }
342 let index = bytecode[i] as u16;
343 instructions.push(JvmInstruction::Ldc(index));
344 i += 1;
345 }
346 0x14 => {
347 if i + 1 >= bytecode.len() {
349 return Err(RuntimeError::InvalidStackState);
350 }
351 let index = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
352 instructions.push(JvmInstruction::Ldc2W(index));
353 i += 2;
354 }
355 0x15 => {
356 if i >= bytecode.len() {
358 return Err(RuntimeError::InvalidStackState);
359 }
360 let index = bytecode[i];
361 instructions.push(JvmInstruction::Iload(index));
362 i += 1;
363 }
364 0x1A => instructions.push(JvmInstruction::Iload0),
365 0x1B => instructions.push(JvmInstruction::Iload1),
366 0x1C => instructions.push(JvmInstruction::Iload2),
367 0x1D => instructions.push(JvmInstruction::Iload3),
368
369 0x19 => {
371 if i >= bytecode.len() {
373 return Err(RuntimeError::InvalidStackState);
374 }
375 let index = bytecode[i];
376 instructions.push(JvmInstruction::Aload(index));
377 i += 1;
378 }
379 0x2A => instructions.push(JvmInstruction::Aload0),
380 0x2B => instructions.push(JvmInstruction::Aload1),
381 0x2C => instructions.push(JvmInstruction::Aload2),
382 0x2D => instructions.push(JvmInstruction::Aload3),
383 0x3A => {
384 if i >= bytecode.len() {
386 return Err(RuntimeError::InvalidStackState);
387 }
388 let index = bytecode[i];
389 instructions.push(JvmInstruction::Astore(index));
390 i += 1;
391 }
392 0x4B => instructions.push(JvmInstruction::Astore0),
393 0x4C => instructions.push(JvmInstruction::Astore1),
394 0x4D => instructions.push(JvmInstruction::Astore2),
395 0x4E => instructions.push(JvmInstruction::Astore3),
396
397 0x36 => {
398 if i >= bytecode.len() {
400 return Err(RuntimeError::InvalidStackState);
401 }
402 let index = bytecode[i];
403 instructions.push(JvmInstruction::Istore(index));
404 i += 1;
405 }
406 0x3B => instructions.push(JvmInstruction::Istore0),
407 0x3C => instructions.push(JvmInstruction::Istore1),
408 0x3D => instructions.push(JvmInstruction::Istore2),
409 0x3E => instructions.push(JvmInstruction::Istore3),
410 0x57 => instructions.push(JvmInstruction::Pop),
411 0x59 => instructions.push(JvmInstruction::Dup),
412 0x5F => instructions.push(JvmInstruction::Swap),
413 0x60 => instructions.push(JvmInstruction::Iadd),
414 0x64 => instructions.push(JvmInstruction::Isub),
415 0x68 => instructions.push(JvmInstruction::Imul),
416 0x6C => instructions.push(JvmInstruction::Idiv),
417 0x70 => instructions.push(JvmInstruction::Irem),
418 0x63 => instructions.push(JvmInstruction::Dadd),
419 0x67 => instructions.push(JvmInstruction::Dsub),
420 0x6B => instructions.push(JvmInstruction::Dmul),
421 0x6F => instructions.push(JvmInstruction::Ddiv),
422 0x87 => instructions.push(JvmInstruction::I2d),
423 0x8E => instructions.push(JvmInstruction::D2i),
424
425 0x1E => instructions.push(JvmInstruction::Lload0),
427 0x1F => instructions.push(JvmInstruction::Lload1),
428 0x20 => instructions.push(JvmInstruction::Lload2),
429 0x21 => instructions.push(JvmInstruction::Lload3),
430
431 0x18 => {
433 if i >= bytecode.len() {
435 return Err(RuntimeError::InvalidStackState);
436 }
437 let index = bytecode[i];
438 instructions.push(JvmInstruction::Dload(index));
439 i += 1;
440 }
441 0x26 => instructions.push(JvmInstruction::Dload0),
442 0x27 => instructions.push(JvmInstruction::Dload1),
443 0x28 => instructions.push(JvmInstruction::Dload2),
444 0x29 => instructions.push(JvmInstruction::Dload3),
445 0x39 => {
446 if i >= bytecode.len() {
448 return Err(RuntimeError::InvalidStackState);
449 }
450 let index = bytecode[i];
451 instructions.push(JvmInstruction::Dstore(index));
452 i += 1;
453 }
454 0x47 => instructions.push(JvmInstruction::Dstore0),
455 0x48 => instructions.push(JvmInstruction::Dstore1),
456 0x49 => instructions.push(JvmInstruction::Dstore2),
457 0x4A => instructions.push(JvmInstruction::Dstore3),
458
459 0x3F => instructions.push(JvmInstruction::Lstore0),
460 0x40 => instructions.push(JvmInstruction::Lstore1),
461 0x41 => instructions.push(JvmInstruction::Lstore2),
462 0x42 => instructions.push(JvmInstruction::Lstore3),
463
464 0xA7 => {
465 if i + 1 >= bytecode.len() {
467 return Err(RuntimeError::InvalidStackState);
468 }
469 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
470 instructions.push(JvmInstruction::Goto(offset));
471 i += 2;
472 }
473 0x99 => {
474 if i + 1 >= bytecode.len() {
476 return Err(RuntimeError::InvalidStackState);
477 }
478 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
479 instructions.push(JvmInstruction::Ifeq(offset));
480 i += 2;
481 }
482 0x9A => {
483 if i + 1 >= bytecode.len() {
485 return Err(RuntimeError::InvalidStackState);
486 }
487 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
488 instructions.push(JvmInstruction::Ifne(offset));
489 i += 2;
490 }
491 0x9B => {
492 if i + 1 >= bytecode.len() {
494 return Err(RuntimeError::InvalidStackState);
495 }
496 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
497 instructions.push(JvmInstruction::Iflt(offset));
498 i += 2;
499 }
500 0x9C => {
501 if i + 1 >= bytecode.len() {
503 return Err(RuntimeError::InvalidStackState);
504 }
505 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
506 instructions.push(JvmInstruction::Ifge(offset));
507 i += 2;
508 }
509 0x9D => {
510 if i + 1 >= bytecode.len() {
512 return Err(RuntimeError::InvalidStackState);
513 }
514 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
515 instructions.push(JvmInstruction::Ifgt(offset));
516 i += 2;
517 }
518 0x9E => {
519 if i + 1 >= bytecode.len() {
521 return Err(RuntimeError::InvalidStackState);
522 }
523 let offset = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
524 instructions.push(JvmInstruction::Ifle(offset));
525 i += 2;
526 }
527 0xB1 => instructions.push(JvmInstruction::Return),
528 0xAC => instructions.push(JvmInstruction::Ireturn),
529 0xB2 => {
530 if i + 1 >= bytecode.len() {
532 return Err(RuntimeError::InvalidStackState);
533 }
534 let index = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
535 instructions.push(JvmInstruction::Getstatic(index));
536 i += 2;
537 }
538 0xB6 => {
539 if i + 1 >= bytecode.len() {
541 return Err(RuntimeError::InvalidStackState);
542 }
543 let index = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
544 instructions.push(JvmInstruction::Invokevirtual(index));
545 i += 2;
546 }
547 0xB8 => {
548 if i + 1 >= bytecode.len() {
550 return Err(RuntimeError::InvalidStackState);
551 }
552 let index = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
553 instructions.push(JvmInstruction::Invokestatic(index));
554 i += 2;
555 }
556 0x0B => instructions.push(JvmInstruction::Lconst0), 0x0E => instructions.push(JvmInstruction::Dconst0),
558 0x0F => instructions.push(JvmInstruction::Dconst1),
559 0xB7 => {
560 if i + 1 >= bytecode.len() {
562 return Err(RuntimeError::InvalidStackState);
563 }
564 let index = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
565 instructions.push(JvmInstruction::Invokespecial(index));
566 i += 2;
567 }
568 0xBB => {
569 if i + 1 >= bytecode.len() {
571 return Err(RuntimeError::InvalidStackState);
572 }
573 let index = ((bytecode[i] as u16) << 8) | (bytecode[i + 1] as u16);
574 instructions.push(JvmInstruction::New(index));
575 i += 2;
576 }
577 _ => {
578 eprintln!(
580 "Warning: Unknown opcode 0x{:02X} at position {}",
581 opcode,
582 i - 1
583 );
584 }
585 }
586 }
587
588 Ok(instructions)
589}
590
591fn check_is_main_method(
592 constant_pool: &ConstantPool,
593 name_index: u16,
594 descriptor_index: u16,
595) -> (bool, bool) {
596 let entries = constant_pool.entries();
597
598 if let Some(ConstantPoolEntry::Utf8(name)) = entries.get((name_index - 1) as usize) {
600 if name != "main" {
601 return (false, false);
602 }
603 } else {
604 return (false, false);
605 }
606
607 if let Some(ConstantPoolEntry::Utf8(descriptor)) = entries.get((descriptor_index - 1) as usize)
609 {
610 match descriptor.as_str() {
611 "()V" => (true, true), "([Ljava/lang/String;)V" => (true, false), _ => (false, false),
614 }
615 } else {
616 (false, false)
617 }
618}
619
620fn check_is_code_attribute(constant_pool: &ConstantPool, attribute_name_index: u16) -> bool {
621 let entries = constant_pool.entries();
622
623 if let Some(ConstantPoolEntry::Utf8(attr_name)) =
624 entries.get((attribute_name_index - 1) as usize)
625 {
626 attr_name == "Code"
627 } else {
628 false
629 }
630}
631
632fn read_u8(cursor: &mut Cursor<&[u8]>) -> Result<u8, RuntimeError> {
633 let mut buf = [0u8; 1];
634 cursor
635 .read_exact(&mut buf)
636 .map_err(|_| RuntimeError::InvalidStackState)?;
637 Ok(buf[0])
638}
639
640fn read_u16(cursor: &mut Cursor<&[u8]>) -> Result<u16, RuntimeError> {
641 let mut buf = [0u8; 2];
642 cursor
643 .read_exact(&mut buf)
644 .map_err(|_| RuntimeError::InvalidStackState)?;
645 Ok(u16::from_be_bytes(buf))
646}
647
648fn read_u32(cursor: &mut Cursor<&[u8]>) -> Result<u32, RuntimeError> {
649 let mut buf = [0u8; 4];
650 cursor
651 .read_exact(&mut buf)
652 .map_err(|_| RuntimeError::InvalidStackState)?;
653 Ok(u32::from_be_bytes(buf))
654}
655
656fn read_i32(cursor: &mut Cursor<&[u8]>) -> Result<i32, RuntimeError> {
657 let mut buf = [0u8; 4];
658 cursor
659 .read_exact(&mut buf)
660 .map_err(|_| RuntimeError::InvalidStackState)?;
661 Ok(i32::from_be_bytes(buf))
662}
663
664fn read_f32(cursor: &mut Cursor<&[u8]>) -> Result<f32, RuntimeError> {
665 let mut buf = [0u8; 4];
666 cursor
667 .read_exact(&mut buf)
668 .map_err(|_| RuntimeError::InvalidStackState)?;
669 Ok(f32::from_be_bytes(buf))
670}
671
672fn read_i64(cursor: &mut Cursor<&[u8]>) -> Result<i64, RuntimeError> {
673 let mut buf = [0u8; 8];
674 cursor
675 .read_exact(&mut buf)
676 .map_err(|_| RuntimeError::InvalidStackState)?;
677 Ok(i64::from_be_bytes(buf))
678}
679
680fn read_f64(cursor: &mut Cursor<&[u8]>) -> Result<f64, RuntimeError> {
681 let mut buf = [0u8; 8];
682 cursor
683 .read_exact(&mut buf)
684 .map_err(|_| RuntimeError::InvalidStackState)?;
685 Ok(f64::from_be_bytes(buf))
686}
687
688fn get_utf8_from_pool(constant_pool: &ConstantPool, index: u16) -> String {
689 if index == 0 {
690 return String::new();
691 }
692
693 let entries = constant_pool.entries();
694 let actual_index = (index - 1) as usize;
695
696 if actual_index < entries.len() {
697 if let ConstantPoolEntry::Utf8(s) = &entries[actual_index] {
698 return s.clone();
699 }
700 }
701
702 String::new()
703}